...
 
Commits (2)
......@@ -234,7 +234,7 @@ class Dataset(ABC):
"""
def __init__(self,
dataset: Union[str, xr.Dataset, 'Dataset', dict],
dataset: Union[str, xr.Dataset, 'Dataset', dict] = None,
mode: OpenMode = OpenMode.READ_ONLY,
view: Optional[Dict[str, slice]] = None,
dim_matching: Optional[Dict[str, str]] = DIM_MATCHING,
......@@ -244,7 +244,7 @@ class Dataset(ABC):
**kwargs):
"""
"""
if dataset is None:
if dataset is None and 'data_vars' not in kwargs:
raise ValueError('an url or dataset object must be provided')
object.__init__(self)
......@@ -275,7 +275,33 @@ class Dataset(ABC):
elif isinstance(dataset, xr.Dataset):
self.dataset = dataset
elif isinstance(dataset, dict):
elif isinstance(dataset, dict) or dataset is None:
# dataset provided as a xarray compliant dictionary or xarray
# dataset creation keywords
if dataset is not None and 'data_vars' in kwargs:
raise ValueError(
"data fields can not be provided both as main argument"
" and with data_vars keyword"
)
if dataset is None and 'data_vars' not in kwargs:
raise ValueError(
"either dataset or data_vars argument must be provided"
)
def data_as_tuple(arg):
if not isinstance(arg, dict) or len(arg) == 0:
raise TypeError("Badly formatted input: {}".format(arg))
return isinstance(arg[next(iter(arg))], tuple)
if dataset is None or data_as_tuple(dataset):
# xarray from classic arguments
dst = {'data_vars': dataset}
for kw in ['data_vars', 'coords', 'attrs', 'dims']:
if kw in kwargs:
dst[kw] = kwargs[kw]
self.dataset = xr.Dataset(**dst)
else:
# xarray from_dict
self._create_from_dict(dataset)
elif isinstance(dataset, Dataset):
self.dataset = dataset
......@@ -364,7 +390,7 @@ class Dataset(ABC):
)
return result
def _create_from_dict(self, data):
def _create_from_dict(self, data, **kwargs):
"""
Create the dataset from a dict of fields and attributes.
"""
......@@ -477,7 +503,7 @@ class Dataset(ABC):
return '%Y-%m-%dT%H:%M:%SZ'
def has_field(self, fieldname: str) -> bool:
"""Return True if the field ``fieldname`` exists."""
"""Return True if the field ``fieldname`` exists in the dataset."""
return fieldname in self.fieldnames
def rename_field(self, name: str, newname: str) -> None:
......@@ -943,11 +969,11 @@ class Dataset(ABC):
])
@property
def geodimnames(self):
def geodimnames(self) -> Tuple[str]:
return tuple(self.geodims.keys())
@property
def geodimsizes(self):
def geodimsizes(self) -> Tuple[int]:
return tuple(self.geodims.values())
def get_field_fillvalue(self, fieldname: str) -> Any:
......@@ -1144,7 +1170,6 @@ class Dataset(ABC):
else:
raise NotImplementedError
def get_lon(self, **kwargs):
"""Return the longitude values of a feature.
......@@ -1412,8 +1437,8 @@ class Dataset(ABC):
return self.attrs['time_coverage_start']
@time_coverage_start.setter
def time_coverage_start(self,
date: Union[str, 'datetime.datetime'] = 'auto'):
def time_coverage_start(
self, date: Union[str, 'datetime.datetime'] = 'auto'):
"""Set the minimum sensing time of the data contained in the dataset
Args:
......@@ -1558,10 +1583,10 @@ class Dataset(ABC):
)
def save(self,
dest=None,
format='NETCDF4',
preprocess=None,
profile='default_saving_profile.yaml'
dest: Union[str, 'Dataset', None] = None,
format: str = 'NETCDF4',
preprocess: bool = False,
profile: str = 'default_saving_profile.yaml'
):
"""
Args:
......@@ -1621,7 +1646,7 @@ class Dataset(ABC):
self._sync_internals()
# apply new formatting rules
if preprocess is not None:
if preprocess:
saved_dataset = preprocess(self.original)
else:
saved_dataset = self._convert_format(profile)
......
......@@ -22,6 +22,9 @@ class NCDataset(Dataset):
Args:
format ({‘NETCDF4’, ‘NETCDF4_CLASSIC’, ‘NETCDF3_64BIT’,
‘NETCDF3_CLASSIC’}, optional): format (default is 'NETCDF4')
See:
:class:`~cerbere.dataset.dataset.Dataset`
"""
def __init__(
self,
......
......@@ -8,13 +8,9 @@ from __future__ import print_function
from abc import abstractmethod, ABC
from collections import OrderedDict
import datetime
import logging
import warnings
from typing import Tuple
from typing import Tuple, Mapping
import numpy as np
import pandas as pd
import xarray as xr
from ..dataset.field import Field
......@@ -30,18 +26,16 @@ class Feature(Dataset, ABC):
.. |dataset| replace:: :mod:`~cerbere.dataset`
.. |feature| replace:: :mod:`~cerbere.feature`
All feature| classes are inherited from this parent class.
All |feature| classes are inherited from this parent class.
A feature object is specialized |dataset| object as it
A feature object is a specialized |dataset| object as it
can only contain typed observations, where the type is defined by the data
structure or observation pattern. Being a |dataset| object,
it can be created from the same arguments as any |dataset| class instance.
Refer to |dataset|.
In most cases, a Feature object will be instantiated from a file content,
In many cases, a |feature| object will be instantiated from a file content,
read through a |dataset| class handling the specific format of this file.
For instance:
......@@ -74,7 +68,6 @@ class Feature(Dataset, ABC):
if not self._check_dimensions():
raise ValueError("data structure does not match the feature type")
@property
def _dataset_class(self):
if isinstance(self.dataset, Dataset):
......@@ -242,79 +235,22 @@ class Feature(Dataset, ABC):
for fieldname, field in child_fields.items():
self.add_field(field)
def get_values(self, fieldname, **kwargs):
def get_values(self, *args, **kwargs):
"""
Args:
expand (bool): expand the geocoordinate values to the full data
grid. This is only for lat, lon and times in the cases for
instance of cylindrical grids (where lat, lon are provided as
vectors) or average/climatological products (where time is
given as a single value for the whole grid). This returns a lat,
lon or times value for each grid pixel.
For more details on the other arguments, refer to:
:func:`cerbere.dataset.Dataset.get_values`
See:
:meth:`~cerbere.dataset.Dataset.get_values`
"""
if 'expand_dims' not in kwargs:
kwargs['expand_dims'] = self._feature_geodimnames
return super(Feature, self).get_values(
fieldname, **kwargs
*args, **kwargs
)
# def save(self, output=None, infer_cf_attrs=False, **kwargs):
# """
# Save the feature to a storage (file,...)
#
# Args:
# output (:class:`~cerbere.dataset.xarraydataset.XArrayDataset`):
# storage object which to save the feature data to.
#
# infer_cf_attrs (bool, optional): infer the values of some CF
# attributes from the data content.
# """
# if 'attrs' in kwargs:
# warnings.warn(
# "Deprecated `attrs` argument",
# FutureWarning
# )
# self.attrs.update(kwargs['attrs'])
#
# if infer_cf_attrs:
# bbox_attrs = [
# 'geospatial_lat_min',
# 'geospatial_lon_min',
# 'geospatial_lat_max',
# 'geospatial_lon_max'
# ]
# if any([self.attrs[_] is None for _ in bbox_attrs]):
# lonmin, latmin, lonmax, latmax = self.get_bbox()
# self.xarray.attrs['geospatial_lat_min'] = latmin
# self.xarray.attrs['geospatial_lon_min'] = lonmin
# self.xarray.attrs['geospatial_lat_max'] = latmax
# self.xarray.attrs['geospatial_lon_max'] = lonmax
#
# if output is None:
# if self.dataset is None:
# raise ValueError(
# "No mapper provided or attached to this feature. You must "
# "provide a mapper in `output` argument"
# )
# self.dataset.save(**kwargs)
#
# elif isinstance(output, XArrayDataset):
# if not output.is_empty():
# raise ValueError("output mapper must be empty")
# output.dataset = self.xarray
# output.save(**kwargs)
#
# else:
# raise TypeError("Unknown object type provided as output")
def extract(
self,
bbox=None,
footprint=None,
**kwargs):
**kwargs) -> 'Feature':
"""Extract a subset as a new ``Feature`` object.
The subset can be selected through one of the following:
......@@ -331,18 +267,17 @@ class Feature(Dataset, ABC):
"""
if bbox is not None or footprint is not None:
raise NotImplementedError
return self.__class__(super().extract(**kwargs))
return self.__class__(super(Feature, self).extract(**kwargs))
def extract_field(
self,
fieldname,
index=None,
padding=False,
prefix=None,
**kwargs
):
fieldname: str,
index: Mapping[str, slice] = None,
padding: bool = False,
prefix: str = None,
**kwargs):
"""
Create a copy of a field, limiting to a set of slices or indices, and
Create a copy of a field, or subset of a field, and
padding out as required.
Args
......
......@@ -15,15 +15,17 @@ __all__ = ['PointCollection']
class PointCollection(Feature):
"""
Feaure class for a set of randomly sampled points.
Feature class for a set of randomly sampled points.
A pointcollection is defined by geolocation coordinates lon(time), lat(time),
time(time), z(time) all having a single geolocation dimension: time.
A :class:`~cerbere.feature.pointcollection.PointCollection` feature is
defined by geolocation coordinates field: ``lon(time)``, ``lat(time)``,
``time(time)``, optionally ``z(time)``, all having a single geolocation
dimension: ``time``.
"""
@property
def _feature_geodimnames(self):
return ('time',)
return 'time',
def get_geocoord_dimnames(self, *args, **kwargs):
return ('time',)
return 'time',
......@@ -22,20 +22,20 @@ class Swath(Feature):
"""
def __init__(self, *args, **kwargs):
# create feature
super().__init__(
super(Swath, self).__init__(
*args,
**kwargs
)
@property
def _feature_geodimnames(self):
return ('row', 'cell')
return 'row', 'cell',
def get_geocoord_dimnames(self, fieldname, shape=None):
if fieldname == 'depth':
return ('depth',)
return 'depth',
else:
return ('row', 'cell',)
return 'row', 'cell',
......
......@@ -6,7 +6,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
from cerbere.feature.feature import Feature
......@@ -16,15 +15,17 @@ __all__ = ['Trajectory']
class Trajectory(Feature):
"""
Feaure class for the trajectory observation patterns.
Feature class for the trajectory observation patterns.
A trajectory is defined by geolocation coordinates lon(time), lat(time),
time(time), z(time) all having a single geolocation dimension: time.
A :class:`~cerbere.feature.trajectory.Trajectory` feature is defined by
geolocation coordinates field: ``lon(time)``, ``lat(time)``,
``time(time)``, optionally ``z(time)``, all having a single geolocation
dimension: ``time``.
"""
@property
def _feature_geodimnames(self):
return ('time',)
return 'time',
def get_geocoord_dimnames(self, *args, **kwargs):
return ('time',)
return 'time',
......@@ -65,7 +65,6 @@ Dataset contents
Dataset.exists
Dataset.has_field
Dataset.rename_field
Dataset.add_default_attrs
Dataset.get_values
Dataset.set_values
Dataset.get_lat
......@@ -160,7 +159,6 @@ Attributes
Feature.geodimnames
Feature.geodimsizes
Feature.xarray
Feature contents
......
=================
Creating datasets
=================
.. |dataset| replace:: :mod:`~cerbere.dataset`
.. |Dataset| replace:: :mod:`~cerbere.dataset.dataset.Dataset`
.. |NCDataset| replace:: :class:`~cerbere.dataset.ncdataset.NCDataset`
.. _xarray: http://xarray.pydata.org
Reading from a file
===================
The |dataset| package and other contribution packages provide various
classes to read and standardize the format and content of EO data products. To
each EO data product type should correspond a specific class in |dataset| to
read its content. Some of these classes, such as the |NCDataset| for CF
compliant NetCDF files, can read a wide range of EO products sharing similar
format conventions. Each class derives from the main |Dataset| base class and
inherits all its methods.
To read data from a file, first instantiate a |dataset| object of the
corresponding class, specifying the path to this file. For instance, let's
create a dataset object from a Mercator Ocean Model file (test file available at
ftp://ftp.ifremer.fr/ifremer/cersat/projects/cerbere/test_data/NCDataset/mercatorpsy4v3r1_gl12_hrly_20200219_R20200210.nc
). It is a CF compliant NetCDF file, so we can use the |NCDataset| class:
>>> from cerbere.dataset.ncdataset import NCDataset
>>> # instantiate the dataset object with the file path as argument
>>> dst = NCDataset("mercatorpsy4v3r1_gl12_hrly_20200219_R20200210.nc")
Print the dataset description:
>>> print(dst)
A Dataset can also be created from a list of files.
Creating a new dataset
======================
A |Dataset| class object (or from an inherited class in |dataset| package) can
be created in memory without pre-existing file. A |Dataset| object can be
created in different ways:
* from a xarray_ :class:`~xarray.Dataset` object
* using xarray_ ``data_vars``, ``coords``, ``attrs`` arguments
* from a dict, using xarray_ syntax (as in xarray_ :meth:`from_dict`)
* from another cerbere |dataset| object
Creating a Dataset from an xarray_ :class:`~xarray.Dataset` object
------------------------------------------------------------------
The xarray_ :class:`~xarray.Dataset` object must have latitude, longitude and
time coordinates with valid `cerbere` names (``lat``, ``lon``, ``time``):
>>> import xarray as xr
>>> import numpy as np
>>> xrobj = xr.Dataset(
coords={
'lat': np.arange(0,10, 0.1),
'lon': np.arange(5,15, 0.1),
'time': np.full((100,), np.datetime64(('2010-02-03'), dtype='D'))
},
data_vars={'myvar': (('time',), np.ones(100))}
)
>>> dst = Dataset(xrobj)
Creating a dataset from a dictionary
------------------------------------
Using the same syntax as xarray (see:
http://xarray.pydata.org/en/stable/generated/xarray.Dataset.from_dict.html#xarray.Dataset.from_dict
) by providing these arguments as a dictionary.
The provided dict must have latitude, longitude and time coordinates with valid
**cerbere** names (``lat``, ``lon``, ``time``, optionally ``z``):
>>> from cerbere.dataset.dataset import Dataset
>>> import numpy as np
>>> from datetime import datetime
>>> dst = Dataset(
... {'time': {'dims': ('time'), 'data': [datetime(2018, 1, 1)]},
... 'lat': {'dims': ('lat'), 'data': np.arange(-80, 80, 1)},
... 'lon': {'dims': ('lon',), 'data': np.arange(-180, 180, 1)},
... 'myvar': {'dims': ('lat', 'lon',),
... 'data': np.ones(shape=(160, 360))}
... }
... )
>>> print(dst)
Dataset: Dataset
Feature Dims :
. lat : 160
. lon : 360
. time : 1
Other Dims :
Feature Coordinates :
. time (time: 1)
. lat (lat: 160)
. lon (lon: 360)
Other Coordinates :
Fields :
. myvar (lat: 160, lon: 360)
Global Attributes :
. time_coverage_start 2018-01-01 00:00:00
. time_coverage_end 2018-01-01 00:00:00
Another syntax accepted by xarray_ provides explicit coordinates (``coords``),
fields (``data_vars``), dimensions (``dims``) and global attributes (``attrs``),
which, again, have to be passed as a dictionary to the |Dataset| creator:
>>> dst = Dataset({
... 'coords': {
... 'time': {'dims': ('time'), 'data': [datetime(2018, 1, 1)],
... 'attrs': {'units': 'seconds since 2001-01-01 00:00:00'}},
... 'lat': {'dims': ('lat'), 'data': np.arange(-80, 80, 1)},
... 'lon': {'dims': ('lon',), 'data': np.arange(-180, 180, 1)}},
... 'attrs': {'gattr1': 'gattr_val'},
... 'dims': ('time', 'lon', 'lat'),
... 'data_vars': {'myvar': {'dims': ('lat', 'lon',),
... 'data': np.ones(shape=(160, 360))}}}
... )
>>> print(dst)
Dataset: Dataset
Feature Dims :
. lat : 160
. lon : 360
. time : 1
Other Dims :
Feature Coordinates :
. time (time: 1)
. lat (lat: 160)
. lon (lon: 360)
Other Coordinates :
Fields :
. myvar (lat: 160, lon: 360)
Global Attributes :
. gattr1 gattr_val
. time_coverage_start 2018-01-01 00:00:00
. time_coverage_end 2018-01-01 00:00:00
**cerbere** :class:`~cerbere.dataset.field.Field` objects can also be mixed in:
>>> from cerbere.dataset.field import Field
>>> field = Field(
... np.ones(shape=(160, 360)),
... 'myvar',
... dims=('lat', 'lon',),
... attrs={'myattr': 'attr_val'}
... )
>>> dst = Dataset(
... {'time': {'dims': ('time'), 'data': [datetime(2018, 1, 1)]},
... 'lat': {'dims': ('lat'), 'data': np.arange(-80, 80, 1)},
... 'lon': {'dims': ('lon',), 'data': np.arange(-180, 180, 1)},
... 'myvar': field
... }
... )
>>> print(dst)
Dataset: Dataset
Feature Dims :
. lat : 160
. lon : 360
. time : 1
Other Dims :
Feature Coordinates :
. time (time: 1)
. lat (lat: 160)
. lon (lon: 360)
Other Coordinates :
Fields :
. myvar (lat: 160, lon: 360)
Global Attributes :
. time_coverage_start 2018-01-01 00:00:00
. time_coverage_end 2018-01-01 00:00:00
Creating a dataset from xarray_ arguments
-----------------------------------------
Using the same syntax as xarray (see:
http://xarray.pydata.org/en/stable/data-structures.html#dataset
).
The provided coords must have latitude, longitude and time coordinates with
valid **cerbere** names (``lat``, ``lon``, ``time``, optionally ``z``) and the
same goes for dimensions:
>>> dst = Dataset(
... {'myvar': (['lat', 'lon'], np.ones(shape=(160, 360)))},
... coords={
... 'time': (['time'], [datetime(2018, 1, 1)], {'units': 'seconds since 2001-01-01 00:00:00'}),
... 'lat': (['lat'], np.arange(-80, 80, 1)),
... 'lon': (['lon'], np.arange(-180, 180, 1))
... },
... attrs={'gattr1': 'gattr_val'}
... )
=================
Creating features
=================
.. _xarray: http://xarray.pydata.org
There are several ways of instantiating a feature object.
.. |dataset| replace:: :mod:`~cerbere.dataset`
.. |feature| replace:: :mod:`~cerbere.feature`
.. |Feature| replace:: :class:`~cerbere.feature.feature.Feature`
.. |Dataset| replace:: :class:`~cerbere.dataset.dataset.Dataset`
.. |CylindricalGrid| replace:: :class:`~cerbere.feature.grid.CylindricalGrid`
Reading a feature from a file
-----------------------------
|feature| objects inherit from the main |Feature| base class which itself is
a particular type of |Dataset| (and derives from it). As such they can be created
exactly in the same way as a |Dataset| object. They can also be created from
other instances of |dataset| classes. |Feature| is an abstract base class and
only objects from other classes in |feature| package can be instantiated.
A |feature| object can therefore be created, as any |Dataset| object, but it has
to match the expected requirements on dimensions and coordinates of the
instantiated |feature| class (see :ref:`creating_datasets`). In addition, it can
be created from (and saved to) any **cerbere** object of |dataset| class.
Examples are provided in the following sections.
Creating a feature from a xarray dataset
----------------------------------------
Creating a feature from or like a xarray_ :class:`~xarray.Dataset` object
-------------------------------------------------------------------------
Features can be created from a xarray :class:`xr.Dataset` object or using the
same arguments as the ones used for creating a :class:`xr.Dataset` object. The
definition of the :class:`xr.Dataset` object must match the requirements of the
feature to be instantiated, in terms of dimensions and coordinate variables.
Features can be created from a xarray :class:`xarray.Dataset` object or using
the same arguments as the ones used for creating a :class:`xarray.Dataset`
object. The definition of the :class:`xarray.Dataset` object must match the
requirements of the feature to be instantiated, in terms of dimensions and
coordinate variables.
For instance to create a :class:`~cerbere.feature.grid.CylindricalGrid` feature
from a :class:`xr.Dataset` object, the later must have:
For instance to create a |CylindricalGrid| feature from a
:class:`xarray.Dataset` object, the latter must have:
* ``lat``, ``lon``, ``time`` dimensions. ``time`` dimension must have a length
equal to 1.
......@@ -26,31 +42,59 @@ from a :class:`xr.Dataset` object, the later must have:
* data variables with spatial dimensions (``lat``, ``lon``,). Extra dimensions
are allowed (except ``time``).
.. code-block:: python
import xarray as xr
import numpy as np
from datetime import datetime
from cerbere.feature.grid import CylindricalGrid
# create an xarray Dataset with the structure of a cylindrical grid
lat = xr.DataArray(data=np.arange(-80, 80, 1), dims=['lat'])
lon = xr.DataArray(data=np.arange(-180, 180, 1), dims=['lon'])
time = xr.DataArray([datetime(2018, 1, 1)], dims='time')
var = xr.DataArray(
>>> import xarray as xr
>>> import numpy as np
>>> from datetime import datetime
>>> from cerbere.feature.grid import CylindricalGrid
>>>
>>> # create an xarray Dataset with the structure of a cylindrical grid
>>> lat = xr.DataArray(data=np.arange(-80, 80, 1), dims=['lat'])
>>> lon = xr.DataArray(data=np.arange(-180, 180, 1), dims=['lon'])
>>> time = xr.DataArray([datetime(2018, 1, 1)], dims='time')
>>> var = xr.DataArray(
... data=np.ones(shape=(160, 360)),
... dims=['lat', 'lon'],
... attrs={'myattr': 'test_attr_val'}
... )
xrdataset = xr.Dataset(
>>> xrdataset = xr.Dataset(
... coords={'lat': lat, 'lon': lon, 'time': time},
... data_vars={'myvar': var},
... attrs={'gattr1': 'gattr1_val', 'gattr2': 'gattr2_val'}
... )
>>>
>>> # create the cylindrical grid feature from the xarray dataset object
>>> grid = CylindricalGrid(xrdataset)
We can also pass directly the arguments for creating the :class:`xarray.Dataset`
to the |CylindricalGrid| constructor:
>>> grid = CylindricalGrid({
... 'coords': {
... 'time': {'dims': ('time'), 'data': [datetime(2018, 1, 1)]},
... 'lat': {'dims': ('lat'), 'data': np.arange(-80, 80, 1)},
... 'lon': {'dims': ('lon',), 'data': np.arange(-180, 180, 1)}},
... 'attrs': {'gattr1': 'gattr_val'},
... 'data_vars': {'myvar': {'dims': ('lat', 'lon',), 'data': np.ones(shape=(160, 360))}}
... })
>>> grid = CylindricalGrid(
... {'myvar': (['lat', 'lon'], np.ones(shape=(160, 360)))},
... coords={
... 'time': (['time'], [datetime(2018, 1, 1)], {
... 'units': 'seconds since 2001-01-01 00:00:00'}),
... 'lat': (['lat'], np.arange(-80, 80, 1)),
... 'lon': (['lon'], np.arange(-180, 180, 1))
... },
... attrs={'gattr1': 'gattr_val'}
... )
... or any other way described in :ref:`creating_datasets`.
Reading a feature from a file
-----------------------------
# create the cylindrical grid feature from the xarray dataset object
grid = CylindricalGrid(xrdataset)
Saving a feature to disk
......
......@@ -2,63 +2,3 @@
Feature classes
===============
The `feature` classes provided in ``cerbere.datamodel`` modules correspond to the main sampling patterns usually used
for Earth Observation data. They correspond to **typed** dataset objects. For each kind of feature, cerbere provides a
standard representation which allows to manipulate them in a generic way, and to implement feature specific handling or
display functions.
Features
========
The following table describes the dimensions and spatio-temporal coordinate (geolocation) variables associated with
each feature in ``cerbere.datamodel``, :
+------------------------+-------------------+-------------------------------------+--------------------------------+
| Feature | Dimensions (size) | Space/Time coordinates (dimensions) | Geophysical field (dimensions) |
+========================+===================+=====================================+================================+
| Swath | | - time (row,cell) | V (row, cell) |
| | - row (y) | - lat (row,cell) | |
| | - cell (x) | - lon (row,cell) | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
| Image | - time (1) | - time (time) | V (row, cell) |
| | - row (y) | - lat (row,cell) | |
| | - cell (x) | - lon (row,cell) | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
| grid | - time (1) | - time (time) | V (y, x) |
| | - y (y) | - lat (y,x) | |
| | - x (x) | - lon (y,x) | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
| grid time series | - time (t) | - time (time) | V (time, y, x) |
| | - y (y) | - lat (y,x) | |
| | - x (x) | - lon (y,x) | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
| point collection | - station (x) | - time (station) | V (station) |
| | | - lat (station) | |
| | | - lon (station) | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
| time series at a point | - station (1) | - time (t) | V (time) |
| | - time (t) | - lat (station) | |
| | | - lon (station) | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
| trajectory | - time (t) | - time (t) | V (time) |
| | | - lat (t) | |
| | | - lon (t) | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
| spatial section | | | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
| section time series | | | |
+------------------------+-------------------+-------------------------------------+--------------------------------+
Creating a feature object
=========================
The `feature` classes are specializations built around xarray.Dataset objects.
A `feature` object from above list can be created :
* from a xarray.Dataset object that meets the expected requirements of the considered feature
* from a object inherited from :class:`~cerbere.mapper.xarraydataset.XArrayDataset` class
* from dictionaries of coordinates, dimensions and fields that meets the expected requirements of the considered feature
......@@ -2,10 +2,11 @@
Format profiles
===============
:mod:`cerbere` was designed to easy data management tasks. It can be used to
easily convert data files from one format to another, or to reformat data to
another formatting convention matching a specific project requirements. For some
examples of such operation, refer also to :ref:`format_profile`.
:mod:`cerbere` was designed to ease data management tasks. It can be used to
convert data files from one format to another, or to reformat data to
another formatting convention matching specific project requirements, with
minimum code effort. For some examples of such operation, refer also to
:ref:`format_profile`.
When saving the content of a :mod:`~cerbere.dataset` class object, the output
file is formatted following the default settings and conventions implemented in
......@@ -25,6 +26,257 @@ particular, it can define:
Let's format for instance data to GHRSST format (as defined in GDS 2.1
document). We define these requirements in a profile file as follow:
.. code-block:: yaml
---
# Defines the list and default values of the global attributes of a Cerbere new feature
attributes:
# Description
id:
naming_authority: org.ghrsst
title:
summary:
cdm_data_type:
keywords: Oceans > Ocean Temperature > Sea Surface Temperature
acknowledgement: "Please acknowledge the use of these data with the following statement: these data were produced by the Centre de Recherche et d'Exploitation Satellitaire (CERSAT), at IFREMER, Plouzane (France)"
processing_level:
metadata_link:
comment:
file_quality_level:
# Observation
platform:
platform_type:
instrument:
instrument_type:
band:
# Conventions
Conventions: CF 1.7, ACDD 1.3, ISO 8601
Metadata_Conventions: Climate and Forecast (CF) 1.7, Attribute Convention for Data Discovery (ACDD) 1.3
standard_name_vocabulary: NetCDF Climate and Forecast (CF) Metadata Convention
keywords_vocabulary: NASA Global Change Master Directory (GCMD) Science Keywords
format_version: GDSv1.2
gds_version_id:
platform_vocabulary: CEOS mission table
instrument_vocabulary: CEOS instrument table
# Authorship
institution: Institut Francais de Recherche et d'Exploitation de la Mer (Ifremer) Centre de Recherche et d'Exploitation satellitaire (CERSAT)
institution_abbreviation: Ifremer/CERSAT
project: Group for High Resolution Sea Surface Temperature (GHRSST)
program: CMEMS
license: GHRSST protocol describes data use as free and open.
publisher_name: CERSAT
publisher_url: http://cersat.ifremer.fr
publisher_email: cersat@ifremer.fr
publisher_institution: Ifremer
publisher_type: institution
creator_name: CERSAT
creator_url: http://cersat.ifremer.fr
creator_email: cersat@ifremer.fr
creator_type: institution
creator_institution: Ifremer
contributor_name:
contributor_role:
references:
# Traceability
processing_software: Telemachus 1.0
product_version: 3.0
netcdf_version_id:
uuid:
history:
source:
source_version:
date_created:
date_modified:
date_issued:
date_metadata_modified:
# BBox
geospatial_lat_min:
geospatial_lat_max:
geospatial_lat_units: degrees
geospatial_lon_min:
geospatial_lon_max:
geospatial_lon_units: degrees
geospatial_bounds:
geospatial_bounds_crs: WGS84
# Resolution
spatial_resolution:
geospatial_lat_resolution:
geospatial_lon_resolution:
# Temporal
time_coverage_start:
time_coverage_end:
time_coverage_resolution:
fields:
lat:
standard_name: latitude
units: degrees_north
valid_range: -90, 90
comment: geographical coordinates, WGS84 projection
coordinates: lon lat
lon:
standard_name: longitude
units: degrees_east
valid_range: -180., 180
comment: geographical coordinates, WGS84 projection
time:
long_name: reference time of sst file
standard_name: time
sea_surface_temperature:
long_name: sea surface foundation temperature
standard_name: sea_surface_foundation_temperature
units: kelvin
valid_range: -2., 50.
sst_dtime:
long_name: time difference from reference time
units: seconds
valid_range: -86400, 86400
comment: time plus sst_dtime gives each measurement time
solar_zenith_angle:
long_name: solar zenith angle
units: angular_degree
valid_range: 0, 180
comment: the solar zenith angle at the time of the SST observations
sses_bias:
long_name: SSES bias estimate
units: kelvin
valid_range: -2.54, 2.54
comment: Bias estimate derived using the techniques described at http://www.ghrsst.org/SSES-Description-of-schemes.html
sses_standard_deviation:
long_name: SSES standard deviation
valid_range: 0., 2.54
comment: Standard deviation estimate derived using the techniques described at http://www.ghrsst.org/SSES-Description-of-schemes.html
quality_level:
long_name: quality level of SST pixel
valid_range: 0, 5
flag_meanings: no_data bad_data worst_quality low_quality acceptable_quality best_quality
flag_values: 0, 1, 2, 3, 4, 5
comment: These are the overall quality indicators and are used for all GHRSST SSTs
or_latitude:
units: degrees_north
valid_range: -80., 80
long_name: original latitude of the SST value
standard_name: latitude
or_longitude:
units: degrees_east
valid_range: -180., 180.
long_name: original longitude of the SST value
standard_name: longitude
or_number_of_pixels:
long_name: original number of pixels from the L2Ps contributing to the SST value
valid_range: -32767, 32767
satellite_zenith_angle:
long_name: satellite zenith angle
units: angular_degree
comment: the satellite zenith angle at the time of the SST observations
valid_min: 0
valid_max: 90
adjusted_sea_surface_temperature:
long_name: adjusted collated sea surface temperature
standard_name: sea_surface_subskin_temperature
units: kelvin
comment: bias correction using a multi-sensor reference field
valid_min: -300
valid_max: 4500
encoding:
lat:
dtype: float32
least_significant_digit: 3
lon:
dtype: float32
least_significant_digit: 3
sea_surface_temperature:
dtype: int16
_FillValue: -32768
scale_factor: 0.01
add_offset: 273.15
sst_dtime:
_FillValue: -2147483648
add_offset: 0
scale_factor: 1
dtype: int32
solar_zenith_angle:
_FillValue: -128
add_offset: 90.
scale_factor: 1.
quality_level:
_FillValue: -128
dtype: byte
sses_bias:
_FillValue: -128
dtype: byte
add_offset: 0.
scale_factor: 0.02
sses_standard_deviation:
_FillValue: -128
dtype: byte
add_offset: 2.54
scale_factor: 0.02
or_latitude:
dtype: int16
_FillValue: -32768
add_offset: 0.
scale_factor: 0.01
units: degrees_north
or_longitude:
dtype: int16
_FillValue: -32768
add_offset: 0.
scale_factor: 0.01
or_number_of_pixels:
dtype: byte
_FillValue: -32768
add_offset: 0
scale_factor: 1
satellite_zenith_angle:
dtype: byte
_FillValue: -128
add_offset: 0.
scale_factor: 1.
adjusted_sea_surface_temperature:
dtype: int16
_FillValue: -32768
add_offset: 273.15
scale_factor: 0.01
This profile file can be passed on to the netCDF dedicated dataset object,
provided by :class:`cerbere.dataset.ncdataset.NCDataset` class when saving the
object content to disk:
......
......@@ -33,6 +33,7 @@ User guide
creating_datasets
creating_features
inspecting_content
format_profile
Available dataset classes
......
===================================
Inspect Dataset and Feature objects
===================================
The following section describes the basic operations you can perform with
:mod:`cerbere` to handle Earth Observation data files with
|dataset| package. This package can be seen as a unified API to
access any data file content, whatever the format and content. There is one
|dataset| class per data format, convention, product type...
Using objects from `dataset` subpackage
=======================================
A |dataset| class provides a set of methods to inspect the content
of the read data. They allow to retrieve information from a file in the same way
whatever its format.
Get the list of fields in a file (all but the geolocation fields) with
:attr:`~cerbere.dataset.dataset.Dataset.fieldnames`:
.. code-block:: python
f.fieldnames
Get the dimensions (like in netCDF) of a file (Note that the geolocation dimension
names returned are standardized):
.. code-block:: python
f.dims
Get the dimensions (like in netCDF) of a particular field:
.. code-block:: python
f.get_field_dims('sea_surface_temperature')
Get the size of a dimension (standard names can be used for geolocation
dimensions):
.. code-block:: python
f.get_dimsize('row') # standard dimension name for Swath feature
Get a field and print it:
.. code-block:: python
field = f.get_field('sea_surface_temperature')
print(field)
.. note::
|Field| objects are similar to variables in netCDF or `DataArray` in xarray_. A field consists of:
* an attached *variable* describing the geophysical quantity provided by the field (together with a few descriptive attributes such as standard name, etc...)
* *attributes* further documenting the provided observation values (units,...) similar to the variable attributes in netCDF
* an *array of values* (observations)
* an optional array of *quality flags* (one for each observation value)
* an optional array of *quality history* (one for each observation value) documenting the reason why a value was flagged
Get the field attributes, as a dictionary:
.. code-block:: python
attrs = f.get_field_attrs('sea_surface_temperature')
print(attrs)
Get the dataset global attributes, as a dictionary:
.. code-block:: python
attrs = f.attrs()
print(attrs)
Then instantiate a feature class object from the above dataset object.
The feature object is now initialized with the content of the above file:
.. code-block:: python
import feature.trajectory
traj = feature.trajectory.Trajectory(ncf)
Saving data
-----------
Extracting sub-features
-----------------------
============
Installation
============
.. _conda: https://docs.conda.io/en/latest/
Requirements
============
* Netcdf4
* numpy
* xarray
* Python 3
Cerbere 2 works with python 3. Python dependencies include:
* netCDF4
* numpy
* pandas
* xarray>=0.15
* shapely
* python-dateutil
Install these with your package manager. We recommend using conda_ for this.
......
......@@ -7,6 +7,15 @@ Overview
.. |feature| replace:: :mod:`~cerbere.feature`
.. |Field| replace:: :mod:`~cerbere.dataset.field.Field`
.. |Grid| replace:: :class:`~cerbere.feature.grid.Grid`
.. |CylindricalGrid| replace:: :class:`~cerbere.feature.grid.CylindricalGrid`
.. |Swath| replace:: :class:`~cerbere.feature.swath.Swath`
.. |Image| replace:: :class:`~cerbere.feature.image.Image`
.. |GridTimeSeries| replace:: :class:`~cerbere.feature.gridtimeseries.GridTimeSeries`
.. |Trajectory| replace:: :class:`~cerbere.feature.trajectory.Trajectory`
.. |PointTimeSeries| replace:: :class:`~cerbere.feature.pointtimeseries.PointTimeSeries`
.. |PointCollection| replace:: :class:`~cerbere.feature.pointcollection.PointCollection`
Concept
=======
......@@ -101,7 +110,9 @@ from CF convention and other works. It therefore extends above category to
"typed geospatial" xarray_ ``Dataset`` objects, providing further restrictions
on the content of a set of data so that similar patterns of observation are
represented and accessed in the same way, allowing generic manipulation of
observation data.
observation data and specific handling or display functions. It constitutes a
higher level of abstraction above :mod:`~cerbere.dataset` classes, fitting
data into common templates.
Currently managed features include:
......@@ -120,107 +131,41 @@ Currently managed features include :
data into/from a |feature| object into a specific format (or format
convention).
The classes provided in :mod:`~cerbere.feature` modules and listed above
correspond to the main sampling patterns usually used for Earth Observation
data. The following table describes the dimensions and spatio-temporal
coordinate (geolocation) fields associated with each feature in
:mod:`~cerbere.feature`:
================= ==================== ======================== ========================
Feature Dims [size] Coords [dims] Fields [dims]
================= ==================== ======================== ========================
|Swath| | - ``row (y)`` | - ``time (row, cell)`` | ``<name> (row, cell)``
| - ``cell (x)`` | - ``lat (row, cell)``
| - ``lon (row, cell)``
|Image| | - ``time (1)`` | - ``time (time)`` | ``<name> (row, cell)``
| - ``row (y)`` | - ``lat (row, cell)``
| - ``cell (x)`` | - ``lon (row, cell)``
|Grid| | - ``time (1)`` | - ``time (time)`` | ``<name> (y, x)``
| - ``y (y)`` | - ``lat (y, x)``
| - ``x (x)`` | - ``lon (y, x)``
|CylindricalGrid| | - ``time (1)`` | - ``time (time)`` | ``<name> (lat, lon)``
| - ``lat (y)`` | - ``lat (lat, lon)``
| - ``lon (x)`` | - ``lon (lat, lon)``
|GridTimeSeries| | - ``time (t)`` | - ``time (time)`` | ``<name> (time, lat, lon)``
| - ``y (y)`` | - ``lat (y, x)``
| - ``x (x)`` | - ``lon (y, x)``
|PointCollection| | - ``station (x)`` | - ``time (station)`` | ``<name> (station)``
| - ``lat (station)``
| - ``lon (station)``
|Trajectory| | - ``time (t)`` | - ``time (time)`` | ``<name> (time)``
| - ``lat (time)``
| - ``lon (time)``
TimeSeries | - ``station (1)`` | - ``time (time)`` | ``<name> (time)``
| - ``time (t)`` | - ``lat (station)``
| - ``lon (station)``
Section
SectionTimeSeries
================= ==================== ======================== ========================
Using objects from `dataset` subpackage
=======================================
The following section describes the basic operations you can perform with
:mod:`cerbere` to handle Earth Observation data files with
|dataset| package. This package can be seen as a unified API to
access any data file content, whatever the format and content. There is one
|dataset| class per data format, convention, product type...
Reading data from a file
------------------------
A |dataset| class must be available for the considered format. Generic mapper
classes are available, for instance
:class:`~cerbere.dataset.ncdataset.NCDataset` for CF compliant NetCDF files.
To read data from a file, first instantiate a dataset object of the
corresponding class, specifying the path to this file:
.. code-block:: python
from cerbere.dataset.ncdataset import NCDataset
ncf = NCDataset(
"./test/GW_L2P_ALT_ENVI_GDR_20101210_120905_20101210_125912_097_196.nc"
)
A |dataset| class provides a set of methods to inspect the content
of the read data. They allow retrieving information from a file in the same way,
whatever its format.
Get the list of fields in a file (all but the geolocation fields) with
:attr:`~cerbere.dataset.dataset.Dataset.fieldnames`:
.. code-block:: python
f.fieldnames
Get the dimensions (like in netCDF) of a file (Note that the geolocation dimension
names returned are standardized):
.. code-block:: python
f.dims
Get the dimensions (like in netCDF) of a particular field:
.. code-block:: python
f.get_field_dims('sea_surface_temperature')
Get the size of a dimension (standard names can be used for geolocation
dimensions):
.. code-block:: python
f.get_dimsize('row') # standard dimension name for Swath feature
Get a field and print it:
.. code-block:: python
field = f.get_field('sea_surface_temperature')
print(field)
.. note::
A |Field| is similar to a variable in netCDF or a `DataArray` in xarray_. A field consists of:
* an attached *variable* describing the geophysical quantity provided by the field (together with a few descriptive attributes such as standard name, etc...)
* *attributes* further documenting the provided observation values (units,...) similar to the variable attributes in netCDF
* an *array of values* (observations)
* an optional array of *quality flags* (one for each observation value)
* an optional array of *quality history* (one for each observation value) documenting the reason why a value was flagged
Get the field attributes, as a dictionary:
.. code-block:: python
attrs = f.get_field_attrs('sea_surface_temperature')
print(attrs)
Get the dataset global attributes, as a dictionary:
.. code-block:: python
attrs = f.attrs()
print(attrs)
Then instantiate a feature class object from the above dataset object.
The feature object is now initialized with the content of the above file:
.. code-block:: python
import feature.trajectory
traj = feature.trajectory.Trajectory(ncf)
Saving data
-----------
Extracting sub-features
-----------------------
......@@ -82,6 +82,7 @@ setup(
'cerbere.mapper',
'cerbere.feature',
'cerbere.dataset',
'tests',
'cerbere.share'
],
include_package_data=True,
......
......@@ -149,6 +149,35 @@ class TestCylindricalGridFeature(TestFeature):
print("Feature from: test_create_feature_from_dict_datetime64_1d")
print(feat)
def test_create_feature_from_xarray_keywords(self):
feat = self.get_feature_class()(
{'myvar': (['lat', 'lon'], np.ones(shape=(160, 360)))},
coords={
'time': (['time'], [datetime(2018, 1, 1)], {
'units': 'seconds since 2001-01-01 00:00:00'}),
'lat': (['lat'], np.arange(-80, 80, 1)),
'lon': (['lon'], np.arange(-180, 180, 1))
},
attrs={'gattr1': 'gattr_val'}
)
self.assertIsInstance(feat, self.get_feature_class())
self.assertEqual(len(feat.get_field_dimnames('time')), 1)
print("Feature from: test_create_feature_from_xarray_keywords")
print(feat)
def test_create_feature_from_xarray_args(self):
feat = self.get_feature_class()({
'coords': {
'time': {'dims': ('time'), 'data': [datetime(2018, 1, 1)]},
'lat': {'dims': ('lat'), 'data': np.arange(-80, 80, 1)},
'lon': {'dims': ('lon',), 'data': np.arange(-180, 180, 1)}},
'attrs': {'gattr1': 'gattr_val'},
'data_vars': {'myvar': {'dims': ('lat', 'lon',), 'data': np.ones(shape=(160, 360))}}
})
self.assertIsInstance(feat, self.get_feature_class())
self.assertEqual(len(feat.get_field_dimnames('time')), 1)
print("Feature from: test_create_feature_from_xarray_args")
print(feat)
def test_expanded_latlon(self):
basefeat = self.define_base_feature()
......
......@@ -93,6 +93,20 @@ class TestXArrayDataset(unittest.TestCase):
print(dst)
self.assertIsInstance(dst, Dataset)
def test_init_from_xarray_keywords(self):
dst = Dataset(
{'myvar': (['lat', 'lon'], np.ones(shape=(160, 360)))},
coords={
'time': (['time'], [datetime(2018, 1, 1)], {
'units': 'seconds since 2001-01-01 00:00:00'}),
'lat': (['lat'], np.arange(-80, 80, 1)),
'lon': (['lon'], np.arange(-180, 180, 1))
},
attrs={'gattr1': 'gattr_val'}
)
print(dst)
self.assertIsInstance(dst, Dataset)
def test_dimension_names(self):
print("...from xarray.Dataset")
refdims = list(('lat', 'lon', 'time'))
......