...
 
Commits (3)
This diff is collapsed.
This diff is collapsed.
......@@ -6,11 +6,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import copy
import warnings
from typing import Tuple
import numpy
import xarray as xr
from .feature import Feature
......@@ -28,16 +25,15 @@ DEFAULT_PROJECTION = (
class Projection(object):
"""Definition of a grid projection"""
def __init__(self,
proj4_definition=DEFAULT_PROJECTION,
identifier='regular'
proj4_definition: str = DEFAULT_PROJECTION,
identifier: str = 'regular'
):
self.definition = proj4_definition
self.identifier = identifier
def is_cylindrical(self):
"""return True if the projection is cylindrical"""
def is_cylindrical(self) -> bool:
"""True if the projection is cylindrical"""
return self.identifier == 'regular'
......@@ -85,10 +81,9 @@ class Grid(Feature):
values of the existing attributes with the same name in the file).
Use with caution in this context.
"""
def __init__(self,
*args,
projection=Projection(),
projection: 'Projection' = Projection(),
**kwargs
):
"""
......@@ -102,29 +97,22 @@ class Grid(Feature):
**kwargs
)
def is_unique_grid_time(self):
"""
Return True if a unique time is associated with the grid
(like in L4 products), False if there is time value per pixel
(like in L3)
"""
return self.dataset.get_dimsize('time') == 1
@property
def _feature_geodimnames(self):
return ('y', 'x')
def _feature_geodimnames(self) -> Tuple[str, ...]:
return 'y', 'x'
def get_geocoord_dimnames(self, fieldname, shape=None):
def get_geocoord_dimnames(
self, fieldname: str,
values: 'xr.DataArray') -> Tuple[str, ...]:
if fieldname in ['depth', 'height']:
return ('z',)
return 'z',
elif fieldname == 'time':
if len(shape) == 1:
return ('time',)
if len(values.shape) == 1:
return 'time',
else:
return ('y', 'x',)
return 'y', 'x'
else:
return ('y', 'x',)
return 'y', 'x'
# def save(self, output, attrs=None, infer_cf_attrs=False, **kwargs):
......@@ -372,21 +360,15 @@ class CylindricalGrid(Grid):
if fieldname in ['depth', 'height']:
if len(values.shape) != 1:
raise ValueError("z coordinate must be one-dimensional")
return OrderedDict([('z', len(values))])
return ('z',)
elif fieldname == 'time':
if len(values.shape) == 1:
return OrderedDict([('time', 1)])
return ('time',)
elif len(values.shape) == 2:
return OrderedDict(zip(['lat', 'lon'], list(values.shape)))
return ('lat', 'lon',)
else:
raise ValueError(
"time coordinate must be one or two dimensional"
)
elif fieldname in ['lat', 'lon']:
# lat and lon
if len(values.shape) != 1:
raise ValueError("x and y coordinate must be one-dimensional")
return OrderedDict([(fieldname, 1)])
else:
# any data field
return OrderedDict(zip(['lat', 'lon'], list(values.shape)))
\ No newline at end of file
return ('lat', 'lon',)
This diff is collapsed.
......@@ -66,6 +66,12 @@ Dataset contents
Dataset.has_field
Dataset.rename_field
Dataset.add_default_attrs
Dataset.get_values
Dataset.set_values
Dataset.get_lat
Dataset.get_lon
Dataset.get_times
Dataset.get_datetimes
Other built-in datasets
......@@ -162,12 +168,8 @@ Feature contents
.. autosummary::
:toctree: generated
Feature.get_lat
Feature.get_lon
Feature.get_times
Feature.extract
Feature.get_values
Feature.set_values
Feature.add_standard_attrs
Feature.xarray
Feature.append
\ No newline at end of file
Creating datasets
=================
\ No newline at end of file
Creating features
=================
There are several ways of instantiating a feature object.
Reading a feature from a file
-----------------------------
Creating a feature from a xarray dataset
----------------------------------------
Features can be created from an xarray :class:`xr.Dataset` object or using the
same arguments as the ones used for creating a :class:`xr.Dataset` object. The
definition of the :class:`xr.Dataset` object must match the requirements of the
feature to be instantiated, in terms of dimensions and coordinate variables.
For instance to create a :class:`~cerbere.feature.grid.CylindricalGrid` feature
from a :class:`xr.Dataset` object, the latter must have:
* ``lat``, ``lon``, ``time`` dimensions. ``time`` dimension must have a length
equal to 1.
* a ``lat`` coordinate variable with dimension ``lat``.
* a ``lon`` coordinate variable with dimension ``lon``.
* a ``time`` coordinate variable with dimension ``time``.
* data variables with spatial dimensions (``lat``, ``lon``,). Extra dimensions
are allowed (except ``time``).
.. code-block:: python
import xarray as xr
import numpy as np
from datetime import datetime
from cerbere.feature.grid import CylindricalGrid
# create an xarray Dataset with the structure of a cylindrical grid
lat = xr.DataArray(data=np.arange(-80, 80, 1), dims=['lat'])
lon = xr.DataArray(data=np.arange(-180, 180, 1), dims=['lon'])
time = xr.DataArray([datetime(2018, 1, 1)], dims='time')
var = xr.DataArray(
... data=np.ones(shape=(160, 360)),
... dims=['lat', 'lon'],
... attrs={'myattr': 'test_attr_val'}
... )
xrdataset = xr.Dataset(
... coords={'lat': lat, 'lon': lon, 'time': time},
... data_vars={'myvar': var},
... attrs={'gattr1': 'gattr1_val', 'gattr2': 'gattr2_val'}
... )
# create the cylindrical grid feature from the xarray dataset object
grid = CylindricalGrid(xrdataset)
Saving a feature to disk
------------------------
A feature object can be saved to disk using a class object from
:mod:`~cerbere.dataset` package provided it implements the
:meth:`~cerbere.dataset.dataset.Dataset.save` method. Select the class
corresponding to the format in which the feature must be saved.
For instance to save a feature to a CF compliant NetCDF4 file, the
:class:`~cerbere.dataset.ncdataset.NCDataset` can be used.
.. code-block:: python
from cerbere.dataset.ncdataset import NCDataset
# instantiate a NCDataset object in write mode
ncf = NCDataset(dataset='test.nc', mode='w')
# save above cylindrical grid feature into a CF NetCDF4 file
grid.save(ncf)
......@@ -29,10 +29,10 @@ User guide
==========
.. toctree::
:maxdepth: 1
:maxdepth: 2
creating_datasets
creating_features
creating_datasets
creating_features
Available dataset classes
=========================
......
......@@ -5,6 +5,7 @@ Recipes
.. toctree::
:maxdepth: 1
recipes/convert_format
odyssea
cersat_gridded
recipes/reading_s1_ocn
......
Joggling with file formats
==========================
:mod:`cerbere` was designed to ease data management tasks. It can be used to
easily convert data files from one format to another, or to reformat data to
another formatting convention matching a specific project's requirements.
Use case 1: format conversion
-----------------------------
Use case 2: changing format convention
--------------------------------------
Use case 3: using a format profile
----------------------------------
The format can also be customized through a external format profile file that
can be passed on when saving a dataset. It provides the directives to properly
format a dataset, using some convention or default settings.
Let's format for instance data to GHRSST format (as defined in GDS 2.1
document). We define these requirements in a profile file as follow:
This profile file can be passed on to the netCDF dedicated dataset object,
provided by :class:`cerbere.dataset.ncdataset.NCDataset` class when saving the
object content to disk:
.. code-block:: python
# create a dataset object
# save it in a NetCDF file, using above profile and NCDataset class
Note that the attributes already defined in the dataset object are not
overridden by the default values in the profile file.
.. code-block:: python
# create a NCDataset dataset object and fill in some attributes
# save it, using above profile
......@@ -2,3 +2,10 @@
-i https://pypi.python.org/simple/
-e .
numpy
pandas
xarray>=0.15
shapely
python-dateutil
cftime
......@@ -80,8 +80,9 @@ setup(
'cerbere',
'cerbere.datamodel',
'cerbere.mapper',
'cerbere.feature',
'cerbere.dataset'
'cerbere.feature',
'cerbere.dataset',
'tests'
],
package_data={'share': [
'global_attributes_profile.cfg',
......@@ -94,6 +95,8 @@ setup(
),
install_requires=[
'netCDF4',
'numpy',
'pandas',
'xarray>=0.15',
'cftime',
'Shapely>=1.2.18',
......
......@@ -35,12 +35,12 @@ class Checker():
@classmethod
def dataset(cls):
"""Return the mapper class name"""
"""Return the dataset class name"""
raise NotImplementedError
@classmethod
def feature(cls):
"""Return the related datamodel class name"""
"""Return the related feature class name"""
raise NotImplementedError
@classmethod
......@@ -90,7 +90,7 @@ class Checker():
pass
def load_dataset(self, dst):
"""Import mapper class"""
"""Import dataset class"""
if '.' in dst:
datasetpath = 'cerbere.dataset.' + dst.split('.')[0].lower()
datasetname = dst.split('.')[1]
......@@ -104,28 +104,31 @@ class Checker():
return classreader
def load_feature(self, feat):
"""Import datamodel class"""
"""Import feature class"""
if '.' in feat:
featurepath = 'cerbere.feature.' + feat.split('.')[0].lower()
featurename = feat.split('.')[1]
else:
featurepath = 'cerbere.feature.' + feat.lower()
featurename = feat
featurereader = getattr(
importlib.import_module(
'cerbere.feature.' + feat.lower()
),
feat
importlib.import_module(featurepath), featurename
)
return featurereader
def open_file(self):
"""Open file and create mapper object"""
"""Open file and create dataset object"""
print("...instantiate class with {}".format(self.testfile))
print(self.datasetclass(self.testfile, mode='r'))
return self.datasetclass(self.testfile, mode='r')
def test_load_dataset(self):
"""Checker mapper import"""
"""Checker dataset import"""
self.assertTrue(
issubclass(self.datasetclass, Dataset))
def test_open_file_with_dataset(self):
"""Test mapper instantiation with an input file"""
"""Test dataset instantiation with an input file"""
datasetobj = self.open_file()
datasetobj.close()
self.assertIsInstance(datasetobj, self.datasetclass)
......@@ -182,7 +185,7 @@ class Checker():
def test_read_lon(self):
"""Test reading longitude and checking validity"""
datasetobj = self.open_file()
data = numpy.ma.array(datasetobj.get_values('lon'))
data = numpy.ma.array(datasetobj.get_lon())
print("...test there are no fillvalues")
self.assertEqual(data.size, data.count(),
"They are fillvalues in lon field")
......@@ -233,7 +236,7 @@ class Checker():
attr)
self.assertIsInstance(
datasetobj.get_attr(attr),
(str, datetime.datetime, numpy.int32,
(str, int, datetime.datetime, numpy.int32,
numpy.int16, numpy.float32, numpy.float64, list),
msg)
datasetobj.close()
......@@ -251,9 +254,10 @@ class Checker():
self.assertIsInstance(end, datetime.datetime)
datasetobj.close()
def test_load_mapper_content_into_datamodel(self):
"""Test loading the content of a file into a datamodel structure"""
def test_load_dataset_content_into_feature(self):
"""Test loading the content of a file into a feature structure"""
datasetobj = self.datasetclass(self.testfile)
print(datasetobj)
feature = self.featureclass(datasetobj)
self.assertIsInstance(feature, self.featureclass)
datasetobj.close()
......@@ -263,27 +267,38 @@ class Checker():
datasetobj = self.datasetclass(self.testfile)
featureobj = self.featureclass(datasetobj)
geodims = featureobj.geodimsizes
if featureobj.__class__.__name__ in ['Swath', 'Image']:
rows, cells = geodims
cells = featureobj.geodims['cell']
rows = featureobj.geodims['row']
width = min(min(rows // 2, cells // 2), 5)
r0, r1 = rows // 2 - width, rows // 2 + width
c0, c1 = cells // 2 - width, cells // 2 + width
print("Subset ")
print("row : ", r0, r1)
print("cell: ", c0, c1)
subset = featureobj.extract_subset(index={'row': slice(r0, r1, 1),
subset = featureobj.extract(index={'row': slice(r0, r1, 1),
'cell': slice(c0, c1, 1)})
elif featureobj.__class__.__name__ == 'Grid':
nj, ni = geodims
elif featureobj.__class__.__name__ in ['Grid', 'GridTimeSeries']:
ni = featureobj.geodims['x']
nj = featureobj.geodims['y']
width = min(min(nj // 2, ni // 2), 5)
j0, j1 = nj // 2 - width, nj // 2 + width
i0, i1 = ni // 2 - width, ni // 2 + width
subset = featureobj.extract_subset(index={'y': slice(j0, j1, 1),
subset = featureobj.extract(index={'y': slice(j0, j1, 1),
'x': slice(i0, i1, 1)})
elif featureobj.__class__.__name__ in [
'CylindricalGrid', 'CylindricalGridTimeSeries'
]:
ni = featureobj.geodims['lon']
nj = featureobj.geodims['lat']
width = min(min(nj // 2, ni // 2), 5)
j0, j1 = nj // 2 - width, nj // 2 + width
i0, i1 = ni // 2 - width, ni // 2 + width
subset = featureobj.extract(index={'lat': slice(j0, j1, 1),
'lon': slice(i0, i1, 1)})
self.assertIsInstance(subset, featureobj.__class__)
return subset
......
from datetime import datetime
import numpy as np
import xarray as xr
from cerbere.feature.gridtimeseries import CylindricalGridTimeSeries
from .test_xarray_feature import TestFeature
class TestCylindricalGridFeature(TestFeature):
    """Test class for the CylindricalGridTimeSeries feature."""

    def get_feature_class(self):
        """Return the feature class under test."""
        return CylindricalGridTimeSeries

    def define_base_feature(self):
        """Create the reference test feature from a synthetic xarray dataset.

        Builds a 1-degree cylindrical grid (160 lats x 360 lons) with a
        5-step daily time axis and a single data variable filled with ones.
        """
        lon = xr.DataArray(data=np.arange(-180, 180, 1), dims=['lon'])
        lat = xr.DataArray(data=np.arange(-80, 80, 1), dims=['lat'])
        time = xr.DataArray([
            datetime(2018, 1, 1),
            datetime(2018, 1, 2),
            datetime(2018, 1, 3),
            datetime(2018, 1, 4),
            datetime(2018, 1, 5)
        ], dims='time')
        var = xr.DataArray(
            data=np.ones(shape=(5, 160, 360)),
            dims=['time', 'lat', 'lon'],
            attrs={'myattr': 'test_attr_val'}
        )
        xrdataset = xr.Dataset(
            coords={'lat': lat, 'lon': lon, 'time': time},
            data_vars={'myvar': var},
            attrs={'gattr1': 'gattr1_val', 'gattr2': 'gattr2_val'}
        )
        return CylindricalGridTimeSeries(xrdataset)

    def get_feature_dimnames(self):
        """Return the expected geolocation dimension names, in order."""
        return ('time', 'lat', 'lon',)

    def get_feature_dimsizes(self):
        """Return the expected geolocation dimension sizes, in order."""
        return (5, 160, 360)

    def test_create_feature_with_incorrect_geodim_order(self):
        """A variable given with swapped lat/lon dims must be reordered."""
        basefeat = self.define_base_feature()
        feat = self.get_feature_class()(
            {
                'lat': {'data': basefeat.get_lat(), 'dims': ('lat',)},
                'lon': {'data': basefeat.get_lon(), 'dims': ('lon',)},
                'time': {
                    'data': basefeat.get_times(), 'dims': ('time',)
                },
                'myvar': {
                    # intentionally transposed: ('time', 'lon', 'lat')
                    'data': np.ones(shape=(5, 360, 160)),
                    'dims': ('time', 'lon', 'lat')
                }
            }
        )
        self.assertEqual(
            feat.get_field_dimnames('myvar'), ('time', 'lat', 'lon')
        )
        print("Feature from: test_create_feature_with_incorrect_geodim_order")
        print(feat)

    def test_create_feature_from_dict_datetime_1d(self):
        """Build a feature from a dict of fields with a 1-d time axis."""
        basefeat = self.define_base_feature()
        feat = self.get_feature_class()(
            {
                'lat': {'data': basefeat.get_lat(), 'dims': ('lat',)},
                'lon': {'data': basefeat.get_lon(), 'dims': ('lon',)},
                'time': {
                    'data': basefeat.get_times(), 'dims': ('time',)
                },
                'myvar': {
                    'data': basefeat.get_values('myvar'),
                    'dims': ('time', 'lat', 'lon')
                }
            }
        )
        self.assertIsInstance(feat, self.get_feature_class())
        self.assertEqual(len(feat.get_field_dimnames('time')), 1)
        print("Feature from: test_create_feature_from_dict_datetime_1d")
        print(feat)

    def test_create_feature_from_dict_datetime64_1d(self):
        """Build a feature from a dict of fields with datetime64 times."""
        basefeat = self.define_base_feature()
        feat = self.get_feature_class()(
            {
                'lat': {'data': basefeat.get_lat(), 'dims': ('lat',)},
                'lon': {'data': basefeat.get_lon(), 'dims': ('lon',)},
                'time': {
                    'data': basefeat.get_times(),
                    'dims': ('time',)
                },
                'myvar': {
                    'data': basefeat.get_values('myvar'),
                    'dims': ('time', 'lat', 'lon')
                }
            }
        )
        self.assertIsInstance(feat, self.get_feature_class())
        self.assertEqual(len(feat.get_field_dimnames('time')), 1)
        print("Feature from: test_create_feature_from_dict_datetime64_1d")
        print(feat)

    def test_expanded_latlon(self):
        """Test lat/lon coordinate extraction with and without expansion."""
        basefeat = self.define_base_feature()
        # 1-d extraction, no expansion
        res = basefeat.get_values(
            'lat',
            index={'lat': slice(10, 15), 'lon': slice(50, 55)},
            expand=False
        )
        self.assertEqual(res.shape, (5,))
        self.assertTrue(np.equal(res, np.arange(-70, -65)).all())
        # scalar extraction with expansion
        res = basefeat.get_values(
            'lat',
            index={'lat': 10, 'lon': 50},
            expand=True
        )
        self.assertEqual(res, -70)
        res = basefeat.get_values(
            'lon',
            index={'lat': 10, 'lon': 50},
            expand=True
        )
        self.assertEqual(res, -130)
        # subset extraction with expansion to 2-d
        res = basefeat.get_values(
            'lat',
            index={'lat': slice(10, 15), 'lon': slice(50, 53)},
            expand=True
        )
        self.assertEqual(res.shape, (5, 3,))
        self.assertTrue(np.equal(res[:, 0], np.arange(-70, -65)).all())
        self.assertTrue(np.equal(res[0, :], np.ones((3,)) * -70).all())
        res = basefeat.get_values(
            'lon',
            index={'lat': slice(10, 15), 'lon': slice(50, 53)},
            expand=True
        )
        self.assertEqual(res.shape, (5, 3,))
        self.assertTrue(np.equal(res[0, :], np.arange(-130, -127)).all())
        self.assertTrue(np.equal(res[:, 0], np.ones((5,)) * -130).all())
        # full-grid expansion
        res = basefeat.get_values(
            'lat',
            expand=True
        )
        print("result: test_expanded_latlon ", res)

    # renamed from a duplicate 'test_expanded_latlon' definition that
    # silently shadowed the method above, so the lat/lon test never ran
    def test_expanded_time(self):
        """Test time coordinate extraction expanded over the grid."""
        basefeat = self.define_base_feature()
        res = basefeat.get_values(
            'time',
            index={'lat': slice(10, 15), 'lon': slice(50, 53)},
            expand=True
        )
        self.assertEqual(res.shape, (5, 5, 3,))
        res = basefeat.get_values(
            'time',
            expand=True
        )
        print("result: test_expanded_time ", res)
......@@ -345,4 +345,9 @@ class TestXArrayDataset(unittest.TestCase):
dst.save(
dest='test_profile.nc',
attr_profile='global_attributes_profile.cfg'
)
\ No newline at end of file
)
def test_opening_file_not_existing(self):
def open_dummy():
return Dataset('file_not_existing.nc')
self.assertRaises(IOError, open_dummy)
......@@ -32,6 +32,10 @@ class TestFeature(unittest.TestCase):
for att in saved_feat.attrs:
if saved_feat.attrs[att] is None:
saved_feat.attrs[att] = ''
elif isinstance(saved_feat.attrs[att], datetime):
saved_feat.attrs[att] = saved_feat.attrs[att].strftime(
'%Y-%m-%dT%H:%M:%S'
)
saved_feat.to_netcdf(TEST_FILE, 'w')
def tearDown(self):
......
......@@ -46,10 +46,9 @@ class GHRSSTNCDatasetL2PChecker(Checker, unittest.TestCase):
def test_dim_swath(self):
ncf = self.datasetclass(self.testfile)
swath = self.featureclass(ncf)
self.assertEquals(swath.get_geocoord('lat').dims, ('row', 'cell',))
print("DMS SST ", swath.get_field('sea_surface_temperature').dims)
self.assertEquals(swath.get_geocoord('lat').dimnames, ('row', 'cell',))
self.assertEquals(
swath.get_field('sea_surface_temperature').dims,
swath.get_field('sea_surface_temperature').dimnames,
('row', 'cell',)
)
......