Commit 86b892c5 authored by Jeff Piollé's avatar Jeff Piollé

added args in dataset creator

parent d357d707
......@@ -234,7 +234,7 @@ class Dataset(ABC):
"""
def __init__(self,
dataset: Union[str, xr.Dataset, 'Dataset', dict],
dataset: Union[str, xr.Dataset, 'Dataset', dict] = None,
mode: OpenMode = OpenMode.READ_ONLY,
view: Optional[Dict[str, slice]] = None,
dim_matching: Optional[Dict[str, str]] = DIM_MATCHING,
......@@ -244,7 +244,7 @@ class Dataset(ABC):
**kwargs):
"""
"""
if dataset is None:
if dataset is None and 'data_vars' not in kwargs:
raise ValueError('an url or dataset object must be provided')
object.__init__(self)
......@@ -275,8 +275,34 @@ class Dataset(ABC):
elif isinstance(dataset, xr.Dataset):
self.dataset = dataset
elif isinstance(dataset, dict):
self._create_from_dict(dataset)
elif isinstance(dataset, dict) or dataset is None:
# dataset provided as a xarray compliant dictionary or xarray
# dataset creation keywords
if dataset is not None and 'data_vars' in kwargs:
raise ValueError(
"data fields can not be provided both as main argument"
" and with data_vars keyword"
)
if dataset is None and 'data_vars' not in kwargs:
raise ValueError(
"either dataset or data_vars argument must be provided"
)
def data_as_tuple(arg):
if not isinstance(arg, dict) or len(arg) == 0:
raise TypeError("Badly formatted input: {}".format(arg))
return isinstance(arg[next(iter(arg))], tuple)
if dataset is None or data_as_tuple(dataset):
# xarray from classic arguments
dst = {'data_vars': dataset}
for kw in ['data_vars', 'coords', 'attrs', 'dims']:
if kw in kwargs:
dst[kw] = kwargs[kw]
self.dataset = xr.Dataset(**dst)
else:
# xarray from_dict
self._create_from_dict(dataset)
elif isinstance(dataset, Dataset):
self.dataset = dataset
else:
......@@ -364,7 +390,7 @@ class Dataset(ABC):
)
return result
def _create_from_dict(self, data):
def _create_from_dict(self, data, **kwargs):
"""
Create the dataset from a dict of fields and attributes.
"""
......@@ -477,7 +503,7 @@ class Dataset(ABC):
return '%Y-%m-%dT%H:%M:%SZ'
def has_field(self, fieldname: str) -> bool:
"""Return True if the field ``fieldname`` exists."""
"""Return True if the field ``fieldname`` exists in the dataset."""
return fieldname in self.fieldnames
def rename_field(self, name: str, newname: str) -> None:
......@@ -943,11 +969,11 @@ class Dataset(ABC):
])
@property
def geodimnames(self):
def geodimnames(self) -> Tuple[str]:
return tuple(self.geodims.keys())
@property
def geodimsizes(self):
def geodimsizes(self) -> Tuple[int]:
return tuple(self.geodims.values())
def get_field_fillvalue(self, fieldname: str) -> Any:
......@@ -1144,7 +1170,6 @@ class Dataset(ABC):
else:
raise NotImplementedError
def get_lon(self, **kwargs):
"""Return the longitude values of a feature.
......@@ -1412,8 +1437,8 @@ class Dataset(ABC):
return self.attrs['time_coverage_start']
@time_coverage_start.setter
def time_coverage_start(self,
date: Union[str, 'datetime.datetime'] = 'auto'):
def time_coverage_start(
self, date: Union[str, 'datetime.datetime'] = 'auto'):
"""Set the minimum sensing time of the data contained in the dataset
Args:
......@@ -1558,10 +1583,10 @@ class Dataset(ABC):
)
def save(self,
dest=None,
format='NETCDF4',
preprocess=None,
profile='default_saving_profile.yaml'
dest: Union[str, 'Dataset', None] = None,
format: str = 'NETCDF4',
preprocess: bool = False,
profile: str = 'default_saving_profile.yaml'
):
"""
Args:
......@@ -1621,7 +1646,7 @@ class Dataset(ABC):
self._sync_internals()
# apply new formatting rules
if preprocess is not None:
if preprocess:
saved_dataset = preprocess(self.original)
else:
saved_dataset = self._convert_format(profile)
......@@ -1740,7 +1765,7 @@ class Dataset(ABC):
if v not in dataset.variables:
logging.warning(
'Field {} not found in the dataset to save. Skipping'
.format(v)
.format(v)
)
continue
for att in attrs[v]:
......
......@@ -22,6 +22,9 @@ class NCDataset(Dataset):
Args:
format ({‘NETCDF4’, ‘NETCDF4_CLASSIC’, ‘NETCDF3_64BIT’,
‘NETCDF3_CLASSIC’}, optional): format (default is 'NETCDF4')
See:
:class:`~cerbere.dataset.dataset.Dataset`
"""
def __init__(
self,
......
......@@ -8,13 +8,9 @@ from __future__ import print_function
from abc import abstractmethod, ABC
from collections import OrderedDict
import datetime
import logging
import warnings
from typing import Tuple
from typing import Tuple, Mapping
import numpy as np
import pandas as pd
import xarray as xr
from ..dataset.field import Field
......@@ -30,18 +26,16 @@ class Feature(Dataset, ABC):
.. |dataset| replace:: :mod:`~cerbere.dataset`
.. |feature| replace:: :mod:`~cerbere.feature`
All feature| classes are inherited from this parent class.
All |feature| classes are inherited from this parent class.
A feature object is specialized |dataset| object as it
A feature object is a specialized |dataset| object as it
can only contain typed observations, where the type is defined by the data
structure or observation pattern. Being a |dataset| object,
it can be created from the same arguments as any |dataset| class instance.
Refer to |dataset|.
In most cases, a Feature object will be instantiated from a file content,
In many cases, a |feature| object will be instantiated from the content of a file,
read through a |dataset| class handling the specific format of this file.
For instance:
......@@ -74,7 +68,6 @@ class Feature(Dataset, ABC):
if not self._check_dimensions():
raise ValueError("data structure does not match the feature type")
@property
def _dataset_class(self):
if isinstance(self.dataset, Dataset):
......@@ -242,79 +235,22 @@ class Feature(Dataset, ABC):
for fieldname, field in child_fields.items():
self.add_field(field)
def get_values(self, fieldname, **kwargs):
def get_values(self, *args, **kwargs):
"""
Args:
expand (bool): expand the geocoordinate values to the full data
grid. This is only for lat, lon and times in the cases for
instance of cylindrical grids (where lat, lon are provided as
vectors) or average/climatological products (where time is
given as a single value for the whole grid). This returns a lat,
lon or times value for each grid pixel.
For more details on the other arguments, refer to:
:func:`cerbere.dataset.Dataset.get_values`
See:
:meth:`~cerbere.dataset.Dataset.get_values`
"""
if 'expand_dims' not in kwargs:
kwargs['expand_dims'] = self._feature_geodimnames
return super(Feature, self).get_values(
fieldname, **kwargs
*args, **kwargs
)
# def save(self, output=None, infer_cf_attrs=False, **kwargs):
# """
# Save the feature to a storage (file,...)
#
# Args:
# output (:class:`~cerbere.dataset.xarraydataset.XArrayDataset`):
# storage object which to save the feature data to.
#
# infer_cf_attrs (bool, optional): infer the values of some CF
# attributes from the data content.
# """
# if 'attrs' in kwargs:
# warnings.warn(
# "Deprecated `attrs` argument",
# FutureWarning
# )
# self.attrs.update(kwargs['attrs'])
#
# if infer_cf_attrs:
# bbox_attrs = [
# 'geospatial_lat_min',
# 'geospatial_lon_min',
# 'geospatial_lat_max',
# 'geospatial_lon_max'
# ]
# if any([self.attrs[_] is None for _ in bbox_attrs]):
# lonmin, latmin, lonmax, latmax = self.get_bbox()
# self.xarray.attrs['geospatial_lat_min'] = latmin
# self.xarray.attrs['geospatial_lon_min'] = lonmin
# self.xarray.attrs['geospatial_lat_max'] = latmax
# self.xarray.attrs['geospatial_lon_max'] = lonmax
#
# if output is None:
# if self.dataset is None:
# raise ValueError(
# "No mapper provided or attached to this feature. You must "
# "provide a mapper in `output` argument"
# )
# self.dataset.save(**kwargs)
#
# elif isinstance(output, XArrayDataset):
# if not output.is_empty():
# raise ValueError("output mapper must be empty")
# output.dataset = self.xarray
# output.save(**kwargs)
#
# else:
# raise TypeError("Unknown object type provided as output")
def extract(
self,
bbox=None,
footprint=None,
**kwargs):
**kwargs) -> 'Feature':
"""Extract a subset as a new ``Feature`` object.
The subset can be selected through one of the following:
......@@ -331,18 +267,17 @@ class Feature(Dataset, ABC):
"""
if bbox is not None or footprint is not None:
raise NotImplementedError
return self.__class__(super().extract(**kwargs))
return self.__class__(super(Feature, self).extract(**kwargs))
def extract_field(
self,
fieldname,
index=None,
padding=False,
prefix=None,
**kwargs
):
fieldname: str,
index: Mapping[str, slice] = None,
padding: bool = False,
prefix: str = None,
**kwargs):
"""
Create a copy of a field, limiting to a set of slices or indices, and
Create a copy of a field, or subset of a field, and
padding out as required.
Args
......
......@@ -15,15 +15,17 @@ __all__ = ['PointCollection']
class PointCollection(Feature):
"""
Feaure class for a set of randomly sampled points.
Feature class for a set of randomly sampled points.
A pointcollection is defined by geolocation coordinates lon(time), lat(time),
time(time), z(time) all having a single geolocation dimension: time.
A :class:`~cerbere.feature.pointcollection.PointCollection` feature is
defined by geolocation coordinates field: ``lon(time)``, ``lat(time)``,
``time(time)``, optionally ``z(time)``, all having a single geolocation
dimension: ``time``.
"""
@property
def _feature_geodimnames(self):
return ('time',)
return 'time',
def get_geocoord_dimnames(self, *args, **kwargs):
return ('time',)
return 'time',
......@@ -22,20 +22,20 @@ class Swath(Feature):
"""
def __init__(self, *args, **kwargs):
# create feature
super().__init__(
super(Swath, self).__init__(
*args,
**kwargs
)
@property
def _feature_geodimnames(self):
return ('row', 'cell')
return 'row', 'cell',
def get_geocoord_dimnames(self, fieldname, shape=None):
if fieldname == 'depth':
return ('depth',)
return 'depth',
else:
return ('row', 'cell',)
return 'row', 'cell',
......
......@@ -6,7 +6,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
from cerbere.feature.feature import Feature
......@@ -16,15 +15,17 @@ __all__ = ['Trajectory']
class Trajectory(Feature):
"""
Feaure class for the trajectory observation patterns.
Feature class for the trajectory observation patterns.
A trajectory is defined by geolocation coordinates lon(time), lat(time),
time(time), z(time) all having a single geolocation dimension: time.
A :class:`~cerbere.feature.trajectory.Trajectory` feature is defined by
geolocation coordinates field: ``lon(time)``, ``lat(time)``,
``time(time)``, optionally ``z(time)``, all having a single geolocation
dimension: ``time``.
"""
@property
def _feature_geodimnames(self):
return ('time',)
return 'time',
def get_geocoord_dimnames(self, *args, **kwargs):
return ('time',)
return 'time',
......@@ -82,6 +82,7 @@ setup(
'cerbere.mapper',
'cerbere.feature',
'cerbere.dataset',
'tests',
'cerbere.share'
],
include_package_data=True,
......
......@@ -149,6 +149,35 @@ class TestCylindricalGridFeature(TestFeature):
print("Feature from: test_create_feature_from_dict_datetime64_1d")
print(feat)
def test_create_feature_from_xarray_keywords(self):
    """Verify a feature can be built from xarray-style constructor keywords.

    Passes the data variables as the main argument and the coordinates
    and global attributes through the ``coords`` / ``attrs`` keywords,
    as accepted by the underlying xarray ``Dataset`` constructor.
    """
    time_units = {'units': 'seconds since 2001-01-01 00:00:00'}
    coordinates = {
        'time': (['time'], [datetime(2018, 1, 1)], time_units),
        'lat': (['lat'], np.arange(-80, 80, 1)),
        'lon': (['lon'], np.arange(-180, 180, 1)),
    }
    feat = self.get_feature_class()(
        {'myvar': (['lat', 'lon'], np.ones(shape=(160, 360)))},
        coords=coordinates,
        attrs={'gattr1': 'gattr_val'}
    )
    self.assertIsInstance(feat, self.get_feature_class())
    # the time coordinate must keep its own single dimension
    self.assertEqual(len(feat.get_field_dimnames('time')), 1)
    print("Feature from: test_create_feature_from_xarray_keywords")
    print(feat)
def test_create_feature_from_xarray_args(self):
    """Verify a feature can be built from an xarray ``from_dict`` style dict.

    The dictionary follows the layout of ``xarray.Dataset.to_dict``:
    ``coords``, ``attrs`` and ``data_vars`` entries, each variable given
    as a ``{'dims': ..., 'data': ...}`` mapping.
    """
    # NOTE: dims entries must be real tuples; the original ('time') and
    # ('lat') were parenthesized strings (missing trailing comma), which
    # xarray tolerates but which contradicts the intended type.
    feat = self.get_feature_class()({
        'coords': {
            'time': {'dims': ('time',), 'data': [datetime(2018, 1, 1)]},
            'lat': {'dims': ('lat',), 'data': np.arange(-80, 80, 1)},
            'lon': {'dims': ('lon',), 'data': np.arange(-180, 180, 1)}},
        'attrs': {'gattr1': 'gattr_val'},
        'data_vars': {
            'myvar': {
                'dims': ('lat', 'lon'),
                'data': np.ones(shape=(160, 360))
            }
        }
    })
    self.assertIsInstance(feat, self.get_feature_class())
    # the time coordinate must keep its own single dimension
    self.assertEqual(len(feat.get_field_dimnames('time')), 1)
    print("Feature from: test_create_feature_from_xarray_args")
    print(feat)
def test_expanded_latlon(self):
basefeat = self.define_base_feature()
......
......@@ -93,6 +93,20 @@ class TestXArrayDataset(unittest.TestCase):
print(dst)
self.assertIsInstance(dst, Dataset)
def test_init_from_xarray_keywords(self):
    """Verify a Dataset can be created from xarray constructor keywords.

    Data variables are passed as the main argument; coordinates and
    global attributes go through the ``coords`` / ``attrs`` keywords.
    """
    coordinates = {
        'time': (
            ['time'],
            [datetime(2018, 1, 1)],
            {'units': 'seconds since 2001-01-01 00:00:00'}
        ),
        'lat': (['lat'], np.arange(-80, 80, 1)),
        'lon': (['lon'], np.arange(-180, 180, 1)),
    }
    dst = Dataset(
        {'myvar': (['lat', 'lon'], np.ones(shape=(160, 360)))},
        coords=coordinates,
        attrs={'gattr1': 'gattr_val'}
    )
    print(dst)
    self.assertIsInstance(dst, Dataset)
def test_dimension_names(self):
print("...from xarray.Dataset")
refdims = list(('lat', 'lon', 'time'))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment