...
 
include *.txt
include *.rst
include install.sh
include pyhdf_setup.py.patch
include docs/*.rst
include cerbere/share/default_saving_profile.yaml
include cerbere/share/ghrsst_saving_profile.yaml
......@@ -12,6 +12,9 @@ import inspect
import logging
import os
from typing import Mapping, Any
import yaml
import numpy
# Module level default values
DEFAULT_TIME_UNITS = 'seconds since 1981-01-01 00:00:00'
......@@ -19,29 +22,70 @@ DEFAULT_TIME_UNITS = 'seconds since 1981-01-01 00:00:00'
CF_AUTHORITY = 'CF-1.7'
def default_global_attrs(
profile: str='global_attributes_profile.cfg'
def default_fill_value(obj):
"""Returns the default fill value for a specific type"""
return numpy.ma.default_fill_value(obj)
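For illustration, a minimal sketch of what this helper returns (the values shown are numpy's defaults and may vary with the numpy version):

import numpy
# the default fill value depends on the dtype of the object passed in
numpy.ma.default_fill_value(numpy.dtype('float64'))   # 1e+20
numpy.ma.default_fill_value(numpy.dtype('int32'))     # 999999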
def default_profile(
profile: str='default_saving_profile.yaml'
) -> Mapping[str, Any]:
"""Returns a list of default global attributes
"""Returns a list of default settings for storing data and metadata
The settings are defined in a YAML file and can include a liste of global
attributes (and default values), variable attributes (and default values)
and variable encoding (scale_factor, add_offset,...) for a given storage
format. If no profile file is provided, the function returns the built-in
default settings provided in the ``cerbere.share`` folder of the package.
These settings can be customized by defining your own in a new YAML profile
file and calling the function with ``profile`` argument. If the profile file
is stored in the user home dir in ``~.cerbere`` folder, providing only the
file basename is fine, otherwise provide the full path to the profile file.
A custom profile file can be defined in YAML, using the following format:
.. code-block:: yaml
---
# Defines first the global attributes
attributes:
# define here a dictionary of global attributes (possibly with a
# default value) in the following form:
#
# with default value
attr1: val1
# with no default value (None)
attr2:
# Define the variable attributes
variables:
# a dictionary of variables containing a dictionary of variable
# attributes
variable1:
varattr1: varval1
varattr2:
# Define the encoding attributes, meaningful for a specific format. For
# instance for NetCDF4, allowed encoding attributes include:
# scale_factor, add_offset, dtype, least_significant_digit,...
encoding:
# a dictionary of variables containing a dictionary of encoding
# attributes
variable1:
scale_factor: 0.01
add_offset: 273.15
dtype: int32
Args:
profile: the path to or filename of the attribute profile configuration
file
profile: the path to (or filename of) the settings profile file
Returns:
a tuple of three dicts: the global attributes, the variable attributes and
the variable encoding
"""
if profile is None:
profile = 'global_attributes_profile.cfg'
profile = 'default_saving_profile.yaml'
if not os.path.exists(profile):
# get path from home dir or default
path = os.path.join(
......@@ -52,7 +96,7 @@ def default_global_attrs(
if not os.path.exists(path):
path = os.path.join(
os.path.dirname(inspect.getfile(inspect.currentframe())),
'../share/{}'.format(profile)
'share/{}'.format(profile)
)
logging.warning(
'Using default global attribute file: {}'.format(path)
......@@ -61,13 +105,27 @@ def default_global_attrs(
path = profile
# read attributes
config = configparser.RawConfigParser()
config.optionxform = str
config.read(path)
with open(os.path.abspath(path), 'r') as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
logging.error(exc)
raise
globattrs = collections.OrderedDict([])
varattrs = collections.OrderedDict([])
encoding = collections.OrderedDict([])
attrs = collections.OrderedDict([])
for section in config.sections():
for att in config.options(section):
attrs[att] = config.get(section, att)
if 'attributes' in config:
for att in config['attributes']:
globattrs[att] = config['attributes'][att]
if 'variables' in config and config['variables'] is not None:
    for v in config['variables']:
        varattrs[v] = config['variables'][v]
if 'encoding' in config and config['encoding'] is not None:
    for v in config['encoding']:
        encoding[v] = config['encoding'][v]
return attrs
return globattrs, varattrs, encoding
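A minimal usage sketch (the custom profile name below is hypothetical):

# built-in defaults shipped in cerbere/share
gattrs, vattrs, encoding = default_profile()
# or a custom profile, e.g. stored as ~/.cerbere/my_saving_profile.yaml (hypothetical)
gattrs, vattrs, encoding = default_profile(profile='my_saving_profile.yaml')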
......@@ -24,7 +24,7 @@ import shapely.geometry
import xarray as xr
from ..cfconvention import (
default_global_attrs, CF_AUTHORITY, DEFAULT_TIME_UNITS
default_profile, default_fill_value, CF_AUTHORITY, DEFAULT_TIME_UNITS
)
from .field import Field
......@@ -1556,7 +1556,7 @@ class Dataset(ABC):
dest=None,
format='NETCDF4',
preprocess=None,
attr_profile='global_attributes_profile.cfg'
profile='default_saving_profile.yaml'
):
"""
Args:
......@@ -1570,7 +1570,7 @@ class Dataset(ABC):
dest=dest,
format=format,
preprocess=preprocess,
attr_profile=attr_profile
profile=profile
)
# save as a new file
......@@ -1595,7 +1595,7 @@ class Dataset(ABC):
return dest.save(
format=format,
preprocess=preprocess,
attr_profile=attr_profile
profile=profile
)
......@@ -1619,7 +1619,7 @@ class Dataset(ABC):
if preprocess is not None:
saved_dataset = preprocess(self.original)
else:
saved_dataset = self._convert_format(attr_profile)
saved_dataset = self._convert_format(profile)
# save to chosen format
if 'NETCDF' in self._format:
......@@ -1636,7 +1636,7 @@ class Dataset(ABC):
if '_FillValue' in saved_dataset[v].attrs:
fillv = saved_dataset[v].attrs.pop('_FillValue')
else:
fillv = np.ma.maximum_fill_value(saved_dataset[v].dtype)
fillv = default_fill_value(saved_dataset[v].dtype)
saved_dataset[v].encoding['_FillValue'] = fillv
saved_dataset.to_netcdf(
path=self._url,
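A hedged sketch of the same _FillValue handling on a standalone xarray.Dataset (the variable name and output path are hypothetical):

import numpy as np
import xarray as xr
ds = xr.Dataset({'sst': ('obs', np.array([290.0, np.nan, 291.5]))})
# move any _FillValue attribute into the encoding, falling back to numpy's default
fillv = ds['sst'].attrs.pop('_FillValue', np.ma.default_fill_value(ds['sst'].dtype))
ds['sst'].encoding['_FillValue'] = fillv
ds.to_netcdf('example_sst.nc')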
......@@ -1679,32 +1679,92 @@ class Dataset(ABC):
.format(attr, attrval))
self.original.attrs[attr] = attrval
def add_default_attrs(self, *args, **kwargs) -> None:
def add_global_attrs(
cls,
dataset: 'xr.Dataset',
attrs: Mapping[str, Any]) -> None:
"""Add default attributes to the dataset from a attribute definition
file.
A custom profile of attributes can be provided, using ``profile``
argument, otherwise the default cerbere profile will be used.
Args:
attrs (dict): the global attributes to add when saving the dataset
"""
# add attributes
# don't override previous attribute values
for att in attrs:
if att not in dataset.attrs:
dataset.attrs[att] = attrs[att]
def add_field_attrs(
cls,
dataset: 'xr.Dataset',
attrs: Mapping[str, Any]) -> None:
"""Add field attributes to the dataset from a attribute definition
file.
Refer to :func:`~cerbere.cfconvention.default_global_attrs`
Args:
attrs (dict): the field attributes to add when saving the dataset
"""
defattrs = default_global_attrs(*args, **kwargs)
# add attributes from attribute file template
# add attributes
# don't override previous attribute values
for att in defattrs:
if att not in self.attrs:
self.attrs[att] = defattrs[att]
for v in attrs:
if v not in dataset.variables:
logging.warning(
'Field {} not found in the dataset to save. Skipping'
.format(v)
)
continue
for att in attrs[v]:
if att not in dataset.variables[v].attrs:
dataset.variables[v].attrs[att] = attrs[v][att]
def add_field_encoding(
cls,
dataset: 'xr.Dataset',
attrs: Mapping[str, Any]) -> None:
"""Add field encoding to the dataset from a attribute definition
file.
Args:
attrs (dict): the field encoding attributes to add when saving the
dataset
"""
# add attributes
# overrides previous attribute values!!
for v in attrs:
if v not in dataset.variables:
logging.warning(
'Field {} not found in the dataset to save. Skipping'
.format(v)
)
continue
for att in attrs[v]:
dataset.variables[v].encoding[att] = attrs[v][att]
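A hedged sketch of how these helpers affect an xarray.Dataset (field and attribute names are hypothetical):

import numpy as np
import xarray as xr
ds = xr.Dataset({'sea_surface_temperature': ('obs', np.array([290.2, 291.0]))})
# field attributes are only added when not already present on the variable
vattrs = {'sea_surface_temperature': {'units': 'K'}}
for v, attrs in vattrs.items():
    for att, val in attrs.items():
        ds[v].attrs.setdefault(att, val)
# field encoding always overrides previous values
encoding = {'sea_surface_temperature': {
    'dtype': 'int32', 'scale_factor': 0.01, 'add_offset': 273.15}}
for v, enc in encoding.items():
    ds[v].encoding.update(enc)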
def _convert_format(self, attr_profile=None, **kwargs):
def _convert_format(self, profile=None, **kwargs):
"""Implement specific formatting rules to a dataset.
Used before saving the dataset to match some specific format
requirements when writing the dataset on file.
A custom format profile can be provided, using ``profile``
argument, otherwise the default cerbere profile will be used.
Refer to :func:`~cerbere.cfconvention.default_profile`
"""
dataset = self.original.copy()
# add attributes from attribute file template
self.add_default_attrs(profile=attr_profile)
# read custom settings for the dataset saving
gattrs, vattrs, encoding = default_profile(profile=profile)
# add global attributes from custom settings
self.add_global_attrs(dataset, gattrs)
# add field attributes from custom settings
self.add_field_attrs(dataset, vattrs)
# add field encoding from custom settings
self.add_field_encoding(dataset, encoding)
return dataset
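A hedged end-to-end sketch of how a saving profile drives this conversion (the file names are hypothetical):

from cerbere.dataset.ncdataset import NCDataset
dst = NCDataset('some_input.nc')                  # hypothetical input file
dst.save(dest='some_output.nc',                   # hypothetical output file
         profile='default_saving_profile.yaml')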
......@@ -1712,13 +1772,16 @@ class Dataset(ABC):
def _format_nc_attrs(cls, dataset, *args, **kwargs):
"""format the attributes in an acceptable type for netCDF"""
# global attrs
invalid_attrs = []
for att in dataset.attrs:
# remove None attributes
if dataset.attrs[att] is None:
dataset.attrs.pop(att)
invalid_attrs.append(att)
continue
# convert datetime objects to string
dataset.attrs[att] = cls._format_attr(dataset.attrs[att])
for att in invalid_attrs:
dataset.attrs.pop(att)
# variable attrs
for varname in dataset._variables:
......
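The deferred removal above avoids mutating ``dataset.attrs`` while iterating over it; a minimal sketch of the same pattern on a plain dict:

attrs = {'title': 'demo', 'summary': None, 'license': None}
invalid = [att for att, val in attrs.items() if val is None]
for att in invalid:
    attrs.pop(att)
# attrs is now {'title': 'demo'}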
......@@ -14,7 +14,7 @@ import warnings
import numpy
from cerbere.dataset.ncdataset import NCDataset
from cerbere.dataset.dataset import CDM_TYPES
from cerbere.dataset.dataset import CDM_TYPES, OpenMode
# common matching dimensions and fields in netCDF files
......@@ -50,7 +50,7 @@ class GHRSSTNCDataset(NCDataset):
# url needs to be opened first in order to guess default datamodel
super()._open(**kwargs)
if self._mode != 'w':
if self._mode != OpenMode.WRITE_NEW:
# recompose and unpack time variable if required
self._unsplit_time_coord(
......@@ -163,13 +163,27 @@ class GHRSSTNCDataset(NCDataset):
self.dataset = dataset
def _convert_format(self, path=None, **kwargs):
dataset = super()._convert_format(
profile='ghrsst_global_attributes_profile.cfg'
)
def _convert_format(self, profile='ghrsst_saving_profile.yaml'):
"""Implement specific formatting rules to a dataset.
Used before saving the dataset to match some specific format
requirements when writing the dataset on file.
"""
dataset = super()._convert_format(profile=profile)
# fill in specific GHRSST attributes
dataset.attrs['uuid'] = str(uuid.uuid1())
dataset.attrs['date_modified'] = datetime.datetime.now()
dataset.attrs['date_issued'] = dataset.attrs['date_modified']
dataset.attrs['date_metadata_modified'] = dataset.attrs['date_modified']
dataset.attrs['geospatial_bounds'] = \
'POLYGON(({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))'.format(
dataset.attrs['geospatial_lat_min'],
dataset.attrs['geospatial_lon_min'],
dataset.attrs['geospatial_lat_max'],
dataset.attrs['geospatial_lon_max']
)
# fill in some attributes
dataset.attrs['uuid'] = uuid.uuid4()
return dataset
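For illustration, with a hypothetical bounding box (lat in [-80, 80], lon in [-180, 180]) the pattern above produces:

lat_min, lon_min, lat_max, lon_max = -80, -180, 80, 180
wkt = 'POLYGON(({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))'.format(
    lat_min, lon_min, lat_max, lon_max)
# -> 'POLYGON((-80 -180, 80 -180, 80 180, -80 180, -80 -180))'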
def _get_matching_dimname(self, dimname):
......@@ -438,25 +452,3 @@ class GHRSSTNCDataset(NCDataset):
raise
def _convert_format(self, attr_file=None):
"""Implement specific formatting rules to a dataset.
Used before saving the dataset to match some specific format
requirements when writing the dataset on file.
"""
dataset = super()._convert_format(attr_file=attr_file)
# fill in specific GHRSST attributes
dataset.attrs['uuid'] = str(uuid.uuid1())
dataset.attrs['date_modified'] = datetime.datetime.now()
dataset.attrs['date_issued'] = dataset.attrs['date_modified']
dataset.attrs['date_metadata_modified'] = dataset.attrs['date_modified']
dataset.attrs['geospatial_bounds'] = \
'POLYGON(({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))'.format(
dataset.attrs['geospatial_lat_min'],
dataset.attrs['geospatial_lon_min'],
dataset.attrs['geospatial_lat_max'],
dataset.attrs['geospatial_lon_max']
)
return dataset
......@@ -9,7 +9,7 @@ import datetime
import logging
from .dataset import Dataset
from ..cfconvention import default_global_attrs
from ..cfconvention import default_profile
LOGGER = logging.getLogger()
......@@ -37,10 +37,10 @@ class NCDataset(Dataset):
def _convert_format(
self,
attr_profile='global_attributes_profile.cfg',
profile='ghrsst_saving_profile.yaml',
**kwargs
):
dataset = super()._convert_format(attr_profile, **kwargs)
dataset = super()._convert_format(profile, **kwargs)
# fill in some attributes
creation_date = datetime.datetime.now()
......
---
# Defines the default global attributes, field attributes and encoding applied when saving a new Cerbere feature
attributes:
# Description
id:
naming_authority: fr.ifremer.cersat
title:
summary:
cdm_data_type:
keywords:
acknowledgement:
processing_level:
metadata_link:
# Observation
platform:
platform_type:
instrument:
instrument_type:
band:
# Conventions
Metadata_Conventions: Climate and Forecast (CF) 1.7, Attribute Convention for Data Discovery (ACDD) 1.3
standard_name_vocabulary: NetCDF Climate and Forecast (CF) Metadata Convention
keywords_vocabulary: NASA Global Change Master Directory (GCMD) Science Keywords
format_version:
# Authorship
institution: Institut Francais de Recherche et d'Exploitation de la Mer/Centre de Recherche et d'Exploitation satellitaire
institution_abbreviation: ifremer/cersat
project:
license:
publisher_name: ifremer/cersat
publisher_url: http://cersat.ifremer.fr
publisher_email: cersat@ifremer.fr
creator_name:
creator_url:
creator_email:
references:
# Traceability
processing_software: Cersat/Cerbere 1.0
product_version:
uuid:
history:
source:
source_version:
# BBox
geospatial_lat_min:
geospatial_lat_max:
geospatial_lat_units: degrees
geospatial_lon_min:
geospatial_lon_max:
geospatial_lon_units: degrees
# Vertical
geospatial_vertical_min:
geospatial_vertical_max:
geospatial_vertical_units: meters above mean sea level
geospatial_vertical_positive: up
# Resolution
spatial_resolution:
geospatial_lat_resolution:
geospatial_lon_resolution:
# Temporal
time_coverage_start:
time_coverage_end:
time_coverage_resolution:
variables:
encoding:
---
# Defines the default global attributes, field attributes and encoding applied when saving a Cerbere feature in GHRSST format
attributes:
# Description
id:
naming_authority: fr.ifremer.cersat
title:
summary:
cdm_data_type:
keywords: Oceans > Ocean Temperature > Sea Surface Temperature
acknowledgement: "Please acknowledge the use of these data with the following statement: these data were produced by the Centre de Recherche et d'Exploitation Satellitaire (CERSAT), at IFREMER, Plouzane (France)"
processing_level:
metadata_link:
comment:
file_quality_level:
# Observation
platform:
platform_type:
instrument:
instrument_type:
band:
# Conventions
Metadata_Conventions: Climate and Forecast (CF) 1.7, Attribute Convention for Data Discovery (ACDD) 1.3
standard_name_vocabulary: NetCDF Climate and Forecast (CF) Metadata Convention
keywords_vocabulary: NASA Global Change Master Directory (GCMD) Science Keywords
format_version: GDSv1.2
gds_version_id:
platform_vocabulary: CEOS mission table
instrument_vocabulary: CEOS instrument table
# Authorship
institution: Institut Francais de Recherche et d'Exploitation de la Mer/Centre de Recherche et d'Exploitation satellitaire
institution_abbreviation: Ifremer/CERSAT
project: GHRSST
program: CMEMS
license: GHRSST protocol describes data use as free and open.
publisher_name: CERSAT
publisher_url: http://cersat.ifremer.fr
publisher_email: cersat@ifremer.fr
publisher_institution: Ifremer
publisher_type: institution
creator_name: CERSAT
creator_url: http://cersat.ifremer.fr
creator_email: cersat@ifremer.fr
creator_type: institution
creator_institution: Ifremer
contributor_name:
contributor_role:
references:
# Traceability
processing_software: Telemachus 1.0
product_version: 3.0
uuid:
history:
source:
source_version:
# BBox
geospatial_lat_min:
geospatial_lat_max:
geospatial_lat_units: degrees
geospatial_lon_min:
geospatial_lon_max:
geospatial_lon_units: degrees
geospatial_bounds:
geospatial_bounds_crs: WGS84
# Resolution
spatial_resolution:
geospatial_lat_resolution:
geospatial_lon_resolution:
# Temporal
time_coverage_start:
time_coverage_end:
time_coverage_resolution:
variables:
encoding:
sea_surface_temperature:
dtype: int32
_FillValue: -32768
scale_factor: 0.01
add_offset: 273.15
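# As a hedged illustration, other fields could be packed the same way
# (the field name and values below are hypothetical):
#   sea_ice_fraction:
#     dtype: int8
#     _FillValue: -128
#     scale_factor: 0.01
#     add_offset: 0.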
......@@ -80,14 +80,11 @@ setup(
'cerbere',
'cerbere.datamodel',
'cerbere.mapper',
'cerbere.feature',
'cerbere.dataset',
'tests'
'cerbere.feature',
'cerbere.dataset',
'cerbere.share'
],
package_data={'share': [
'global_attributes_profile.cfg',
'ghrsst_global_attributes_profile.cfg'
]},
include_package_data=True,
long_description=(
"a python API to manipulate spatial and temporal observations, "
"compatible with many existing formats, normalizing and typing the "
......@@ -103,6 +100,5 @@ setup(
'python-dateutil>=2.1'
],
include_dirs=[get_include()],
zip_safe=False,
include_package_data=True
zip_safe=False
)
# Defines the list and default values of the global attributes of a Cerbere new feature
[DESCRIPTION]
id =
naming_authority = fr.ifremer.cersat
title =
summary =
cdm_data_type =
keywords = Oceans > Ocean Temperature > Sea Surface Temperature
acknowledgement = Please acknowledge the use of these data with the following statement: these data were produced by the Centre de Recherche et d'Exploitation Satellitaire (CERSAT), at IFREMER, Plouzane (France)
processing_level =
metadata_link =
history =
comment =
file_quality_level =
[OBSERVATION]
platform =
platform_type =
instrument =
instrument_type =
band =
[CONVENTIONS]
Conventions =
Metadata_Conventions = Climate and Forecast (CF) 1.7, Attribute Convention for Data Discovery (ACDD) 1.3
standard_name_vocabulary = NetCDF Climate and Forecast (CF) Metadata Convention
keywords_vocabulary = NASA Global Change Master Directory (GCMD) Science Keywords
format_version = GDSv1.2
gds_version_id =
naming_authority =
platform_vocabulary = CEOS mission table
instrument_vocabulary = CEOS instrument table
[AUTHORSHIP]
institution = Institut Francais de Recherche et d'Exploitation de la Mer/Centre de Recherche et d'Exploitation satellitaire
institution_abbreviation = Ifremer/CERSAT
project = GHRSST
program =
license = GHRSST protocol describes data use as free and open.
publisher_name = CERSAT
publisher_url = http://cersat.ifremer.fr
publisher_email = cersat@ifremer.fr
publisher_institution = Ifremer
publisher_type = institution
creator_name = CERSAT
creator_url = http://cersat.ifremer.fr
creator_email = cersat@ifremer.fr
creator_type = institution
creator_institution = Ifremer
acknowledgment =
contributor_name =
contributor_role =
references =
[TRACEABILITY]
processing_software = Telemachus 1.0
product_version = 3.0
uuid =
history =
source =
source_version =
netcdf_version_id =
date_created =
date_modified =
date_issued =
date_metadata_modified =
[BBOX]
geospatial_lat_min =
geospatial_lat_max =
geospatial_lat_units = degrees
geospatial_lon_min =
geospatial_lon_max =
geospatial_lon_units = degrees
geospatial_bounds =
geospatial_bounds_crs = WGS84
#[VERTICAL]
#geospatial_vertical_min =
#geospatial_vertical_max =
#geospatial_vertical_units = meters above mean sea level
#geospatial_vertical_positive = up
[RESOLUTION]
spatial_resolution =
geospatial_lat_resolution =
geospatial_lon_resolution =
[TEMPORAL]
time_coverage_start =
time_coverage_end =
time_coverage_resolution =
# Defines the list and default values of the global attributes of a Cerbere new feature
[DESCRIPTION]
id =
naming_authority = fr.ifremer.cersat
title =
summary =
cdm_data_type =
keywords =
acknowledgement =
processing_level =
metadata_link =
[OBSERVATION]
platform =
platform_type =
instrument =
instrument_type =
band =
[CONVENTIONS]
Metadata_Conventions = Climate and Forecast (CF) 1.7, Attribute Convention for Data Discovery (ACDD) 1.3
standard_name_vocabulary = NetCDF Climate and Forecast (CF) Metadata Convention
keywords_vocabulary = NASA Global Change Master Directory (GCMD) Science Keywords
format_version =
[AUTHORSHIP]
institution = Institut Francais de Recherche et d'Exploitation de la Mer/Centre de Recherche et d'Exploitation satellitaire
institution_abbreviation = ifremer/cersat
project =
license =
publisher_name = ifremer/cersat
publisher_url = http://cersat.ifremer.fr
publisher_email = cersat@ifremer.fr
creator_name =
creator_url =
creator_email =
references =
[TRACEABILITY]
processing_software = Cersat/Cerbere 1.0
product_version =
uuid =
history =
source =
source_version =
[BBOX]
geospatial_lat_min =
geospatial_lat_max =
geospatial_lat_units = degrees
geospatial_lon_min =
geospatial_lon_max =
geospatial_lon_units = degrees
[VERTICAL]
geospatial_vertical_min =
geospatial_vertical_max =
geospatial_vertical_units = meters above mean sea level
geospatial_vertical_positive = up
[RESOLUTION]
spatial_resolution =
geospatial_lat_resolution =
geospatial_lon_resolution =
[TEMPORAL]
time_coverage_start =
time_coverage_end =
time_coverage_resolution =
......@@ -176,12 +176,13 @@ class TestXArrayDataset(unittest.TestCase):
def test_var_get_global_attributes(self):
attrs = {
'gattr1': 'gattr1_val', 'gattr2': 'gattr2_val',
'time_coverage_start': None, 'time_coverage_end': None
}
print("...from xarray.Dataset")
dst = Dataset(self.xrdataset)
self.assertIsInstance(dst.attrs, dict)
attrs['time_coverage_start'] = dst.time_coverage_start
attrs['time_coverage_end'] = dst.time_coverage_end
self.assertDictEqual(dict(dst.attrs), attrs)
print("...from file")
......@@ -203,14 +204,14 @@ class TestXArrayDataset(unittest.TestCase):
print("...from xarray.Dataset")
dst = Dataset(self.xrdataset)
print(dst.dataset.attrs)
self.assertEqual(dst.time_coverage_start, None)
self.assertEqual(dst.time_coverage_end, None)
self.assertEqual(dst.time_coverage_start, datetime(2018, 1, 1))
self.assertEqual(dst.time_coverage_end, datetime(2018, 1, 1))
print("...from file")
dst = Dataset('test_xarraydataset.nc')
print(dst.dataset.attrs)
self.assertEqual(dst.time_coverage_start, None)
self.assertEqual(dst.time_coverage_end, None)
self.assertEqual(dst.time_coverage_start, datetime(2018, 1, 1))
self.assertEqual(dst.time_coverage_end, datetime(2018, 1, 1))
def test_print(self):
print("Test print")
......@@ -344,7 +345,7 @@ class TestXArrayDataset(unittest.TestCase):
os.remove("test_profile.nc")
dst.save(
dest='test_profile.nc',
attr_profile='global_attributes_profile.cfg'
profile='default_saving_profile.yaml'
)
def test_opening_file_not_existing(self):
......
......@@ -57,7 +57,7 @@ class GHRSSTNCDatasetL2PChecker(Checker, unittest.TestCase):
{'time': {'dims': ('time'), 'data': [datetime(2018, 1, 1)]},
'lat': {'dims': ('lat'), 'data': np.arange(-80, 80, 1)},
'lon': {'dims': ('lon',), 'data': np.arange(-180, 180, 1)},
'myvar': {'dims': ('lat', 'lon',),
'sea_surface_temperature': {'dims': ('lat', 'lon',),
'data': np.ones(shape=(160, 360))}
}
)
......@@ -65,4 +65,4 @@ class GHRSSTNCDatasetL2PChecker(Checker, unittest.TestCase):
fname = 'ghrsst_file.nc'
if os.path.exists(fname):
os.remove(fname)
ghrsstf.save(fname)
ghrsstf.save(fname, profile='ghrsst_saving_profile.yaml')