Commit 2470b768 authored by PIOLLE's avatar PIOLLE

fixed output format customization

parent b1f5b7f1
......@@ -779,6 +779,11 @@ class Dataset(ABC):
# all forms of time values
return self._make_coord('time', values, None, 'time', 'time')
def has_coord(self, coord: str) -> bool:
"""Return True if the coordinate field is defined"""
return coord in self._std_dataset.coords
@property
def sizes(self) -> MutableMapping[str, int]:
"""Mapping from dimension names to lengths."""
......@@ -1437,8 +1442,9 @@ class Dataset(ABC):
return self.attrs['time_coverage_end']
@time_coverage_end.setter
def time_coverage_end(self,
date: Union[str, 'datetime.datetime'] = 'auto'):
def time_coverage_end(
self,
date: Union[str, 'datetime.datetime'] = 'auto'):
"""Set the maximum sensing time of the data contained in the dataset
Args:
......@@ -1495,11 +1501,11 @@ class Dataset(ABC):
fatt = next((x for x in attrs if x == att), None)
if fatt is not None:
limits.append(attrs[fatt])
if len(bbox) == 4:
self.attrs['bbox'] = shapely.geometry.box(*bbox)
if len(limits) == 4:
self.attrs['bbox'] = shapely.geometry.box(*limits)
# estimate from the lat/lon
elif self.has_coordinate('lat') and self.has_coordinate('lon'):
elif self.has_coord('lat') and self.has_coord('lon'):
lats = self.get_lat()
lons = self.get_lon()
self.attrs['bbox'] = shapely.geometry.box(
......@@ -1520,7 +1526,6 @@ class Dataset(ABC):
if self.bbox is not None:
return self.bbox.wkt
def _get_attr_value(self, att: str) -> Any:
"""Return global attribute value or None if the attribute does not
exists.
......
......@@ -11,6 +11,7 @@ import logging
import uuid
import warnings
import netCDF4
import numpy
from cerbere.dataset.ncdataset import NCDataset
......@@ -172,17 +173,17 @@ class GHRSSTNCDataset(NCDataset):
dataset = super()._convert_format(profile=profile)
# fill in specific GHRSST attributes
dataset.attrs['netcdf_version_id'] = netCDF4.getlibversion()
dataset.attrs['uuid'] = str(uuid.uuid1())
dataset.attrs['date_modified'] = datetime.datetime.now()
dataset.attrs['date_issued'] = dataset.attrs['date_modified']
dataset.attrs['date_metadata_modified'] = dataset.attrs['date_modified']
dataset.attrs['geospatial_bounds'] = \
'POLYGON(({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))'.format(
dataset.attrs['geospatial_lat_min'],
dataset.attrs['geospatial_lon_min'],
dataset.attrs['geospatial_lat_max'],
dataset.attrs['geospatial_lon_max']
)
dataset.attrs['geospatial_bounds'] = self.wkt_bbox
for k, v in dataset.data_vars.items():
if 'time' not in v.dims:
dataset[k] = v.expand_dims('time', axis=0)
return dataset
......
......@@ -92,10 +92,6 @@ class Feature(Dataset, ABC):
"""Return the type of the feature"""
return self.__class__.__name__
def has_coordinate(self, coord: str) -> bool:
"""Return True if the coordinate variable is defined"""
return coord in self._std_dataset.coords
@property
@abstractmethod
def _feature_geodimnames(self) -> Tuple[str, ...]:
......
......@@ -5,7 +5,7 @@
attributes:
# Description
id:
naming_authority: fr.ifremer.cersat
naming_authority: org.ghrsst
title:
summary:
cdm_data_type:
......@@ -24,6 +24,7 @@ attributes:
band:
# Conventions
Conventions: CF 1.7, ACDD 1.3, ISO 8601
Metadata_Conventions: Climate and Forecast (CF) 1.7, Attribute Convention for Data Discovery (ACDD) 1.3
standard_name_vocabulary: NetCDF Climate and Forecast (CF) Metadata Convention
keywords_vocabulary: NASA Global Change Master Directory (GCMD) Science Keywords
......@@ -33,9 +34,9 @@ attributes:
instrument_vocabulary: CEOS instrument table
# Authorship
institution: Institut Francais de Recherche et d'Exploitation de la Mer/Centre de Recherche et d'Exploitation satellitaire
institution: Institut Francais de Recherche et d'Exploitation de la Mer (Ifremer) Centre de Recherche et d'Exploitation satellitaire (CERSAT)
institution_abbreviation: Ifremer/CERSAT
project: GHRSST
project: Group for High Resolution Sea Surface Temperature (GHRSST)
program: CMEMS
license: GHRSST protocol describes data use as free and open.
publisher_name: CERSAT
......@@ -55,10 +56,15 @@ attributes:
# Traceability
processing_software: Telemachus 1.0
product_version: 3.0
netcdf_version_id:
uuid:
history:
source:
source_version:
date_created:
date_modified:
date_issued:
date_metadata_modified:
# BBox
geospatial_lat_min:
......@@ -82,12 +88,158 @@ attributes:
fields:
lat:
standard_name: latitude
units: degrees_north
valid_range: -90, 90
comment: geographical coordinates, WGS84 projection
coordinates: lon lat
lon:
standard_name: longitude
units: degrees_east
valid_range: -180., 180
comment: geographical coordinates, WGS84 projection
time:
long_name: reference time of sst file
standard_name: time
sea_surface_temperature:
long_name: sea surface foundation temperature
standard_name: sea_surface_foundation_temperature
units: kelvin
valid_range: -2., 50.
sst_dtime:
long_name: time difference from reference time
units: seconds
valid_range: -86400, 86400
comment: time plus sst_dtime gives each measurement time
solar_zenith_angle:
long_name: solar zenith angle
units: angular_degree
valid_range: 0, 180
comment: the solar zenith angle at the time of the SST observations
sses_bias:
long_name: SSES bias estimate
units: kelvin
valid_range: -2.54, 2.54
comment: Bias estimate derived using the techniques described at http://www.ghrsst.org/SSES-Description-of-schemes.html
sses_standard_deviation:
long_name: SSES standard deviation
valid_range: 0., 2.54
comment: Standard deviation estimate derived using the techniques described at http://www.ghrsst.org/SSES-Description-of-schemes.html
quality_level:
long_name: quality level of SST pixel
valid_range: 0, 5
flag_meanings: no_data bad_data worst_quality low_quality acceptable_quality best_quality
flag_values: 0, 1, 2, 3, 4, 5
comment: These are the overall quality indicators and are used for all GHRSST SSTs
or_latitude:
units: degrees_north
valid_range: -80., 80
long_name: original latitude of the SST value
standard_name: latitude
or_longitude:
units: degrees_east
valid_range: -180., 180.
long_name: original longitude of the SST value
standard_name: longitude
or_number_of_pixels:
long_name: original number of pixels from the L2Ps contributing to the SST value
valid_range: -32767, 32767
satellite_zenith_angle:
long_name: satellite zenith angle
units: angular_degree
comment: the satellite zenith angle at the time of the SST observations
valid_min: 0
valid_max: 90
adjusted_sea_surface_temperature:
long_name: adjusted collated sea surface temperature
standard_name: sea_surface_subskin_temperature
units: kelvin
comment: bias correction using a multi-sensor reference field
valid_min: -300
valid_max: 4500
encoding:
lat:
dtype: float32
least_significant_digit: 3
lon:
dtype: float32
least_significant_digit: 3
sea_surface_temperature:
dtype: int32
_FillValue: -32768
scale_factor: 0.01
add_offset: 273.15
sst_dtime:
_FillValue: -2147483648
add_offset: 0
scale_factor: 1
dtype: int64
solar_zenith_angle:
_FillValue: -128
add_offset: 90.
scale_factor: 1.
quality_level:
_FillValue: -128
dtype: byte
sses_bias:
_FillValue: -128
dtype: byte
add_offset: 0.
scale_factor: 0.02
sses_standard_deviation:
_FillValue: -128
dtype: byte
add_offset: 2.54
scale_factor: 0.02
or_latitude:
dtype: int32
_FillValue: -32768
add_offset: 0.
scale_factor: 0.01
units: degrees_north
or_longitude:
dtype: int32
_FillValue: -32768
add_offset: 0.
scale_factor: 0.01
or_number_of_pixels:
dtype: byte
_FillValue: -32768
add_offset: 0
scale_factor: 1
satellite_zenith_angle:
dtype: byte
_FillValue: -128
add_offset: 0.
scale_factor: 1.
adjusted_sea_surface_temperature:
dtype: int32
_FillValue: -32768
add_offset: 273.15
scale_factor: 0.01
===============
Format profiles
===============
:mod:`cerbere` was designed to ease data management tasks. It can be used to
easily convert data files from one format to another, or to reformat data to
another formatting convention matching a specific project's requirements. For
some examples of such operations, refer also to :ref:`format_profile`.
When saving the content of a :mod:`~cerbere.dataset` class object, the output
file is formatted following the default settings and conventions implemented in
the :func:`~cerbere.dataset.dataset.Dataset.save` method of this class.
The format can also be refined and customized through an external format profile
file that can be passed on when saving a dataset. It provides the directives to
properly format a dataset, using some convention or default settings. In
particular, it can define:
* the list of global metadata attributes (and default value)
* the list of field metadata attributes (and default value) such as units,
standard name, comment, description, reference,...
* the encoding parameters used when writing the data on disk, such as (for a
NetCDF writer) scale factor, add offset, number of significant digits,
compression,...
For instance, let's format data to the GHRSST format (as defined in the GDS 2.1
document). We define these requirements in a profile file as follows:
This profile file can be passed on to the netCDF dedicated dataset object,
provided by :class:`cerbere.dataset.ncdataset.NCDataset` class when saving the
object content to disk:
.. code-block:: python
# create a dataset object
from cerbere.dataset.ncdataset import NCDataset
dst = NCDataset()
# save it in a NetCDF file, using above profile and NCDataset class
dst.save('test.nc')
Note that the attributes already defined in the dataset object are not
overridden by the default values in the profile file. Attributes not defined in
the dataset or feature object to be saved will fall back to their default value
defined in the format profile.
.. code-block:: python
# create a NCDataset dataset object and fill in some attributes
# save it, using above profile
......@@ -33,6 +33,7 @@ User guide
creating_datasets
creating_features
format_profile
Available dataset classes
=========================
......
......@@ -16,30 +16,3 @@ Use case 2: changing format convention
Use case 3: using a format profile
----------------------------------
The format can also be customized through an external format profile file that
can be passed on when saving a dataset. It provides the directives to properly
format a dataset, using some convention or default settings.
For instance, let's format data to the GHRSST format (as defined in the GDS 2.1
document). We define these requirements in a profile file as follows:
This profile file can be passed on to the netCDF dedicated dataset object,
provided by :class:`cerbere.dataset.ncdataset.NCDataset` class when saving the
object content to disk:
.. code-block:: python
# create a dataset object
# save it in a NetCDF file, using above profile and NCDataset class
Note that the attributes already defined in the dataset object are not
overridden by the default values in the profile file.
.. code-block:: python
# create a NCDataset dataset object and fill in some attributes
# save it, using above profile
......@@ -61,8 +61,9 @@ class GHRSSTNCDatasetL2PChecker(Checker, unittest.TestCase):
'data': np.ones(shape=(160, 360))}
}
)
ghrsstf = GHRSSTNCDataset(grid)
fname = 'ghrsst_file.nc'
if os.path.exists(fname):
os.remove(fname)
ghrsstf.save(fname, profile='ghrsst_saving_profile.yaml')
ghrsstf = GHRSSTNCDataset(fname, mode='w')
grid.save(ghrsstf, profile='ghrsst_saving_profile.yaml')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment