diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 798b2ceb27..7d126ffa23 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -310,7 +310,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SOILMOISTURE | sm (Eday, Lmon), smStderr (Eday) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| ESACCI-SST | ts, tsStderr (Amon) | 2 | NCL | +| ESACCI-SST | tos (Omon, Oday), tosStderr (Oday) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-WATERVAPOUR | prw (Amon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-SST.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-SST.yml index 49d4731681..61e54c0a2b 100644 --- a/esmvaltool/cmorizers/data/cmor_config/ESACCI-SST.yml +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-SST.yml @@ -1,29 +1,29 @@ --- # Common global attributes for Cmorizer output -filename: '{year}{month}15_regridded_sst.nc' + attributes: dataset_id: ESACCI-SST - version: '2.2' - tier: 2 + version: 3.0-L4-analysis + tier: 3 modeling_realm: sat - project_id: OBS - source: 'http://surftemp.net/regridding/index.html' - reference: ["esacci-sst", "esacci-sst-bias-correction"] - comment: "Note that the variable tsStderr is an uncertainty not a standard error." 
+ project_id: OBS6 + source: 'dx.doi.org/10.5285/4a9654136a7148e39b7feb56f8bb02d2' + reference: ["esacci-sst"] -# Variables to cmorize (here use only filename prefix) +# Variables to cmorize (here use only filename ending) variables: - ts: - mip: Amon - raw: sst - file: ESACCI-SST_sat_L4-GHRSST-SSTdepth-OSTIA-GLOB - tsStderr: - mip: Amon - raw: sst_uncertainty - file: ESACCI-SST_sat_L4-GHRSST-SSTdepth-OSTIA-GLOB + tos: + mip: [Oday, Omon] + raw: analysed_sst + frequency: day + filename: ESACCI-L4_GHRSST-SSTdepth-OSTIA-GLOB_CDR3.0-v02.0-fv01.0.nc + start_year: 1980 + end_year: 2021 -# uncomment this part to produce sst cmorized data for ocean realm (Omon, tos) -# tos: -# mip: Omon -# raw: sst -# file: ESACCI-SST_sat_L4-GHRSST-SSTdepth-OSTIA-GLOB + tosStderr: + mip: [Oday] + raw: analysed_sst_uncertainty + frequency: day + filename: ESACCI-L4_GHRSST-SSTdepth-OSTIA-GLOB_CDR3.0-v02.0-fv01.0.nc + start_year: 1980 + end_year: 2021 diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index dabe314025..412f851ad9 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -549,12 +549,12 @@ datasets: Put all files under a single directory (no subdirectories with years). ESACCI-SST: - tier: 2 - source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sst/data/ - last_access: 2019-02-01 + tier: 3 + source: ftp3.ceda.ac.uk/neodc/eocis/data/global_and_regional/sea_surface_temperature/ + last_access: 2024-07-01 info: | Download the data from: - lt/Analysis/L4/v01.1/ + CDR_v3/Analysis/L4/v3.0.1/ Put all files under a single directory (no subdirectories with years). 
ESACCI-WATERVAPOUR: diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_sst.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_sst.py new file mode 100644 index 0000000000..9143287d87 --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_sst.py @@ -0,0 +1,74 @@ +"""Script to download ESACCI-SST.""" +# Import required python modules +import logging +import os + +from datetime import datetime + +from dateutil import relativedelta + +from esmvaltool.cmorizers.data.downloaders.ftp import FTPDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, start_date, end_date, + overwrite): + """Download dataset. + + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + if start_date is None: + start_date = datetime(1980, 1, 1) + if end_date is None: + end_date = datetime(2021, 12, 31) + + loop_date = start_date + + user = os.environ.get("ceda-user") + if user is None: + user = str(input("CEDA user name? ")) + if user == "": + errmsg = ("A CEDA account is required to download CCI SST data." + " Please visit https://services.ceda.ac.uk/cedasite/" + "register/info/ to create an account at CEDA if needed.") + logger.error(errmsg) + raise ValueError + + passwd = os.environ.get("ceda-passwd") + if passwd is None: + passwd = str(input("CEDA-password? 
")) + + downloader = FTPDownloader( + config=config, + server='ftp3.ceda.ac.uk', + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + user=user, + passwd=passwd, + ) + + downloader.connect() + downloader.set_cwd('neodc/eocis/data/global_and_regional/' + 'sea_surface_temperature/CDR_v3/Analysis/L4/v3.0.1/') + + while loop_date <= end_date: + year = loop_date.year + month = loop_date.strftime("%m") + day = loop_date.strftime("%d") + downloader.download_folder(f'./{year}/{month}/{day}/') + loop_date += relativedelta.relativedelta(days=1) diff --git a/esmvaltool/cmorizers/data/downloaders/ftp.py b/esmvaltool/cmorizers/data/downloaders/ftp.py index 9f0cd5e8f9..7f9d4e6492 100644 --- a/esmvaltool/cmorizers/data/downloaders/ftp.py +++ b/esmvaltool/cmorizers/data/downloaders/ftp.py @@ -35,16 +35,22 @@ class FTPDownloader(BaseDownloader): overwrite : bool Overwrite already downloaded files """ - def __init__(self, config, server, dataset, dataset_info, overwrite): + def __init__(self, config, server, dataset, dataset_info, overwrite, + user=None, passwd=None): super().__init__(config, dataset, dataset_info, overwrite) self._client = None self.server = server + self.user = user + self.passwd = passwd def connect(self): """Connect to the FTP server.""" self._client = ftplib.FTP(self.server) logger.info(self._client.getwelcome()) - self._client.login() + if self.user is None: + self._client.login() + else: + self._client.login(user=self.user, passwd=self.passwd) def set_cwd(self, path): """Set current working directory in the remote. diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_sst.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_sst.py index 8e55296f9e..16f4679991 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_sst.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_sst.py @@ -1,45 +1,35 @@ """ESMValTool CMORizer for ESACCI-SST data. Tier - Tier 2: other freely-available dataset. 
+ Tier 3: need to register at CEDA Source - http://surftemp.net/regridding/index.html + https://catalogue.ceda.ac.uk/uuid/4a9654136a7148e39b7feb56f8bb02d2 Last access - 20201214 + 20240628 Download and processing instructions - Download the following files: - Go to http://surftemp.net/regridding/index.html - and request regridded data with the following options: - Time Resolution: monthly - Longitude Resolution: 0.5 - Latitude Resolution: 0.5 - Start Date: 1982-01-01 - End Date: 2019-12-31 - Exclude data above sea ice threshold: True - (Threshold: 100 %) - Include post-hoc SST bias adjustments: True - Output Absolute or Anomaly SST: absolute - Generate Sea Ice Fraction: True - Error Correlation in Time (Days): 7 - Error Correlation In Space (Degrees): 3.0 - -Modification history - 20201204-roberts_charles: written. - 20201214-predoi_valeriu: approved. - 20201214-lauer_axel: approved. + A downloader is provided by ESMValTool. First you need + to register. + Go to https://services.ceda.ac.uk/cedasite/register/info/ + and create an account at CEDA if needed. 
+ """ +import copy +import glob import logging import os import iris +from datetime import datetime +from esmvalcore.cmor.fixes import get_time_bounds +from esmvalcore.preprocessor import regrid +from esmvaltool.cmorizers.data import utilities as utils from esmvalcore.preprocessor import concatenate from ...utilities import ( - convert_timeunits, fix_coords, fix_var_metadata, save_variable, @@ -49,49 +39,126 @@ logger = logging.getLogger(__name__) -def extract_variable(var_info, raw_info, attrs, year): +def extract_variable(raw_info): """Extract to all vars.""" rawvar = raw_info['name'] constraint = iris.NameConstraint(var_name=rawvar) - try: - cube = iris.load_cube(raw_info['file'], constraint) - except iris.exceptions.ConstraintMismatchError as constraint_error: - raise ValueError(f"No data available for variable {rawvar}" - f"and year {year}") from constraint_error - - # Fix cube - fix_var_metadata(cube, var_info) - convert_timeunits(cube, year) + if rawvar == 'analysed_sst_uncertainty': + tmp_cube = iris.load_cube(raw_info['file'], + iris.NameConstraint(var_name='analysed_sst')) + ancillary_var = tmp_cube.ancillary_variable('sea_water_temperature' + ' standard_error') + cube = tmp_cube.copy(ancillary_var.core_data()) + else: + try: + cube = iris.load_cube(raw_info['file'], constraint) + except iris.exceptions.ConstraintMismatchError as constraint_error: + raise ValueError(f"No data available for variable {rawvar} in file" + f" {raw_info['file']}") from constraint_error + + # Remove ancillary data + for ancillary_variable in cube.ancillary_variables(): + cube.remove_ancillary_variable(ancillary_variable) + return cube + + +def get_monthly_cube(cfg, var, vals, raw_info, attrs, + inpfile_pattern, year, month): + data_cubes = [] + month_inpfile_pattern = inpfile_pattern.format( + year=str(year)+"{:02}".format(month)) + logger.info("Pattern: %s", month_inpfile_pattern) + inpfiles = sorted(glob.glob(month_inpfile_pattern)) + if inpfiles == []: + logger.error("Could 
not find any files with this" + " pattern %s", month_inpfile_pattern) + raise ValueError + logger.info("Found input files: %s", inpfiles) + + for inpfile in inpfiles: + raw_info['file'] = inpfile + logger.info("CMORizing var %s from file type %s", var, + raw_info['file']) + data_cubes.append(extract_variable(raw_info)) + + cube = concatenate(data_cubes) + + # regridding from 0.05x0.05 to 0.5x0.5 (not for uncertainty field + if 'Stderr' not in var: + cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted') + + # Fix dtype + utils.fix_dtype(cube) + # Fix units + cmor_info = cfg['cmor_table'].get_variable(vals['mip'][0], var) + cube.convert_units(cmor_info.units) + # Fix metadata + fix_var_metadata(cube, cmor_info) + # Fix coordinates fix_coords(cube) + cube.coord('time').long_name = 'time' + cube.coord('latitude').long_name = 'latitude' + cube.coord('longitude').long_name = 'longitude' + # Fix monthly time bounds + time = cube.coord('time') + time.bounds = get_time_bounds(time, vals['frequency']) + + # set global attributes set_global_atts(cube, attrs) + # add comment to tosStderr + if var == 'tosStderr': + cube.attributes['comment'] = ('Note that the variable tsStderr is an ' + 'uncertainty not a standard error.') + return cube def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" - cmor_table = cfg['cmor_table'] - glob_attrs = cfg['attributes'] + glob_attrs = copy.deepcopy(cfg['attributes']) # run the cmorization for var, vals in cfg['variables'].items(): - var_info = cmor_table.get_variable(vals['mip'], var) - glob_attrs['mip'] = vals['mip'] - raw_info = {'name': vals['raw'], 'file': vals['file']} - inpfile = os.path.join(in_dir, cfg['filename']) - logger.info("CMORizing var %s from file type %s", var, inpfile) - years = range(1982, 2020) - months = ["0" + str(mo) for mo in range(1, 10)] + ["10", "11", "12"] - for year in years: - monthly_cubes = [] - for month in months: - raw_info['file'] = 
inpfile.format(year=year, month=month) - logger.info("CMORizing var %s from file type %s", var, - raw_info['file']) - cube = extract_variable(var_info, raw_info, glob_attrs, year) - monthly_cubes.append(cube) - yearly_cube = concatenate(monthly_cubes) - save_variable(yearly_cube, - var, - out_dir, - glob_attrs, - unlimited_dimensions=['time']) + if not start_date: + start_date = datetime(vals['start_year'], 1, 1) + if not end_date: + end_date = datetime(vals['end_year'], 12, 31) + raw_info = {'name': vals['raw']} + inpfile_pattern = os.path.join(in_dir, '{year}*' + vals['filename']) + logger.info("CMORizing var %s from file type %s", var, inpfile_pattern) + mon_cubes = [] + for year in range(start_date.year, end_date.year + 1): + logger.info("Processing year %s", year) + glob_attrs['mip'] = vals['mip'][0] + for month in range(start_date.month, end_date.month + 1): + monthly_cube = get_monthly_cube(cfg, var, vals, raw_info, + glob_attrs, inpfile_pattern, + year, month) + # Save daily data + save_variable(monthly_cube, + var, + out_dir, + glob_attrs, + unlimited_dimensions=['time']) + # Calculate monthly mean + if 'Stderr' not in var: + logger.info("Calculating monthly mean") + iris.coord_categorisation.add_month_number(monthly_cube, + 'time') + iris.coord_categorisation.add_year(monthly_cube, 'time') + monthly_cube = monthly_cube.aggregated_by( + ['month_number', 'year'], + iris.analysis.MEAN) + monthly_cube.remove_coord('month_number') + monthly_cube.remove_coord('year') + mon_cubes.append(monthly_cube) + # Save monthly data + if 'Stderr' not in var: + yearly_cube = concatenate(mon_cubes) + glob_attrs['mip'] = vals['mip'][1] + save_variable(yearly_cube, + var, + out_dir, + glob_attrs, + unlimited_dimensions=['time']) + mon_cubes.clear() diff --git a/esmvaltool/cmorizers/data/utilities.py b/esmvaltool/cmorizers/data/utilities.py index 82da07c12e..853ebd8526 100644 --- a/esmvaltool/cmorizers/data/utilities.py +++ b/esmvaltool/cmorizers/data/utilities.py @@ -425,7 
+425,7 @@ def set_global_atts(cube, attrs): # Additional attributes glob_dict.update(attrs) - cube.attributes = glob_dict + cube.attributes.globals = glob_dict def fix_bounds(cube, dim_coord): diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index b3cca9e028..6f3116a955 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -317,12 +317,14 @@ diagnostics: ESACCI-SST: description: ESACCI-SST check variables: - ts: - tsStderr: + tos: + additional_datasets: + - {dataset: ESACCI-SST, project: OBS6, mip: Omon, tier: 3, + type: sat, version: 3.0-L4-analysis, timerange: '198001/202112'} + tosStderr: additional_datasets: - - {dataset: ESACCI-SST, project: OBS, mip: Amon, tier: 2, - type: sat, version: 2.2, - start_year: 1982, end_year: 2019} + - {dataset: ESACCI-SST, project: OBS6, mip: Oday, tier: 3, + type: sat, version: 3.0-L4-analysis, timerange: '202001/202112'} scripts: null diff --git a/esmvaltool/references/esacci-sst.bibtex b/esmvaltool/references/esacci-sst.bibtex index 30eafc7756..278e41966e 100644 --- a/esmvaltool/references/esacci-sst.bibtex +++ b/esmvaltool/references/esacci-sst.bibtex @@ -1,13 +1,12 @@ @article{esacci-sst, - doi = {10.1038/s41597-019-0236-x}, - url = {https://doi.org/10.1038/s41597-019-0236-x}, - year = 2019, - month = {oct}, - publisher = {Springer Nature}, - volume = {6}, - number = {1}, - pages = {223}, - author = {Christopher J. Merchant and Owen Embury and Claire E. Bulgin and Thomas Block and Gary K. Corlett and Emma Fiedler and Simon A. Good and Jonathan Mittaz and Nick A. 
Rayner and David Berry and Steinar Eastwood and Michael Taylor and Yoko Tsushima and Alison Waterfall and Ruth Wilson and Craig Donlon}, - title = {Satellite-based time-series of sea-surface temperature since 1981 for climate applications ({SST} {CCI})}, - journal = {Scientific Data} + doi = {10.1038/s41597-024-03147-w}, + url = {https://doi.org/10.1038/s41597-024-03147-w}, + year = 2024, + journal = {Sci Data}, + volume = {11}, + number = {1}, + pages = {326}, + publisher = {{Nature Publishing Group}}, + author = {Embury, O. and Merchant, C.J. and Good, S.A. and Rayner, N.A. and Høyer, J.L. and Atkinson, C. and Block, T. and Alerskans, E. and Pearson, K.J. and Worsfold, M. and McCarroll, N. and Donlon, C.}, + title = {Satellite-based time-series of sea-surface temperature since 1980 for climate applications}, }