Skip to content

Commit

Permalink
ENH: Manage Sentinel-2 as formatted on the cloud (Element84 or Sinerg…
Browse files Browse the repository at this point in the history
…ise's way). #104
  • Loading branch information
remi-braun committed Nov 13, 2023
1 parent 867f1d1 commit a2cc0e1
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 31 deletions.
2 changes: 1 addition & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
- **BREAKING CHANGES: Rename `utils.stack_dict` to `utils.stack` since we are stacking datasets and not dict anymore.**
- **BREAKING CHANGES: Band ID for Sentinel-3 OLCI are now int instead of band names (i.e. `7` instead of `Oa07`. The names don't change).**
- **ENH: Allow to use bands IDs, names and common name added to mapped names when trying to load a spectral band. ([#111](https://github.com/sertit/eoreader/issues/111))**
- **ENH: Manage Sentinel-2 (currently L2A) as formatted on the cloud (Element84's way). ([#104](https://github.com/sertit/eoreader/issues/104)**
- **ENH: Manage Sentinel-2 as formatted on the cloud (Element84 or Sinergise's way). ([#104](https://github.com/sertit/eoreader/issues/104))**
- **ENH: Handle Python 3.12. ([#113](https://github.com/sertit/eoreader/issues/113))**
- FIX: Fix jpg, png... quicklooks management when plotting
- FIX: Fix an `xarray` issue when trying to compute percentiles when stacking bands
Expand Down
Binary file modified docs/_build/.jupyter_cache/global.db
Binary file not shown.
54 changes: 48 additions & 6 deletions docs/notebooks/aws.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,13 @@
"\n",
"<div class=\"alert alert-info\">\n",
" \n",
" <strong>Note:</strong> This is experimental for now, use it at your own risk !\n",
" <strong>Note:</strong> These products are not stored in the `.SAFE` format.\n",
" \n",
"</div>"
"</div>\n",
"\n",
"## Let's read data processed by Element84: Sentinel-2 L2A as COGs\n",
"\n",
"See this [registry](https://registry.opendata.aws/sentinel-2-l2a-cogs) (`arn:aws:s3:::sentinel-cogs`)"
]
},
{
Expand Down Expand Up @@ -41,10 +45,7 @@
"id": "36d9150318c0e2fe",
"metadata": {
"collapsed": false,
"is_executing": true,
"jupyter": {
"outputs_hidden": false
}
"is_executing": true
},
"outputs": [
{
Expand Down Expand Up @@ -91,6 +92,47 @@
"source": [
"blue[:, ::10, ::10].plot(cmap=\"Blues_r\")"
]
},
{
"cell_type": "markdown",
"source": [
"## Let's read data processed by Sinergise: Sentinel-2 L1C\n",
"\n",
"See this [registry](https://registry.opendata.aws/sentinel-2/) (`arn:aws:s3:::sentinel-s2-l1c`)\n",
"\n",
"NB: L2A would have been the same (`arn:aws:s3:::sentinel-s2-l2a`)\n",
"\n",
"<div class=\"alert alert-info\">\n",
" \n",
" <strong>Note:</strong> Sinergise data are stored as requester pays in AWS. Don't forget to state this when requesting data!\n",
" \n",
"</div>"
],
"metadata": {
"collapsed": false
},
"id": "1d8a1cb4fde8949c"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"with tempenv.TemporaryEnvironment({\n",
" \"AWS_S3_ENDPOINT\": \"s3.eu-central-1.amazonaws.com\",\n",
" \"AWS_SECRET_ACCESS_KEY\": os.getenv(\"AMAZON_AWS_SECRET_ACCESS_KEY\"),\n",
" \"AWS_ACCESS_KEY_ID\": os.getenv(\"AMAZON_AWS_ACCESS_KEY_ID\"),\n",
"}):\n",
" with s3.temp_s3(requester_pays=True):\n",
" path = r\"s3://sentinel-s2-l1c/tiles/10/S/DG/2022/7/8/0\"\n",
" prod = Reader().open(path)\n",
" prod.plot()\n",
" blue = prod.load(BLUE)[BLUE]"
],
"metadata": {
"collapsed": false
},
"id": "514d1d1c09b37c14"
}
],
"metadata": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@
LOGGER = logging.getLogger(EOREADER_NAME)


class S2CloudProduct(OpticalProduct):
class S2E84Product(OpticalProduct):
"""
Class for Sentinel-2 cloud products
Class for Sentinel-2 stored on AWS and processed by Element 84 (COGs) products
https://element84.com/geospatial/introducing-earth-search-v1-new-datasets-now-available/
Expand Down Expand Up @@ -608,7 +608,9 @@ def _get_condensed_name(self) -> str:
# Used to make the difference between 2 products acquired on the same tile at the same date but cut differently
# Sentinel-2 generation time: "%Y%m%dT%H%M%S" -> save only %H%M%S
gen_time = self.split_name[-1].split("T")[-1]
return f"{self.get_datetime()}_{self.constellation.name}_{self.tile_name}_{self.product_type.name}_{gen_time}"

# Force S2 as constellation name for S2_E84 to work
return f"{self.get_datetime()}_S2_{self.tile_name}_{self.product_type.name}_{gen_time}"

@cache
def get_mean_sun_angles(self) -> (float, float):
Expand Down
64 changes: 51 additions & 13 deletions eoreader/products/optical/s2_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class S2Jp2Masks(ListEnum):


BAND_DIR_NAMES = {
S2ProductType.L1C: "IMG_DATA",
S2ProductType.L1C: ".",
S2ProductType.L2A: {
"01": ["R60m"],
"02": ["R10m", "R20m", "R60m"],
Expand Down Expand Up @@ -164,6 +164,9 @@ def __init__(
# L2Ap
self._is_l2ap = False

# S2 Sinergise
self._is_sinergise = kwargs.pop("is_sinergise", False)

# Initialization from the super class
super().__init__(product_path, archive_path, output_path, remove_tmp, **kwargs)

Expand All @@ -183,7 +186,9 @@ def _pre_init(self, **kwargs) -> None:
"""
self._has_cloud_cover = True
self.needs_extraction = False
self._use_filename = True
# Use filename for SAFE names, not for others
# S2A_MSIL1C_20191215T110441_N0208_R094_T30TXP_20191215T114155.SAFE has 65 characters
self._use_filename = len(self.filename) > 50
self._raw_units = RawUnits.REFL

# Post init done by the super class
Expand Down Expand Up @@ -486,6 +491,28 @@ def _get_name_constellation_specific(self) -> str:

return name

def _get_qi_folder(self):
    """
    Get the pattern locating the folder that stores the quality masks.

    Returns:
        str: :code:`"qi"` for Sinergise products, otherwise a regex-style pattern (archived products)
        or a glob pattern (non-archived products) pointing to :code:`QI_DATA`
    """
    # The Sinergise layout takes precedence over the archive check
    if self._is_sinergise:
        return "qi"

    # Regex-style pattern for archives, glob pattern for products on disk
    return ".*GRANULE.*QI_DATA" if self.is_archived else "**/*GRANULE/*/QI_DATA"

def _get_image_folder(self):
    """
    Get the pattern locating the folder that stores the band images.

    Returns:
        str: :code:`"."` for Sinergise products, otherwise a regex-style pattern (archived products)
        or a glob pattern (non-archived products) pointing to :code:`IMG_DATA`
    """
    # The Sinergise layout takes precedence over the archive check
    if self._is_sinergise:
        return "."

    # Regex-style pattern for archives, glob pattern for products on disk
    return ".*GRANULE.*IMG_DATA" if self.is_archived else "**/*GRANULE/*/IMG_DATA"

def _get_res_band_folder(self, band_list: list, pixel_size: float = None) -> dict:
"""
Return the folder containing the bands of a proper S2 products.
Expand Down Expand Up @@ -544,7 +571,12 @@ def _get_res_band_folder(self, band_list: list, pixel_size: float = None) -> dic
s2_bands_folder[band] = band_path
else:
# Search for the name of the folder into the S2 products
s2_bands_folder[band] = next(self.path.glob(f"**/*/{dir_name}"))
try:
s2_bands_folder[band] = next(
self.path.glob(f"{self._get_image_folder()}/{dir_name}")
)
except IndexError:
s2_bands_folder[band] = self.path

for band in band_list:
if band not in s2_bands_folder:
Expand Down Expand Up @@ -595,12 +627,12 @@ def get_band_paths(
if self.is_archived:
band_paths[band] = path.get_archived_rio_path(
self.path,
f".*{band_folders[band]}.*_B{band_id}.*.jp2",
f".*{band_folders[band]}.*B{band_id}.*.jp2",
)
else:
band_paths[band] = path.get_file_in_dir(
band_folders[band],
f"_B{band_id}",
f"B{band_id}",
extension="jp2",
)
except (FileNotFoundError, IndexError) as ex:
Expand Down Expand Up @@ -778,7 +810,7 @@ def _open_mask_lt_4_0(
self, mask_id: Union[str, S2GmlMasks], band: Union[BandNames, str] = None
) -> gpd.GeoDataFrame:
"""
Open S2 mask (GML files stored in QI_DATA) as :code:`gpd.GeoDataFrame`.
Open S2 mask (GML files stored in QI_DATA/qi) as :code:`gpd.GeoDataFrame`.
Masks that can be called that way are:
Expand Down Expand Up @@ -839,7 +871,7 @@ def _open_mask_lt_4_0(
with zipfile.ZipFile(self.path, "r") as zip_ds:
filenames = [f.filename for f in zip_ds.filelist]
regex = re.compile(
f".*GRANULE.*QI_DATA.*{mask_id.value}_B{band_name}.gml"
f"{self._get_qi_folder()}.*{mask_id.value}_B{band_name}.gml"
)
mask_path = zip_ds.extract(
list(filter(regex.match, filenames))[0], tmp_dir.name
Expand All @@ -848,7 +880,7 @@ def _open_mask_lt_4_0(
# Get mask path
mask_path = path.get_file_in_dir(
self.path,
f"**/*GRANULE/*/QI_DATA/*{mask_id.value}_B{band_name}.gml",
f"{self._get_qi_folder()}/*{mask_id.value}_B{band_name}.gml",
exact_name=True,
)

Expand Down Expand Up @@ -902,13 +934,13 @@ def _open_mask_gt_4_0(

if self.is_archived:
mask_path = path.get_archived_rio_path(
self.path, f".*GRANULE.*QI_DATA.*{mask_id.value}_B{band_id}.jp2"
self.path, f"{self._get_qi_folder()}.*{mask_id.value}_B{band_id}.jp2"
)
else:
# Get mask path
mask_path = path.get_file_in_dir(
self.path,
f"**/*GRANULE/*/QI_DATA/*{mask_id.value}_B{band_id}.jp2",
f"{self._get_qi_folder()}/*{mask_id.value}_B{band_id}.jp2",
exact_name=True,
)

Expand Down Expand Up @@ -1200,7 +1232,9 @@ def _get_condensed_name(self) -> str:
# Used to make the difference between 2 products acquired on the same tile at the same date but cut differently
# Sentinel-2 generation time: "%Y%m%dT%H%M%S" -> save only %H%M%S
gen_time = self.split_name[-1].split("T")[-1]
return f"{self.get_datetime()}_{self.constellation.name}_{self.tile_name}_{self.product_type.name}_{gen_time}"

# Force S2 as constellation name for S2_SIN to work
return f"{self.get_datetime()}_S2_{self.tile_name}_{self.product_type.name}_{gen_time}"

@cache
def get_mean_sun_angles(self) -> (float, float):
Expand Down Expand Up @@ -1254,8 +1288,12 @@ def _read_mtd(self) -> (etree._Element, dict):
Returns:
(etree._Element, dict): Metadata XML root and its namespaces
"""
mtd_from_path = "GRANULE/*/MTD*.xml"
mtd_archived = r"GRANULE.*MTD.*\.xml"
if self._is_sinergise:
mtd_from_path = "metadata.xml"
mtd_archived = r"metadata\.xml"
else:
mtd_from_path = "GRANULE/*/MTD*.xml"
mtd_archived = r"GRANULE.*MTD.*\.xml"

return self._read_mtd_xml(mtd_from_path, mtd_archived)

Expand Down
28 changes: 22 additions & 6 deletions eoreader/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,16 @@ class Constellation(ListEnum):
S2 = "Sentinel-2"
"""Sentinel-2"""

S2_CLOUD = "Sentinel-2 stored on cloud"
S2_E84 = "Sentinel-2 stored on AWS and processed by Element84"
"""
Sentinel-2 stored on AWS and processed by Element84:
- Element84: arn:aws:s3:::sentinel-cogs - https://registry.opendata.aws/sentinel-2-l2a-cogs
"""
Sentinel-2 stored on cloud

For now, only the ones created by Element84 are supported: https://stacindex.org/catalogs/earth-search#/43bjKKcJQfxYaT1ir3Ep6uENfjEoQrjkzhd2?t=3
S2_SIN = "Sentinel-2 stored on AWS and processed by Sinergise"
"""
Sentinel-2 stored on AWS and processed by Sinergise:
arn:aws:s3:::sentinel-s2-l1c and arn:aws:s3:::sentinel-s2-l2a - https://registry.opendata.aws/sentinel-2/
"""

S2_THEIA = "Sentinel-2 Theia"
Expand Down Expand Up @@ -204,8 +209,9 @@ class Constellation(ListEnum):
CONSTELLATION_REGEX = {
Constellation.S1: r"S1[AB]_(IW|EW|SM|WV)_(RAW|SLC|GRD|OCN)[FHM_]_[0-2]S[SD][HV]_\d{8}T\d{6}_\d{8}T\d{6}_\d{6}_.{11}",
Constellation.S2: r"S2[AB]_MSIL(1C|2A)_\d{8}T\d{6}_N\d{4}_R\d{3}_T\d{2}\w{3}_\d{8}T\d{6}",
# Element84 : S2A_31UDQ_20230714_0_L2A
Constellation.S2_CLOUD: r"S2[AB]_\d{2}\w{3}_\d{8}_\d_L(1C|2A)",
# Element84 : S2A_31UDQ_20230714_0_L2A, Sinergise: 0 or 1...
Constellation.S2_E84: r"S2[AB]_\d{2}\w{3}_\d{8}_\d_L(1C|2A)",
Constellation.S2_SIN: r"\d",
Constellation.S2_THEIA: r"SENTINEL2[AB]_\d{8}-\d{6}-\d{3}_L(2A|1C)_T\d{2}\w{3}_[CDH](_V\d-\d|)",
Constellation.S3_OLCI: r"S3[AB]_OL_[012]_\w{6}_\d{8}T\d{6}_\d{8}T\d{6}_\d{8}T\d{6}_\w{17}_\w{3}_[OFDR]_(NR|ST|NT)_\d{3}",
Constellation.S3_SLSTR: r"S3[AB]_SL_[012]_\w{6}_\d{8}T\d{6}_\d{8}T\d{6}_\d{8}T\d{6}_\w{17}_\w{3}_[OFDR]_(NR|ST|NT)_\d{3}",
Expand Down Expand Up @@ -269,7 +275,14 @@ class Constellation(ListEnum):
"regex": r".*s1[ab]-(iw|ew|sm|wv)\d*-(raw|slc|grd|ocn)-[hv]{2}-\d{8}t\d{6}-\d{8}t\d{6}-\d{6}-\w{6}-\d{3}\.xml",
},
Constellation.S2: {"nested": 3, "regex": r"MTD_TL.xml"},
Constellation.S2_CLOUD: rf"{CONSTELLATION_REGEX[Constellation.S2_CLOUD]}\.json",
Constellation.S2_E84: rf"{CONSTELLATION_REGEX[Constellation.S2_E84]}\.json",
Constellation.S2_SIN: {
"nested": -1, # File that can be found at any level (product/**/file)
"regex": [
r"metadata\.xml", # Too generic name, check also a band
r"B12\.jp2",
],
},
Constellation.S2_THEIA: rf"{CONSTELLATION_REGEX[Constellation.S2_THEIA]}_MTD_ALL\.xml",
Constellation.S3_OLCI: r"Oa\d{2}_radiance.nc",
Constellation.S3_SLSTR: r"S\d_radiance_an.nc",
Expand Down Expand Up @@ -536,6 +549,9 @@ def open(
# SPOT-4/5 constellations
elif const in [Constellation.SPOT4, Constellation.SPOT5]:
sat_class = "spot45_product"
elif const in [Constellation.S2_SIN]:
sat_class = "s2_product"
kwargs["is_sinergise"] = True

# Manage both optical and SAR
try:
Expand Down
2 changes: 1 addition & 1 deletion eoreader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from eoreader.keywords import _prune_keywords

LOGGER = logging.getLogger(EOREADER_NAME)
DEFAULT_TILE_SIZE = "auto"
DEFAULT_TILE_SIZE = 1024
UINT16_NODATA = rasters.UINT16_NODATA


Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ spyndex>=0.3.0
pystac[validation]

# SERTIT libs
sertit[full]>=1.31.0
sertit[full]>=1.32.0

# Optimizations
dask[complete]>=2021.10.0
Expand Down

0 comments on commit a2cc0e1

Please sign in to comment.