Skip to content

Commit

Permalink
Merge pull request #28 from FrontierDevelopmentLab/bugfix/boundless-w…
Browse files Browse the repository at this point in the history
…indow

Remove download for jp2. Now using same function for geotiff and jp2 with rio. Remove gdal
  • Loading branch information
frandorr committed Dec 7, 2021
2 parents 87f82aa + a37b285 commit 91bb3c7
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 112 deletions.
42 changes: 0 additions & 42 deletions providers/gcp/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import base64
import datetime
import io
import json
import logging
import os
Expand All @@ -13,7 +12,6 @@
import pystac
from flask import Flask
from flask import request
from google.cloud import storage
from loguru import logger
from satextractor.extractor import task_mosaic_patches
from satextractor.models import BAND_INFO
Expand All @@ -35,45 +33,6 @@
app.run(debug=True, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))


def get_bucket_name(url: str) -> str:
    """Return the bucket component of a gs url.

    For example, 'gs://gcp-public-data-sentinel-2/tiles/...' yields
    'gcp-public-data-sentinel-2'.

    Args:
        url (str): The gs url
    Returns:
        str: the bucket name
    """
    # "gs://bucket/..." splits on "/" into ["gs:", "", "bucket", ...],
    # so the bucket is always the third piece.
    pieces = url.split("/")
    bucket = pieces[2]
    return bucket


def get_blob_name(url: str) -> str:
    """Return the blob (object path) component of a gs url.

    For example,
    'gs://gcp-public-data-sentinel-2/tiles/17/Q/QV/S2B_MSIL1C.jp2' yields
    'tiles/17/Q/QV/S2B_MSIL1C.jp2'.

    Args:
        url (str): The gs url
    Returns:
        str: the blob name, or an empty string when the url has nothing
            after the bucket
    """
    # Split off at most the scheme, the empty piece after "gs:/", and the
    # bucket; whatever remains (slashes included) is the blob name.
    pieces = url.split("/", 3)
    if len(pieces) < 4:
        return ""
    return pieces[3]


def download_blob(url: str) -> io.BytesIO:
    """Fetch the blob behind a gs url and wrap its bytes in a buffer.

    Args:
        url (str): the url to download
    Returns:
        io.BytesIO: the content as bytes
    """
    client = storage.Client()
    # Resolve both url components before touching the bucket/blob handles.
    bucket_id = get_bucket_name(url)
    blob_id = get_blob_name(url)
    remote_blob = client.bucket(bucket_id).blob(blob_id)
    payload = remote_blob.download_as_bytes()
    return io.BytesIO(payload)


def format_stacktrace():
parts = ["Traceback (most recent call last):\n"]
parts.extend(traceback.format_stack(limit=25)[:-2])
Expand Down Expand Up @@ -155,7 +114,6 @@ def extract_patches():

patches = task_mosaic_patches(
cloud_fs=fs,
download_f=download_blob,
task=task,
method="max",
resolution=archive_resolution,
Expand Down
73 changes: 3 additions & 70 deletions src/satextractor/extractor/extractor.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import os
from typing import Any
from typing import Callable
from typing import List
from typing import Tuple

import numpy as np
import rasterio
from affine import Affine
from loguru import logger
from osgeo import gdal
from osgeo import osr
from rasterio import warp
from rasterio.crs import CRS
from rasterio.enums import Resampling
Expand Down Expand Up @@ -85,7 +82,7 @@ def get_tile_pixel_coords(tiles: List[Tile], raster_file: str) -> List[Tuple[int
return list(zip(rows, cols))


def download_and_extract_tiles_window_COG(
def download_and_extract_tiles_window(
fs: Any,
task: ExtractionTask,
resolution: int,
Expand Down Expand Up @@ -148,69 +145,8 @@ def download_and_extract_tiles_window_COG(
return outfiles


def download_and_extract_tiles_window(
    download_f: Callable,
    task: ExtractionTask,
    resolution: int,
) -> List[str]:
    """Download and extract from the task assets the window bounding the tiles.

    i.e. a crop of each original asset is written to a local file.

    Each asset of ``task.band`` is fetched with ``download_f`` and staged in
    GDAL's in-memory filesystem. When the asset's EPSG code differs from the
    one of the first tile, it is first warped to that EPSG. The result is then
    cropped to the tiles' window and resampled with ``gdal.Translate``.

    Args:
        download_f (Callable): The download function to use. It is called with
            the asset url and should return a BytesIO to read the content.
        task (ExtractionTask): The extraction task
        resolution (int): The target resolution
    Returns:
        List[str]: A list of files that store the crops of the original assets
    """
    band = task.band
    urls = [item.assets[band].href for item in task.item_collection.items]

    epsg = task.tiles[0].epsg
    mem_path = f"/vsimem/{task.task_id}_content"
    # Only unlink what was actually created: unlinking a missing /vsimem
    # path reports a failure.
    staged = False
    out_files = []
    try:
        for i, url in enumerate(urls):
            content = download_f(url)

            # Every asset reuses the same in-memory path, replacing the
            # previously staged content.
            gdal.FileFromMemBuffer(mem_path, content.read())
            staged = True
            d = gdal.Open(mem_path, gdal.GA_Update)

            proj = osr.SpatialReference(wkt=d.GetProjection())
            proj = proj.GetAttrValue("AUTHORITY", 1)
            # Dropping the reference closes the dataset.
            d = None

            proj_win = get_proj_win(task.tiles)

            if int(proj) != epsg:
                file = gdal.Warp(
                    f"{task.task_id}_warp.vrt",
                    mem_path,
                    dstSRS=f"EPSG:{epsg}",
                    creationOptions=["QUALITY=100", "REVERSIBLE=YES"],
                )
            else:
                file = mem_path

            out_f = f"{task.task_id}_{i}.jp2"
            gdal.Translate(
                out_f,
                file,
                projWin=proj_win,
                projWinSRS=f"EPSG:{epsg}",
                xRes=resolution,
                yRes=-resolution,
                resampleAlg="bilinear",
                creationOptions=["QUALITY=100", "REVERSIBLE=YES"],
            )
            # Release the warped dataset (if any) before the next iteration.
            file = None
            out_files.append(out_f)
    finally:
        # /vsimem files live until explicitly unlinked; without this the
        # last staged asset would stay in memory for the life of the
        # process, also when an error interrupts the loop.
        if staged:
            gdal.Unlink(mem_path)
    return out_files


def task_mosaic_patches(
cloud_fs: Any,
download_f: Callable,
task: ExtractionTask,
method: str = "max",
resolution: int = 10,
Expand All @@ -219,7 +155,7 @@ def task_mosaic_patches(
"""Get tile patches from the mosaic of a given task
Args:
download_f (Callable): The function to download the task assets
cloud_fs (Any): the cloud_fs to access the files
task (ExtractionTask): The task
method (str, optional): The method to use while merging the assets. Defaults to "max".
resolution (int, optional): The target resolution. Defaults to 10.
Expand All @@ -229,10 +165,7 @@ def task_mosaic_patches(
List[np.ndarray]: The tile patches as numpy arrays
"""

if task.constellation == "sentinel-2":
out_files = download_and_extract_tiles_window(download_f, task, resolution)
else:
out_files = download_and_extract_tiles_window_COG(cloud_fs, task, resolution)
out_files = download_and_extract_tiles_window(cloud_fs, task, resolution)

out_f = f"{task.task_id}_{dst_path}"
datasets = [rasterio.open(f) for f in out_files]
Expand Down

0 comments on commit 91bb3c7

Please sign in to comment.