diff --git a/.travis.yml b/.travis.yml index 2d7fe1b1..74d00c55 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,6 @@ dist: xenial language: python python: - "3.8" - - "nightly" addons: apt: sources: diff --git a/geoextent/__init__.py b/geoextent/__init__.py index 9e30d524..8a15ff13 100644 --- a/geoextent/__init__.py +++ b/geoextent/__init__.py @@ -1,3 +1,3 @@ name = "geoextent" -__version__ = '0.6.0' +__version__ = '0.6.1' diff --git a/geoextent/lib/extent.py b/geoextent/lib/extent.py index 0ae4ec77..07f3e0de 100644 --- a/geoextent/lib/extent.py +++ b/geoextent/lib/extent.py @@ -34,6 +34,18 @@ def compute_bbox_wgs84(module, path): raise Exception("The bounding box could not be transformed to the target CRS epsg:{} \n error {}" .format(hf.WGS84_EPSG_ID, e)) + validate = hf.validate_bbox_wgs84(spatial_extent['bbox']) + logger.debug("Validate: {}".format(validate)) + + if not hf.validate_bbox_wgs84(spatial_extent['bbox']): + try: + flip_bbox = hf.flip_bbox(spatial_extent['bbox']) + spatial_extent['bbox'] = flip_bbox + + except Exception as e: + raise Exception("The bounding box could not be transformed to the target CRS epsg:{} \n error {}" + .format(hf.WGS84_EPSG_ID, e)) + return spatial_extent diff --git a/geoextent/lib/handleCSV.py b/geoextent/lib/handleCSV.py index 38c83e0e..b3a1103b 100644 --- a/geoextent/lib/handleCSV.py +++ b/geoextent/lib/handleCSV.py @@ -3,7 +3,6 @@ from osgeo import gdal from . import helpfunctions as hf - logger = logging.getLogger("geoextent") search = {"longitude": ["(.)*longitude", "(.)*long(.)*", "^lon", "lon$", "(.)*lng(.)*", "^x", "x$"], @@ -30,8 +29,8 @@ def checkFileSupported(filepath): if driver == "CSV": with open(filepath) as csv_file: - daten = csv.reader(csv_file.readlines()) - if daten is None: + data = csv.reader(csv_file.readlines()) + if data is None: logger.debug("File {} is NOT supported by HandleCSV module".format(filepath)) return False else: @@ -40,6 +39,7 @@ def checkFileSupported(filepath): else: return False + def getBoundingBox(filePath): ''' Function purpose: extracts the spatial extent (bounding box) from a csv-file \n @@ -49,32 +49,32 @@ def getBoundingBox(filePath): with open(filePath) as csv_file: # To get delimiter either comma or simecolon - daten = hf.getDelimiter(csv_file) + date = hf.getDelimiter(csv_file) elements = [] - for x in daten: + for x in date: elements.append(x) - spatialLatExtent = hf.searchForParameters(elements, search['latitude'], exp_data='numeric') + spatial_lat_extent = hf.searchForParameters(elements, search['latitude'], exp_data='numeric') - minlat = None - maxlat = None + min_lat = None + max_lat = None - if spatialLatExtent is None: + if spatial_lat_extent is None: pass else: - minlat = (min(spatialLatExtent)) - maxlat = (max(spatialLatExtent)) + min_lat = (min(spatial_lat_extent)) + max_lat = (max(spatial_lat_extent)) - spatialLonExtent = hf.searchForParameters(elements, search['longitude'], exp_data='numeric') + spatial_lon_extent = hf.searchForParameters(elements, search['longitude'], exp_data='numeric') - if spatialLonExtent is None: + if spatial_lon_extent is None: raise Exception('The csv file from ' + filePath + ' has no BoundingBox') else: - minlon = (min(spatialLonExtent)) - maxlon = (max(spatialLonExtent)) + min_lon = (min(spatial_lon_extent)) + max_lon = (max(spatial_lon_extent)) - bbox = [minlon, minlat, maxlon, maxlat] + bbox = [min_lon, min_lat, max_lon, max_lat] logger.debug("Extracted Bounding box (without projection): {}".format(bbox)) crs = getCRS(filePath) logger.debug("Extracted CRS: {}".format(crs)) @@ -84,53 +84,59 @@ def getBoundingBox(filePath): return spatialExtent -def getTemporalExtent(filePath, num_sample): - ''' extract time extent from csv string \n + +def getTemporalExtent(filepath, num_sample): + """ extract time extent from csv string \n input "filePath": type string, file path to csv File \n returns temporal extent of the file: type list, length = 2, both entries have the type str, temporalExtent[0] <= temporalExtent[1] - ''' + """ - with open(filePath) as csv_file: - # To get delimiter either comma or simecolon - daten = hf.getDelimiter(csv_file) + with open(filepath) as csv_file: + # To get delimiter either comma or semicolon + data = hf.getDelimiter(csv_file) elements = [] - for x in daten: + for x in data: elements.append(x) all_temporal_extent = hf.searchForParameters(elements, search['time'], exp_data="time") if all_temporal_extent is None: - raise Exception('The csv file from ' + filePath + ' has no TemporalExtent') + raise Exception('The csv file from ' + filepath + ' has no TemporalExtent') else: tbox = [] - parsed_time = hf.date_parser(all_temporal_extent, num_sample = num_sample) + parsed_time = hf.date_parser(all_temporal_extent, num_sample=num_sample) if parsed_time is not None: # Min and max into ISO8601 format ('%Y-%m-%d') tbox.append(min(parsed_time).strftime('%Y-%m-%d')) tbox.append(max(parsed_time).strftime('%Y-%m-%d')) else: - raise Exception('The csv file from ' + filePath + ' has no recognizable TemporalExtent') + raise Exception('The csv file from ' + filepath + ' has no recognizable TemporalExtent') return tbox -def getCRS(filePath): + +def getCRS(filepath): '''extracts coordinatesystem from csv File \n input "filepath": type string, file path to csv file \n returns the epsg code of the used coordinate reference system, type list, contains extracted coordinate system of content from csv file ''' - with open(filePath) as csv_file: - daten = csv.reader(csv_file.readlines()) + with open(filepath) as csv_file: + data = hf.getDelimiter(csv_file) elements = [] - for x in daten: + for x in data: elements.append(x) - if hf.searchForParameters(elements, search['latitude'] + search['longitude']) is None: - if hf.searchForParameters(elements, ["crs","srsID"]) is None: - raise Exception('The csv file from ' + filePath + ' has no CRS') - if hf.searchForParameters(elements, ["crs","srsID"]) == "WGS84": - return "4326" - else: - raise Exception('The csv file from ' + filePath + ' has no WGS84 CRS') + param = hf.searchForParameters(elements, ["crs", "srsID", "EPSG"]) + + if param is None: + logger.debug("{} : There is no identifiable coordinate reference system. We will try to use EPSG: 4326".format(filepath)) + crs = "4326" + + elif len(list(set(param))) > 1: + logger.debug("{} : Coordinate reference system of the file is ambiguous. Extraction is not possible.".format(filepath)) + raise Exception('The csv file from ' + filepath + ' has no CRS') else: - return "4326" + crs = str(list(set(param))[0]) + + return crs \ No newline at end of file diff --git a/geoextent/lib/handleRaster.py b/geoextent/lib/handleRaster.py index 03d8a987..e139b6f6 100644 --- a/geoextent/lib/handleRaster.py +++ b/geoextent/lib/handleRaster.py @@ -33,17 +33,17 @@ def checkFileSupported(filepath): return False -def getBoundingBox(filePath): - ''' extracts bounding box from geotiff \n - input "filepath": type string, file path to geotiff file \n - returns bounding box of the file: type list, length = 4 , type = float, schema = [min(longs), min(lats), max(longs), max(lats)] - ''' +def getBoundingBox(filepath): + """ extracts bounding box from raster \n + input "filepath": type string, file path to raster file \n + returns bounding box of the file: type list, length = 4 , type = float, schema = [min(longs), min(lats), max(longs), max(lats)] + """ # Enable exceptions crs_output = hf.WGS84_EPSG_ID gdal.UseExceptions() - geotiffContent = gdal.Open(filePath) + geotiffContent = gdal.Open(filepath) # get the existing coordinate system old_crs = osr.SpatialReference() @@ -59,33 +59,38 @@ def getBoundingBox(filePath): height = geotiffContent.RasterYSize gt = geotiffContent.GetGeoTransform() - minx = gt[0] - miny = gt[3] + width * gt[4] + height * gt[5] - maxx = gt[0] + width * gt[1] + height * gt[2] - maxy = gt[3] + min_x = gt[0] + min_y = gt[3] + width * gt[4] + height * gt[5] + max_x = gt[0] + width * gt[1] + height * gt[2] + max_y = gt[3] transform = osr.CoordinateTransformation(old_crs, new_crs) - # get the coordinates in lat long - latlongmin = transform.TransformPoint(minx, miny) - latlongmax = transform.TransformPoint(maxx, maxy) + try: + # get the coordinates in lat long + lat_long_min = transform.TransformPoint(min_x, min_y) + lat_long_max = transform.TransformPoint(max_x, max_y) + except: + # Assume that coordinates are in EPSG:4236 + logger.debug("{}: There is no identifiable coordinate reference system. We will try to use EPSG: 4326 " + .format(filepath)) + lat_long_min = [min_y, min_x] + lat_long_max = [max_y, max_x] - bbox = [latlongmin[0], latlongmin[1], latlongmax[0], latlongmax[1]] + bbox = [lat_long_min[0], lat_long_min[1], lat_long_max[0], lat_long_max[1]] if int(osgeo.__version__[0]) >= 3: if old_crs.GetAxisMappingStrategy() == 1: - bbox = [latlongmin[1], latlongmin[0], latlongmax[1], latlongmax[0]] + bbox = [lat_long_min[1], lat_long_min[0], lat_long_max[1], lat_long_max[0]] spatialExtent = {"bbox": bbox, "crs": str(crs_output)} return spatialExtent -def getTemporalExtent(filePath): - ''' extracts temporal extent of the geotiff \n +def getTemporalExtent(filepath): + """ extracts temporal extent of the geotiff \n input "filepath": type string, file path to geotiff file \n - returns None as There is no time value for GeoTIFF files - ''' - - print('There is no time value for GeoTIFF files') + returns None as There is no time value for GeoTIFF files + """ + logger.debug('{} There is no time value for raster files'.format(filepath)) return None - diff --git a/geoextent/lib/handleVector.py b/geoextent/lib/handleVector.py index e3cfd086..292a2562 100644 --- a/geoextent/lib/handleVector.py +++ b/geoextent/lib/handleVector.py @@ -110,7 +110,6 @@ def getBoundingBox(filepath): """ datasource = ogr.Open(filepath) geo_dict = {} - crs_output = hf.WGS84_EPSG_ID for layer in datasource: layer_name = layer.GetDescription() @@ -118,7 +117,9 @@ def getBoundingBox(filepath): bbox = [ext[0], ext[2], ext[1], ext[3]] try: - crs = layer.GetSpatialRef().GetAttrValue("GEOGCS|AUTHORITY", 1) + spatial_ref = layer.GetSpatialRef() + spatial_ref.AutoIdentifyEPSG() + crs = spatial_ref.GetAuthorityCode(None) except Exception as e: logger.debug("Error extracting EPSG CODE from layer {}: \n {}".format(layer_name, e)) crs = None diff --git a/geoextent/lib/helpfunctions.py b/geoextent/lib/helpfunctions.py index f9974df7..6ce3e58c 100644 --- a/geoextent/lib/helpfunctions.py +++ b/geoextent/lib/helpfunctions.py @@ -87,7 +87,7 @@ def searchForParameters(elements, paramArray, exp_data=None): def transformingIntoWGS84(crs, coordinate): ''' - Function purpose: transforming SRS into WGS84 (EPSG:4978; used by the GPS satellite navigation system) \n + Function purpose: transforming SRS into WGS84 (EPSG:4326) \n Input: crs, point \n Output: retPoint constisting of x2, y2 (transformed points) ''' @@ -112,7 +112,7 @@ def transformingIntoWGS84(crs, coordinate): def transformingArrayIntoWGS84(crs, pointArray): ''' - Function purpose: transforming SRS into WGS84 (EPSG:4978; used by the GPS satellite navigation system) from an array \n + Function purpose: transforming SRS into WGS84 (EPSG 4326; used by the GPS satellite navigation system) from an array \n Input: crs, pointArray \n Output: array array ''' @@ -130,6 +130,40 @@ def transformingArrayIntoWGS84(crs, pointArray): return [transf_bbox[0][0], transf_bbox[0][1], transf_bbox[1][0], transf_bbox[1][1]] +def validate_bbox_wgs84(bbox): + """ + :param bbox: + :return: + """ + valid = True + lon_values = bbox[0:3:2] + lat_values = bbox[1:4:2] + + if sum(list(map(lambda x: x < -90 or x > 90, lat_values))) + sum( + list(map(lambda x: x < -180 or x > 180, lon_values))) > 0: + valid = False + + return valid + + +def flip_bbox(bbox): + """ + :param bbox: + :return: + """ + # Flip values + lon_values = bbox[1:4:2] + lat_values = bbox[0:3:2] + + bbox_flip = [lon_values[0], lat_values[0], lon_values[1], lat_values[1]] + if validate_bbox_wgs84(bbox_flip): + logger.warning("Longitude and latitude values flipped") + return bbox_flip + else: + raise Exception("Latitude and longitude values extracted do not seem to be correctly transformed. We tried " + "flipping latitude and longitude values but both bbox are incorrect") + + def validate(date_text): try: if datetime.datetime.strptime(date_text, output_time_format): diff --git a/tests/test_api.py b/tests/test_api.py index 29add37c..176f8435 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,6 +1,7 @@ import os # used to get the location of the testdata import sys import tempfile +import urllib.request import pytest import geoextent.lib.extent as geoextent from help_functions_test import create_zip, tolerance @@ -24,9 +25,10 @@ def test_defaults(): assert "crs" not in result -@pytest.mark.skip(reason="file format not implemented yet") def test_netcdf_extract_bbox(): - assert geoextent.fromFile("tests/testdata/nc/ECMWF_ERA-40_subset.nc") == [-90.0, 0.0, 90.0, 357.5] + result = geoextent.fromFile("tests/testdata/nc/zeroes.nc") + assert result["bbox"] == pytest.approx([19.86842, -52.63157, 25.13157, 52.63157], abs=tolerance) + assert result["crs"] == "4326" def test_kml_extract_bbox(): @@ -169,6 +171,17 @@ def test_zipfile_nested_folders(): assert result["tbox"] == ['2017-04-08', '2020-02-06'] +def test_png_file_extract_bbox(): + with tempfile.TemporaryDirectory() as tmp: + url = 'https://zenodo.org/record/820562/files/20160100_Hpakan_20151123_PRE.png?download=1' + url2 = 'https://zenodo.org/record/820562/files/20160100_Hpakan_20151123_PRE.pngw?download=1' + urllib.request.urlretrieve(url, os.path.join(tmp, "20160100_Hpakan_20151123_PRE.png")) + urllib.request.urlretrieve(url2, os.path.join(tmp, "20160100_Hpakan_20151123_PRE.pngw")) + result = geoextent.fromFile(os.path.join(tmp, "20160100_Hpakan_20151123_PRE.png"), bbox=True) + assert result["bbox"] == pytest.approx([96.21146, 25.55834, 96.35490, 25.63293], abs=tolerance) + assert result["crs"] == "4326" + + @pytest.mark.skip(reason="file format not implemented yet") def test_netcdf_extract_time(): assert geoextent.fromFile("tests/testdata/nc/ECMWF_ERA-40_subset.nc", tbox=True) == ['2002-07-01', '2002-07-31'] @@ -199,6 +212,18 @@ def test_gml_extract_bbox_time(): assert result['tbox'] == ['2005-12-31', '2013-11-30'] +def test_jpge_2000_extract_bbox(): + result = geoextent.fromFile("tests/testdata/jpge2000/MSK_SNWPRB_60m.jp2", bbox=True) + assert result['bbox'] == pytest.approx([-74.09868, 4.434354, -73.10649, 5.425259], abs=tolerance) + assert result['crs'] == "4326" + + +def test_esri_ascii_extract_bbox(): + result = geoextent.fromFile("tests/testdata/asc/Churfirsten_30m.asc", bbox=True) + assert result['bbox'] == pytest.approx([8.85620, 46.93996, 8.85699, 46.94050], abs=tolerance) + assert result['crs'] == "4326" + + def test_not_found_file(): result = geoextent.fromFile('tests/testdata/empt.geojson', bbox=True) assert result is None @@ -213,3 +238,9 @@ def test_bbox_and_tbox_both_false(): with pytest.raises(Exception) as excinfo: geoextent.fromFile('tests/path_does_not_matter', bbox=False, tbox=False) assert "No extraction options" in str(excinfo.value) + + +def test_invalid_bbox_and_flip(): + result = geoextent.fromFile('tests/testdata/csv/cities_NL_case_flip.csv', bbox=True) + assert result['bbox'] == pytest.approx([4.3175, 5.0, 95.0, 53.217222], abs=tolerance) + assert result['crs'] == "4326" diff --git a/tests/test_cli.py b/tests/test_cli.py index 8b1201ae..b48f9dba 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -138,15 +138,14 @@ def test_print_supported_formats(script_runner): assert "Supported formats:" in ret.stdout, "list of supported formats is printed to console" -@pytest.mark.skip(reason="file format not implemented yet") def test_netcdf_bbox(script_runner): ret = script_runner.run('geoextent', - '-b', 'tests/testdata/nc/ECMWF_ERA-40_subset.nc') + '-b', 'tests/testdata/nc/zeroes.nc') assert ret.success, "process should return success" assert ret.stderr == '', "stderr should be empty" result = ret.stdout bboxList = parse_coordinates(result) - assert bboxList == pytest.approx([-90.0, 0.0, 90.0, 357.5], abs=tolerance) + assert bboxList == pytest.approx([19.86842, -52.63157, 25.13157, 52.63157], abs=tolerance) assert "4326" in result diff --git a/tests/testdata/asc/Churfirsten_30m.asc b/tests/testdata/asc/Churfirsten_30m.asc new file mode 100644 index 00000000..962957d7 --- /dev/null +++ b/tests/testdata/asc/Churfirsten_30m.asc @@ -0,0 +1,8 @@ +ncols 2 +nrows 2 +xllcorner -41404.534638399433 +yllcorner -34323.166541740298 +cellsize 30.000000000000 +NODATA_value -9999.0 + 806.7 798.9 +790.4 781.9 diff --git a/tests/testdata/asc/Churfirsten_30m.prj b/tests/testdata/asc/Churfirsten_30m.prj new file mode 100644 index 00000000..ccc003f1 --- /dev/null +++ b/tests/testdata/asc/Churfirsten_30m.prj @@ -0,0 +1 @@ +PROJCS["Bonne_47_25N_9_4E",GEOGCS["GCS_WGS_1984",DATUM["D_wgs_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Bonne"],PARAMETER["false_easting",0],PARAMETER["false_northing",0],PARAMETER["standard_parallel_1",47.25],PARAMETER["central_meridian",9.4],UNIT["Meter",1]] \ No newline at end of file diff --git a/tests/testdata/csv/cities_NL_case_flip.csv b/tests/testdata/csv/cities_NL_case_flip.csv new file mode 100644 index 00000000..5400fe9f --- /dev/null +++ b/tests/testdata/csv/cities_NL_case_flip.csv @@ -0,0 +1,11 @@ +Name,no_Latitude,EPSG,no_Longitude,NumberOfCitizens,TimeStamp +Amsterdam,4.890444,4326,52.370197,862276,30.09.2018 +Rotterdam,4.479167,4326,51.930833,643307,30.09.2018 +Den Haag,4.3175,4326,52.084167,537368,30.09.2018 +Utrecht,5.115556,4326,52.088889,350821,30.09.2018 +Eindhoven,5.484167,4326,51.434444,230953,01.08.2017 +Groningen,6.574722,4326,53.217222,231214,30.09.2018 +Arnhem,5.9225,4326,51.999167,158629,30.09.2018 +Leeuwarden,5.7925,4326,53.197222,122925,30.09.2019 +Alkmaar,4.746389,4326,52.634444,108552,30.09.2018 +error,95,4326,5,0,30.09.2019 diff --git a/tests/testdata/data-sources.md b/tests/testdata/data-sources.md index fd3c81ab..b6ec7398 100644 --- a/tests/testdata/data-sources.md +++ b/tests/testdata/data-sources.md @@ -15,12 +15,14 @@ | [Geosoftware2] | wf_100m_klas | geotif | MIT | | [geoextent] | onePoint | geojson | PDDL | | [geoextent] | invalid_coordinate | geojson | PDDL | -| [geoextent] | onePoint | geojson | PDDL | | [Carto BCN / martgnz] | Barcelona districts | geojson | CC-BY 4.0 | | [Google] | abstractviews_timeprimitive_example | kml | CC-BY 4.0 | | [Tkrajina] | gpx1.1_with_all_fields.gpx | gpx | Apache License 2.0 | | [geoextent] | ifgi_to_denkpause.shp | shp | PDDL | | [geoextent] | wandelroute_maastricht.gpkg | gpkg | PDDL | +| [geoextent] | zeroes.nc | netcdf | PDDL | +| [Copernicus] | MSK_SNWPRB_60m | jp2 | [Copernicus Sentinel Data] | +| [Zenodo](https://zenodo.org/record/4323616) | Churfirsten_30m | asc | CC-BY 4.0 | [sf]: https://github.com/r-spatial/sf [geoextent]: https://github.com/o2r-project/geoextent @@ -32,3 +34,5 @@ [Carto BCN / martgnz]: https://github.com/martgnz/bcn-geodata/tree/master/districtes [Google]: https://developers.google.com/kml/documentation/time#gps [Tkrajina]: https://github.com/tkrajina/gpxpy/blob/dev/test_files/gpx1.1_with_all_fields.gpx +[Copernicus]: https://scihub.copernicus.eu/dhus/odata/v1/Products('e6b0b515-cc16-4e70-94ba-12e80db73d19')/$value +[Copernicus Sentinel Data]: https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice diff --git a/tests/testdata/jpge2000/MSK_SNWPRB_60m.jp2 b/tests/testdata/jpge2000/MSK_SNWPRB_60m.jp2 new file mode 100644 index 00000000..91aa1599 Binary files /dev/null and b/tests/testdata/jpge2000/MSK_SNWPRB_60m.jp2 differ diff --git a/tests/testdata/nc/zeroes.nc b/tests/testdata/nc/zeroes.nc new file mode 100644 index 00000000..924c97a0 Binary files /dev/null and b/tests/testdata/nc/zeroes.nc differ