Skip to content

Commit

Permalink
import: factorize bounding box
Browse files Browse the repository at this point in the history
  • Loading branch information
bouttier committed Sep 19, 2024
1 parent ee1bc8e commit 6703698
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 95 deletions.
56 changes: 10 additions & 46 deletions backend/geonature/core/gn_synthese/imports/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from geonature.core.imports.utils import (
load_transient_data_in_dataframe,
update_transient_data_from_dataframe,
compute_bounding_box,
)
from geonature.core.imports.checks.dataframe import (
concat_dates,
Expand Down Expand Up @@ -563,53 +564,16 @@ def report_plot(imprt: TImports) -> StandaloneEmbedJson:

@staticmethod
def compute_bounding_box(imprt: TImports):
name_geom_4326_field = "the_geom_4326"
code_entity = "observation"

entity = Entity.query.filter_by(destination=imprt.destination, code=code_entity).one()
where_clause_id_import = None
# If import is still in-progress data is retrieved from the import transient table,
# otherwise the import is done and data is retrieved from the destination table
if imprt.loaded:
# Retrieve the import transient table ("t_imports_synthese")
transient_table = imprt.destination.get_transient_table()
# Set the WHERE clause
where_clause_id_import = transient_table.c["id_import"] == imprt.id_import
else:
# There is no 'id_import' field in the destination table 'synthese', must retrieve
# the corresponding `id_source` from the table "t_sources"
id_source = db.session.scalar(
# The destination where clause will be called only when the import is finished,
# which avoids looking up a nonexistent source while the import is still in progress.
destination_where_clause = (
lambda imprt, destination_table: db.session.scalar(
select(TSources.id_source).where(
TSources.name_source == f"Import(id={imprt.id_import})"
)
)
# Retrieve the destination table ("synthese")
entity = Entity.query.filter_by(destination=imprt.destination, code="observation").one()
destination_table = entity.get_destination_table()
# Set the WHERE clause
where_clause_id_import = destination_table.c["id_source"] == id_source

# Build the statement to retrieve the valid bounding box
statement = None
if imprt.loaded == True:
# Compute from entries in the transient table and related to the import
transient_table = imprt.destination.get_transient_table()
statement = (
select(func.ST_AsGeojson(func.ST_Extent(transient_table.c[name_geom_4326_field])))
.where(where_clause_id_import)
.where(transient_table.c[entity.validity_column] == True)
)
else:
destination_table = entity.get_destination_table()
# Compute from entries in the destination table and related to the import
statement = select(
func.ST_AsGeojson(func.ST_Extent(destination_table.c[name_geom_4326_field]))
).where(where_clause_id_import)

# Execute the statement to eventually retrieve the valid bounding box
(valid_bbox,) = db.session.execute(statement).fetchone()

# Return the valid bounding box or None
if valid_bbox:
return json.loads(valid_bbox)
pass
== destination_table.c.id_source
)
return compute_bounding_box(
imprt, "observation", "the_geom_4326", destination_where_clause=destination_where_clause
)
48 changes: 47 additions & 1 deletion backend/geonature/core/imports/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from typing import IO, Any, Dict, Iterable, List, Optional, Set, Tuple

from flask import current_app, render_template
from sqlalchemy import delete
import sqlalchemy as sa
from sqlalchemy import func, select, delete
from chardet.universaldetector import UniversalDetector
from sqlalchemy.sql.expression import select, insert
import pandas as pd
Expand Down Expand Up @@ -473,3 +474,48 @@ def get_required(import_: TImports, entity: Entity):
if all([field_name in selected_fields for field_name in bib_field.optional_conditions]):
required_columns.remove(field)
return required_columns


def compute_bounding_box(
    imprt: TImports,
    entity_code,
    geom_4326_field,
    *,
    transient_where_clause=None,
    destination_where_clause=None
):
    """Return the extent of the geometries of an import as parsed GeoJSON.

    The table that is queried depends on the state of the import:

    * ``imprt.date_end_import`` is set: the destination table of the entity;
    * otherwise, ``imprt.processed`` is truthy: the transient table of the
      import destination;
    * otherwise: nothing is queried and ``None`` is returned.

    :param imprt: import whose data extent is requested.
    :param entity_code: code of the entity, looked up together with the
        destination of the import.
    :param geom_4326_field: name of the geometry column; the same name is used
        for the destination table and for the transient table.
    :param transient_where_clause: optional filter applied to the transient
        table. Either an expression used as is, or a callable invoked as
        ``transient_where_clause(imprt, transient_table)``. When omitted, rows
        are filtered on ``id_import`` and on the validity column of the entity.
    :param destination_where_clause: optional filter applied to the destination
        table, with the same expression-or-callable convention. When omitted,
        the destination table is assumed to expose an ``id_import`` column.
    :return: the decoded GeoJSON of the extent, or ``None`` when the import has
        not been processed yet or when the query yields an empty value.
    """
    entity = Entity.query.filter_by(destination=imprt.destination, code=entity_code).one()

    # Step 1: choose the table to read and the caller-supplied filter that goes with it.
    if imprt.date_end_import:
        # The import is finished.
        use_destination = True
        source_table = entity.get_destination_table()
        custom_clause = destination_where_clause
    elif imprt.processed:
        # The import has been checked but is not finished yet.
        use_destination = False
        source_table = imprt.destination.get_transient_table()
        custom_clause = transient_where_clause
    else:
        # Checks have not been run yet: there is no valid data to measure.
        return None

    extent_column = source_table.c[geom_4326_field]

    # Step 2: build the row filter. Defaults are only built when no custom
    # clause is given, so ``id_import`` is never required on a table for which
    # the caller supplied its own filter.
    if custom_clause is None:
        if use_destination:
            row_filter = source_table.c.id_import == imprt.id_import
        else:
            row_filter = sa.and_(
                source_table.c.id_import == imprt.id_import,
                # ``== True`` builds a SQL comparison; it is not a Python truth test.
                source_table.c[entity.validity_column] == True,
            )
    elif callable(custom_clause):
        row_filter = custom_clause(imprt, source_table)
    else:
        row_filter = custom_clause

    # Step 3: run the aggregate query and decode its single value.
    extent_query = select(func.ST_AsGeojson(func.ST_Extent(extent_column))).where(row_filter)
    (bbox_geojson,) = db.session.execute(extent_query).fetchone()

    return json.loads(bbox_geojson) if bbox_geojson else None
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
get_mapping_data,
load_transient_data_in_dataframe,
update_transient_data_from_dataframe,
compute_bounding_box,
)

from geonature.core.imports.checks.dataframe import (
Expand Down Expand Up @@ -558,51 +559,4 @@ def report_plot(imprt: TImports) -> StandaloneEmbedJson:

@staticmethod
def compute_bounding_box(imprt: TImports):
name_geom_4326_field = "geom_4326"
code_entity = "station"

entity = Entity.query.filter_by(destination=imprt.destination, code=code_entity).one()

where_clause_id_import = None
id_import = imprt.id_import
destination_import = imprt.destination

# If import is still in-progress data is retrieved from the import transient table,
# otherwise the import is done and data is retrieved from the destination table
if imprt.loaded:
# Retrieve the import transient table ("t_imports_occhab")
table_with_data = destination_import.get_transient_table()
else:
# Retrieve the destination table ("t_stations")
entity = Entity.query.filter_by(destination=destination_import, code="station").one()
table_with_data = entity.get_destination_table()

# Set the WHERE clause
where_clause_id_import = table_with_data.c["id_import"] == id_import

# Build the statement to retrieve the valid bounding box
statement = None
if imprt.loaded == True:
# Compute from entries in the transient table and related to the import
transient_table = imprt.destination.get_transient_table()
statement = (
sa.select(
sa.func.ST_AsGeojson(sa.func.ST_Extent(transient_table.c[name_geom_4326_field]))
)
.where(where_clause_id_import)
.where(transient_table.c[entity.validity_column] == True)
)
else:
destination_table = entity.get_destination_table()
# Compute from entries in the destination table and related to the import
statement = sa.select(
sa.func.ST_AsGeojson(sa.func.ST_Extent(destination_table.c[name_geom_4326_field]))
).where(where_clause_id_import)

# Execute the statement to eventually retrieve the valid bounding box
(valid_bbox,) = db.session.execute(statement).fetchone()

# Return the valid bounding box or None
if valid_bbox:
return json.loads(valid_bbox)
pass
return compute_bounding_box(imprt, "station", "geom_4326")

0 comments on commit 6703698

Please sign in to comment.