Skip to content

Commit

Permalink
cria tabelas segmento
Browse files Browse the repository at this point in the history
  • Loading branch information
pixuimpou committed Sep 12, 2024
1 parent cab4c3b commit c07a87c
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 3 deletions.
11 changes: 8 additions & 3 deletions queries/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,12 @@ models:
staging:
+materialized: view
+schema: transito_staging
teste_python:
+materialized: view
+schema: teste_python
planejamento:
+materialized: incremental
+incremental_strategy: insert_overwrite
+schema: planejamento
staging:
+database: rj-smtr-dev
+materialized: view
+schema: planejamento_staging
+database: rj-smtr-dev
10 changes: 10 additions & 0 deletions queries/models/planejamento/segmento_shape.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- dbt configuration for the segmento_shape model: the table is partitioned by
-- the DATE column feed_start_date with daily granularity and carries the
-- 'geolocalizacao' tag.
-- NOTE(review): only the config block is visible here; the model's SELECT
-- statement appears to be still missing - confirm before running this model.
{{
config(
partition_by = {
'field' :'feed_start_date',
'data_type' :'date',
'granularity': 'day'
},
tags=['geolocalizacao']
)
}}
42 changes: 42 additions & 0 deletions queries/models/planejamento/shapes_geom_planejamento.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
-- Builds one row per (feed_version, feed_start_date, feed_end_date, shape_id)
-- with the shape both as a geography line and as a WKT LINESTRING string.
-- The model is published under the alias 'shapes_geom', partitioned by day on
-- feed_start_date and tagged 'geolocalizacao'.
{{
config(
partition_by = {
'field' :'feed_start_date',
'data_type' :'date',
'granularity': 'day'
},
alias = 'shapes_geom',
tags=['geolocalizacao']
)
}}

-- shapes: one row per shape point, carrying the point as a geography value
-- (ponto_shape) and as a "lon lat" text pair (lon_lat) used later to assemble
-- the WKT string.
WITH shapes AS (
SELECT
feed_version,
feed_start_date,
feed_end_date,
shape_id,
shape_pt_sequence,
ST_GEOGPOINT(shape_pt_lon, shape_pt_lat) AS ponto_shape,
CONCAT(shape_pt_lon, " ", shape_pt_lat) AS lon_lat,
FROM
-- NOTE(review): the table below is hard-coded instead of using the dbt ref.
-- dbt renders Jinja even inside SQL comments, so the commented-out ref on the
-- next line presumably still registers a dependency on shapes_gtfs - confirm.
-- {{ ref("shapes_gtfs") }}
rj-smtr.gtfs.shapes
WHERE
-- NOTE(review): fixed feed date and single shape_id look like a development
-- filter; confirm whether they should remain once the model is finished.
feed_start_date = '2024-09-01'
AND shape_id = "hj1m"
)
-- Aggregate the points of each shape, ordered by shape_pt_sequence, into a
-- single line (shape) and into a "LINESTRING(lon lat, lon lat, ...)" string
-- (wkt_shape).
SELECT
feed_version,
feed_start_date,
feed_end_date,
shape_id,
ST_MAKELINE(ARRAY_AGG(ponto_shape ORDER BY shape_pt_sequence)) AS shape,
CONCAT("LINESTRING(", STRING_AGG(lon_lat, ", " ORDER BY shape_pt_sequence), ")") AS wkt_shape,
FROM
shapes
-- Group by the four identifying columns selected above (positions 1-4).
GROUP BY
1,
2,
3,
4
78 changes: 78 additions & 0 deletions queries/models/planejamento/staging/aux_segmento_shape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
import numpy as np
import pyproj
from pyspark.sql.functions import col, explode, udf
from pyspark.sql.types import ArrayType, IntegerType, StringType
from shapely import wkt

# from shapely.geometry import LineString, Point
from shapely.ops import substring, transform


def transform_projection(shape, from_utm=False):
    """Reproject ``shape`` between EPSG:4326 and EPSG:31983.

    Args:
        shape: Geometry accepted by ``shapely.ops.transform``.
        from_utm: When false (the default) the geometry is converted from
            EPSG:4326 to EPSG:31983; when true the conversion runs in the
            opposite direction.

    Returns:
        The geometry produced by ``shapely.ops.transform`` in the target
        coordinate reference system.
    """
    geographic_crs = pyproj.CRS("EPSG:4326")
    projected_crs = pyproj.CRS("EPSG:31983")

    # Choose the direction once, then build a single transformer for it.
    if from_utm:
        source_crs, target_crs = projected_crs, geographic_crs
    else:
        source_crs, target_crs = geographic_crs, projected_crs

    # always_xy=True keeps coordinates in (x, y) / (lon, lat) order.
    transformer = pyproj.Transformer.from_crs(
        source_crs, target_crs, always_xy=True
    )
    return transform(transformer.transform, shape)


def cut(line, distance):
    """Split ``line`` into consecutive, numbered segments of ``distance`` length.

    The line is walked from its start. Every segment is ``distance`` long
    except one: when the line length is not an exact multiple of ``distance``,
    the leftover length (``line.length % distance``) is given to the segment
    at 1-based position ``n // 2`` (``n`` being the number of segments)
    instead of being left at the end of the line.

    Args:
        line: Geometry exposing ``length`` and accepted by
            ``shapely.ops.substring``.
        distance: Target segment length, in the units of ``line``'s
            coordinates. Must be greater than zero.

    Returns:
        A list of ``[segment_id, segment_wkt]`` pairs ordered along the line,
        with ``segment_id`` starting at 1. A zero-length line yields ``[]``.

    Raises:
        ValueError: If ``distance`` is not greater than zero.
    """
    if distance <= 0:
        raise ValueError("distance must be greater than zero")

    line_len = line.length
    remainder = line_len % distance
    segment_count = len(np.arange(0, line_len, distance))
    middle_index = segment_count // 2

    segments = []
    start = 0
    for segment_id in range(1, segment_count + 1):
        # Only shorten the middle segment when there really is a leftover.
        # With a zero remainder the previous logic emitted a zero-length
        # segment (a POINT) in the middle and never reached the end of the
        # line.
        if segment_id == middle_index and remainder > 0:
            step = remainder
        else:
            step = distance
        end = start + step
        # When there is a single segment, ``end`` may exceed the line length;
        # substring clamps out-of-range distances, so the whole line is kept.
        segments.append([segment_id, substring(line, start, end).wkt])
        start = end

    return segments


def cut_udf(wkt_string):
    """Cut the line described by ``wkt_string`` into 1000-unit segments.

    The WKT is parsed, reprojected with ``transform_projection`` (default
    direction) and passed to ``cut`` with ``distance=1000``.

    Args:
        wkt_string: WKT text of the line to cut, or ``None``.

    Returns:
        A list of ``[segment_id, segment_wkt]`` pairs where both items are
        strings, so that every element fits the array<array<string>> type the
        UDF is registered with. ``None`` is returned for a ``None`` input.

    NOTE(review): the segments are not projected back, so the returned WKT
    stays in the projected coordinates - confirm this is what downstream
    models expect.
    """
    if wkt_string is None:
        return None

    line = transform_projection(wkt.loads(wkt_string))
    return [
        [str(segment_id), segment_wkt]
        for segment_id, segment_wkt in cut(line, distance=1000)
    ]


# ArrayType takes (elementType, containsNull): the inner array must declare a
# single element type. Each pair mixes an id and a WKT string, so both are
# carried as strings.
cut_udf = udf(cut_udf, ArrayType(ArrayType(StringType())))


def model(dbt, session):
    """dbt Python model: one row per segment of each shape.

    Reads ``shapes_geom_planejamento``, applies ``cut_udf`` to ``wkt_shape``
    and explodes the resulting list so that each ``[id, wkt]`` pair becomes a
    row.

    Args:
        dbt: dbt context object, used for ``config`` and ``ref``.
        session: Session object supplied by dbt (not used here).

    Returns:
        A DataFrame with ``feed_version``, ``feed_start_date``,
        ``feed_end_date``, ``shape_id``, ``id_segmento`` (integer) and
        ``wkt_segmento``.
    """
    dbt.config(
        materialized="table",
    )
    df = dbt.ref("shapes_geom_planejamento")

    df_segments = df.withColumn("shape_lists", cut_udf(col("wkt_shape")))

    df_exploded = (
        df_segments.select(
            "feed_version",
            "feed_start_date",
            "feed_end_date",
            "shape_id",
            explode(col("shape_lists")).alias("shape_list"),
        )
        # getItem on an array column is zero-based: position 0 holds the
        # segment id and position 1 holds the segment WKT. The cast keeps the
        # id numeric whatever element type the array was declared with.
        .withColumn(
            "id_segmento", col("shape_list").getItem(0).cast("integer")
        )
        .withColumn("wkt_segmento", col("shape_list").getItem(1))
        .drop("shape_list")
    )

    return df_exploded
1 change: 1 addition & 0 deletions queries/models/teste_python/aux_shapes_wkt.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ with shapes AS (
rj-smtr.gtfs.shapes
where
feed_start_date = '2024-09-01'
and shape_id = "hj1m"
order by
1,
2,
Expand Down

0 comments on commit c07a87c

Please sign in to comment.