Create hmg environment #241

Open · wants to merge 11 commits into main
4 changes: 3 additions & 1 deletion queries/.gitignore
@@ -2,4 +2,6 @@
 target/
 dbt_packages/
 logs/
-*dev/
+*dev/
+target-base/
+package-lock.yml
65 changes: 61 additions & 4 deletions queries/dev/profiles-example.yml
@@ -1,4 +1,4 @@
-default:
+queries:
   target: dev
   outputs:
     dev:
@@ -7,13 +7,70 @@ default:
       project: rj-smtr-dev
       dataset: dbt
       location: US
-      threads: 2
+      threads: 1
       keyfile: # path/to/your/credential.json
+      priority: interactive
+      job_retries: 1
+
+      # for dbt Python models to be run on Dataproc Serverless
+      gcs_bucket: rj-smtr
+      dataproc_region: us-central1
+      submission_method: serverless
+      dataproc_batch:
+        environment_config:
+          execution_config:
+            service_account: # your-service-account
+
+        runtime_config:
+          properties:
+            spark.executor.instances: "2"
+            spark.driver.memory: 4g
+            spark.driver.memoryOverhead: 1g
+    hmg:
+      type: bigquery
+      method: service-account
+      project: rj-smtr-dev
+      dataset: dbt
+      location: US
+      threads: 1
+      keyfile: # path/to/your/credential.json
+      priority: interactive
+      job_retries: 1
+
+      # for dbt Python models to be run on Dataproc Serverless
+      gcs_bucket: rj-smtr
+      dataproc_region: us-central1
+      submission_method: serverless
+      dataproc_batch:
+        environment_config:
+          execution_config:
+            service_account: # your-service-account
+
+        runtime_config:
+          properties:
+            spark.executor.instances: "2"
+            spark.driver.memory: 4g
+            spark.driver.memoryOverhead: 1g
     prod:
       type: bigquery
       method: service-account
       project: rj-smtr
       dataset: dbt
       location: US
-      threads: 2
-      keyfile: # path/to/your/credential.json
+      threads: 1
+      keyfile: # path/to/your/credential.json
+
+      # for dbt Python models to be run on Dataproc Serverless
+      gcs_bucket: rj-smtr
+      dataproc_region: us-central1
+      submission_method: serverless
+      dataproc_batch:
+        environment_config:
+          execution_config:
+            service_account: # your-service-account
+
+        runtime_config:
+          properties:
+            spark.executor.instances: "2"
+            spark.driver.memory: 4g
+            spark.driver.memoryOverhead: 1g
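The new `dataproc_batch` blocks let dbt submit Python models to Dataproc Serverless. For context, a minimal dbt Python model that this profile could dispatch might look like the sketch below (the file path and the `upstream_model` ref are illustrative, not part of this PR):

```python
# models/example_python_model.py -- illustrative sketch, not a file in this PR
def model(dbt, session):
    # On the BigQuery adapter with submission_method "serverless", `session`
    # is the PySpark SparkSession provided by Dataproc Serverless.
    dbt.config(materialized="table")
    # Read an upstream dbt model as a Spark DataFrame (name is hypothetical).
    df = dbt.ref("upstream_model")
    # Transformations run on executors sized by runtime_config above
    # (spark.executor.instances: "2", spark.driver.memory: 4g).
    return df.dropDuplicates()
```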
1 change: 1 addition & 0 deletions queries/dev/utils.py
@@ -63,6 +63,7 @@ def run_dbt_model(
     run_command += f" {flags}"

     print(f"\n>>> RUNNING: {run_command}\n")
+    os.chdir(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
     os.system(run_command)


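The one-line addition makes `run_dbt_model` independent of the caller's working directory: since utils.py lives in queries/dev/, two `dirname()` calls resolve to queries/, the dbt project root, so the shelled-out dbt command always finds dbt_project.yml. A minimal sketch of the same logic:

```python
import os

# utils.py sits in queries/dev/, so going up two levels from the file's
# real path lands on queries/, where dbt_project.yml lives.
this_file = os.path.realpath(__file__)                      # .../queries/dev/utils.py
project_root = os.path.dirname(os.path.dirname(this_file))  # .../queries
os.chdir(project_root)
os.system("dbt debug")  # any dbt command now runs from the project root
```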
24 changes: 24 additions & 0 deletions queries/macros/generate_database_name.sql
@@ -0,0 +1,24 @@
-- fmt: off
{% macro generate_database_name(custom_database_name=none, node=none) -%}

    {%- set default_database = target.database -%}
    {% set dev_database = "rj-smtr-dev" %}
    {%- if custom_database_name is none -%}

        {% if target.name in ("dev", "hmg") %}

            {{ dev_database }}

        {% else %}

            {{ default_database }}

        {% endif %}

    {%- else -%}

        {{ custom_database_name | trim }}

    {%- endif -%}

{%- endmacro %}
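The effect: with no custom database configured, both the `dev` and `hmg` targets build into `rj-smtr-dev`, and only `prod` falls through to `target.database` (`rj-smtr`). A rough Python rendering of the macro's branching, with names of my own choosing:

```python
def resolve_database(target_name: str, default_database: str,
                     custom_database_name: str | None = None) -> str:
    """Illustrative mirror of generate_database_name.sql (not code from this PR)."""
    if custom_database_name is not None:
        return custom_database_name.strip()
    if target_name in ("dev", "hmg"):
        return "rj-smtr-dev"   # dev and hmg share the development project
    return default_database    # prod keeps target.database

assert resolve_database("hmg", "rj-smtr-dev") == "rj-smtr-dev"
assert resolve_database("prod", "rj-smtr") == "rj-smtr"
```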
15 changes: 13 additions & 2 deletions queries/macros/generate_schema_name.sql
@@ -1,14 +1,25 @@
@@ -1,14 +1,25 @@
+-- fmt: off
 {% macro generate_schema_name(custom_schema_name, node) -%}

     {%- set default_schema = target.schema -%}
     {%- if custom_schema_name is none -%}

-        {{ default_schema }}
+        {% set schema_name = default_schema %}

     {%- else -%}

-        {{ custom_schema_name | trim }}
+        {% set schema_name = custom_schema_name | trim %}

     {%- endif -%}

+    {% if target.name == "dev" %}
+        {% set schema_name = env_var("DBT_USER") + "__" + schema_name %}
+    {% endif %}
+
+    {% if target.name == "hmg" and schema_name.endswith("_staging") %}
+        {% set schema_name = schema_name + "_dbt" %}
+    {% endif %}
+
+    {{ schema_name }}
+
 {%- endmacro %}
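Taken together: `dev` runs prefix every schema with the developer's `DBT_USER`, while `hmg` runs append `_dbt` to `*_staging` schemas so they cannot collide with the real staging datasets. An illustrative Python equivalent (function name and sample values are hypothetical):

```python
import os

def resolve_schema(target_name: str, default_schema: str,
                   custom_schema_name: str | None = None) -> str:
    """Illustrative mirror of generate_schema_name.sql (not code from this PR)."""
    schema = default_schema if custom_schema_name is None else custom_schema_name.strip()
    if target_name == "dev":
        schema = os.environ["DBT_USER"] + "__" + schema
    if target_name == "hmg" and schema.endswith("_staging"):
        schema += "_dbt"
    return schema

os.environ["DBT_USER"] = "jane"  # hypothetical developer
assert resolve_schema("dev", "dbt") == "jane__dbt"
assert resolve_schema("hmg", "dbt", "cadastro_staging") == "cadastro_staging_dbt"
```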
5 changes: 5 additions & 0 deletions queries/packages.yml
@@ -0,0 +1,5 @@
packages:
  - package: dbt-labs/audit_helper
    version: 0.12.0
  - package: data-mie/dbt_profiler
    version: 0.8.2
58 changes: 57 additions & 1 deletion queries/profiles.yml
@@ -11,6 +11,47 @@ queries:
       project: rj-smtr-dev
       threads: 1
       type: bigquery
+
+      # for dbt Python models to be run on Dataproc Serverless
+      gcs_bucket: rj-smtr
+      dataproc_region: us-central1
+      submission_method: serverless
+      dataproc_batch:
+        environment_config:
+          execution_config:
+            service_account: [email protected]
+
+        runtime_config:
+          properties:
+            spark.executor.instances: "2"
+            spark.driver.memory: 4g
+            spark.driver.memoryOverhead: 1g
+    hmg:
+      dataset: dbt
+      job_execution_timeout_seconds: 600
+      job_retries: 1
+      keyfile: /tmp/credentials.json
+      location: us
+      method: service-account
+      priority: interactive
+      project: rj-smtr-dev
+      threads: 1
+      type: bigquery
+
+      # for dbt Python models to be run on Dataproc Serverless
+      gcs_bucket: rj-smtr
+      dataproc_region: us-central1
+      submission_method: serverless
+      dataproc_batch:
+        environment_config:
+          execution_config:
+            service_account: [email protected]
+
+        runtime_config:
+          properties:
+            spark.executor.instances: "2"
+            spark.driver.memory: 4g
+            spark.driver.memoryOverhead: 1g
     prod:
       dataset: dbt
       job_execution_timeout_seconds: 600
@@ -22,4 +63,19 @@ queries:
       project: rj-smtr
       threads: 1
       type: bigquery
-  target: prod
+
+      # for dbt Python models to be run on Dataproc Serverless
+      gcs_bucket: rj-smtr
+      dataproc_region: us-central1
+      submission_method: serverless
+      dataproc_batch:
+        environment_config:
+          execution_config:
+            service_account: [email protected]
+
+        runtime_config:
+          properties:
+            spark.executor.instances: "2"
+            spark.driver.memory: 4g
+            spark.driver.memoryOverhead: 1g
+  target: prod
13 changes: 13 additions & 0 deletions queries/recce.yml
@@ -0,0 +1,13 @@
# Preset Checks
# Please see https://datarecce.io/docs/features/preset-checks/
checks:
  - name: Row count diff
    description: Check the row count diff for all table models.
    type: row_count_diff
    params:
      select: state:modified,config.materialized:table
  - name: Schema diff
    description: Check the schema diff for all nodes.
    type: schema_diff
    params:
      select: state:modified