Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
bcom-tp-etl-transformation-pipelines
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
general
bcom-tp-etl-transformation-pipelines
Commits
c48c17b3
Commit
c48c17b3
authored
Aug 03, 2023
by
Cristian Aguirre
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update 02-08-23. Fix some bugs (380)
parent
0e639513
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
312 additions
and
23 deletions
+312
-23
Extractor.py
dags/components/Extractor.py
+2
-0
Utils.py
dags/components/Utils.py
+1
-3
dag_conf.yml
dags/dag_conf.yml
+11
-11
procedure_definition2.json
dags/procedure_definition2.json
+277
-1
airflow-envvars-configmap.yaml
deploy-k8/airflow-envvars-configmap.yaml
+2
-1
airflow-scheduler-deployment.yaml
deploy-k8/airflow-scheduler-deployment.yaml
+5
-2
airflow-volumes.yaml
deploy-k8/airflow-volumes.yaml
+3
-3
airflow-webserver-deployment.yaml
deploy-k8/airflow-webserver-deployment.yaml
+5
-2
postgres-deployment.yaml
deploy-k8/postgres-deployment.yaml
+3
-0
sync-dags-deployment-gcs.yaml
deploy-k8/sync-dags-deployment-gcs.yaml
+3
-0
No files found.
dags/components/Extractor.py
View file @
c48c17b3
...
...
@@ -120,6 +120,8 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, **kwa
if
is_procedure
:
command
=
command
[
len
(
tablename
+
"|"
):]
temp_connection
=
source_conn
.
get_basic_connection
()
command
=
source_conn
.
generate_sql_procedure
(
command
)
logger
.
debug
(
f
"FINAL COMMAND: {command}"
)
if
source_conn
.
db_type
==
DatabaseTypeEnum
.
ORACLE
.
value
:
cursor
=
temp_connection
.
cursor
()
cursor
.
execute
(
command
)
...
...
dags/components/Utils.py
View file @
c48c17b3
...
...
@@ -139,10 +139,8 @@ def select_multiple(command: str) -> Dict[str, Any]:
response
=
{
'is_multiple'
:
False
,
'tablename'
:
''
}
tablename
=
""
no_procedure_init
=
"|select"
procedure_init
=
[
"|begin"
,
"|call"
]
try
:
if
command
.
lower
()
.
replace
(
" "
,
""
)
.
find
(
procedure_init
[
0
])
!=
-
1
or
\
command
.
lower
()
.
replace
(
" "
,
""
)
.
find
(
procedure_init
[
1
])
!=
-
1
:
if
command
.
lower
()
.
replace
(
" "
,
""
)
.
find
(
no_procedure_init
)
==
-
1
:
response
[
"is_multiple"
]
=
True
tablename
=
command
[:
command
.
index
(
"|"
)]
.
strip
()
response
[
"tablename"
]
=
tablename
...
...
dags/dag_conf.yml
View file @
c48c17b3
...
...
@@ -15,23 +15,23 @@ app:
schema
:
sources
transformation
:
type
:
mysql
host
:
database-11.cluster-ro-cvsz4ey9eiec.us-east-1.rds.amazonaws.com
port
:
3306
username
:
admin
password
:
adminadmin
database
:
prueba_
ca_
2
host
:
192.168.1.2
port
:
1
3306
username
:
root
password
:
root
database
:
prueba_
bcom
2
service
:
schema
:
intern_db
chunksize
:
8
000
chunksize
:
4
000
label_multiple_select
:
TABLE
source_mask
:
select
# Sufijo (S)
procedure_mask
:
procedure
# S
transformation_mask
:
transform
# S
source_mask
:
select
DA
# Sufijo (S)
procedure_mask
:
procedure
DA
# S
transformation_mask
:
transform
DA
# S
prefix_order_delimiter
:
.
cloud_provider
:
aws
scripts
:
s3_params
:
bucket
:
prueba
-airflow13
bucket
:
prueba
1234568
prefix
:
bcom_scripts
connection_id
:
conn_script
control
:
...
...
@@ -48,7 +48,7 @@ app:
delimiter
:
'
|'
tmp_path
:
/tmp
s3_params
:
bucket
:
prueba
-airflow13
bucket
:
prueba
1234568
prefix
:
bcom_results
connection_id
:
conn_script
report
:
...
...
dags/procedure_definition2.json
View file @
c48c17b3
[
{
{
"identifier"
:
"TABLA1"
,
"fields"
:
[
{
...
...
@@ -49,5 +49,281 @@
"datatype"
:
"DECIMAL"
}
]
},
{
"identifier"
:
"TABLA_PRUEBA"
,
"fields"
:
[
{
"name"
:
"CD_CUENTA"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"DPP_FLAG"
,
"datatype"
:
"NUMBER"
},
{
"name"
:
"FECHA_FACTURA"
,
"datatype"
:
"DATE"
},
{
"name"
:
"FECHA_INICIAL_DPP"
,
"datatype"
:
"DATE"
},
{
"name"
:
"FECHA_LIMITE_DPP"
,
"datatype"
:
"DATE"
},
{
"name"
:
"FH_CARGA"
,
"datatype"
:
"DATE"
},
{
"name"
:
"ID_FACTURA"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"MONTO_AJUSTADO"
,
"datatype"
:
"DECIMAL"
},
{
"name"
:
"MONTO_FACTURA"
,
"datatype"
:
"DECIMAL"
},
{
"name"
:
"MONTO_PAGO"
,
"datatype"
:
"DECIMAL"
},
{
"name"
:
"SALDO_FACTURA"
,
"datatype"
:
"DECIMAL"
}
]
},
{
"identifier"
:
"TABLA_DE_PRUEBA_DA"
,
"fields"
:
[
{
"name"
:
"CAMPO_1"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"CAMPO_2"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"CAMPO_3"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"CAMPO_4"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
}
]
},
{
"identifier"
:
"TACOMVENTAS"
,
"fields"
:
[
{
"name"
:
"CD_EMPRESA"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"CD_FOLIO"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"CD_CUENTA"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"NU_VENDEDOR"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"CD_PAQUETE"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"NU_ADDON"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"NB_PAQUETE"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
200
},
{
"name"
:
"CD_CLIENTE"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
50
},
{
"name"
:
"NB_CLIENTE"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
200
},
{
"name"
:
"FH_CONTRATACION"
,
"datatype"
:
"DATE"
},
{
"name"
:
"FH_ACTIVACION"
,
"datatype"
:
"DATE"
},
{
"name"
:
"FH_OPERACION"
,
"datatype"
:
"DATE"
},
{
"name"
:
"TP_SERVICIO"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"ST_CLIENTE"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"TP_PAGO"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
10
},
{
"name"
:
"NB_USUACARGA"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
50
},
{
"name"
:
"FH_CARGA"
,
"datatype"
:
"DATE"
},
{
"name"
:
"NU_ANIO"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"NU_MES"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"NU_SEMANA"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"NU_COMISION"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"TP_PAGOANT"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"REGLA_APLICADA"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"AUX"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
50
},
{
"name"
:
"PROMOCION"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
80
},
{
"name"
:
"EMPLEADOEMBAJADOR__C"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"CANAL_DE_VENTA__C"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
50
},
{
"name"
:
"CANALREFERIDO__C"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
50
}
],
"indexes"
:
[
"CD_PAQUETE"
,
"NU_ADDON"
,
"CD_CLIENTE"
]
},
{
"identifier"
:
"CATALOGO_PROMOCIONES"
,
"fields"
:
[
{
"name"
:
"NOMBRE_PRODUCTO"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"CD_PAQUETE"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
50
}
]
},
{
"identifier"
:
"PROCEDURE_DA"
,
"fields"
:
[
{
"name"
:
"NOMBRE_PRODUCTO"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"CD_PAQUETE"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
50
}
]
},
{
"identifier"
:
"JOIN_1"
,
"fields"
:
[
{
"name"
:
"PRODUCTO"
,
"datatype"
:
"TEXT"
,
"maxLength"
:
100
},
{
"name"
:
"ANIO"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"SERVICIO"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
0
},
{
"name"
:
"TOTAL_CARGA"
,
"datatype"
:
"NUMBER"
,
"decimal_precision"
:
3
}
]
}
]
\ No newline at end of file
deploy-k8/airflow-envvars-configmap.yaml
View file @
c48c17b3
...
...
@@ -88,13 +88,14 @@ data:
AIRFLOW__CORE__EXECUTOR
:
LocalExecutor
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN
:
postgresql+psycopg2://airflow:airflow@postgres/airflow
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION
:
'
true'
AIRFLOW__KUBERNETES_EXECUTOR__ENABLE_TCP_KEEPALIVE
:
'
false'
AIRFLOW__CORE__LOAD_EXAMPLES
:
'
false'
_AIRFLOW_DB_UPGRADE
:
'
true'
_AIRFLOW_WWW_USER_CREATE
:
'
true'
_AIRFLOW_WWW_USER_USERNAME
:
admin
_AIRFLOW_WWW_USER_PASSWORD
:
admin
S3_DAGS_DIR
:
'
s3://prueba1234568/dags'
GCS_DAGS_DIR
:
'
gs://prueba-rsync
2
/carpeta'
GCS_DAGS_DIR
:
'
gs://prueba-rsync
3
/carpeta'
SYNCHRONYZE_DAG_DIR
:
'
30'
MINIO_SERVER
:
'
http://192.168.49.2:9000'
MINIO_DAGS_DIR
:
'
/prueba-ca/dags'
\ No newline at end of file
deploy-k8/airflow-scheduler-deployment.yaml
View file @
c48c17b3
...
...
@@ -24,8 +24,11 @@ spec:
image
:
cristianfernando/airflow_custom:0.0.4
resources
:
requests
:
cpu
:
"
1000m"
memory
:
"
4Gi"
cpu
:
"
500m"
memory
:
"
2Gi"
limits
:
cpu
:
"
2500m"
memory
:
"
5Gi"
args
:
[
"
scheduler"
]
envFrom
:
-
configMapRef
:
...
...
deploy-k8/airflow-volumes.yaml
View file @
c48c17b3
...
...
@@ -10,7 +10,7 @@ spec:
-
ReadWriteMany
storageClassName
:
airflow-dags
nfs
:
server
:
10.
216.137.186
server
:
10.
115.7.82
path
:
"
/volume1/nfs_share"
---
...
...
@@ -27,7 +27,7 @@ spec:
-
ReadWriteMany
storageClassName
:
airflow-postgres
nfs
:
server
:
10.
216.137.186
server
:
10.
115.7.82
path
:
"
/volume1/nfs_postgres"
---
...
...
@@ -44,7 +44,7 @@ spec:
-
ReadWriteMany
storageClassName
:
airflow-logs
nfs
:
server
:
10.
216.137.186
server
:
10.
115.7.82
path
:
"
/volume1/nfs_logs"
---
...
...
deploy-k8/airflow-webserver-deployment.yaml
View file @
c48c17b3
...
...
@@ -21,11 +21,14 @@ spec:
spec
:
containers
:
-
name
:
airflow-webserver
image
:
apache/airflow:2.5.3
image
:
cristianfernando/airflow_custom:0.0.4
resources
:
requests
:
cpu
:
"
50
0m"
cpu
:
"
25
0m"
memory
:
"
500Mi"
limits
:
cpu
:
"
500m"
memory
:
"
1000Mi"
args
:
[
"
webserver"
]
envFrom
:
-
configMapRef
:
...
...
deploy-k8/postgres-deployment.yaml
View file @
c48c17b3
...
...
@@ -20,6 +20,9 @@ spec:
-
name
:
postgres
image
:
postgres:12
resources
:
requests
:
memory
:
"
1Gi"
cpu
:
"
250m"
limits
:
memory
:
"
2Gi"
cpu
:
"
500m"
...
...
deploy-k8/sync-dags-deployment-gcs.yaml
View file @
c48c17b3
...
...
@@ -28,6 +28,9 @@ spec:
name
:
sync-dags-gcloud
image
:
gcr.io/google.com/cloudsdktool/google-cloud-cli:alpine
resources
:
requests
:
cpu
:
"
200m"
memory
:
"
500Mi"
limits
:
cpu
:
"
250m"
memory
:
"
1Gi"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment