general / bcom-tp-etl-transformation-pipelines / Commits

Commit b3a54460, authored Jun 05, 2023 by Cristian Aguirre
Update DAG-TACOMVENTAS-PROMOCIONESRESIDENCIAL-05-06-23
Parent: 3ebc44a5
Showing 11 changed files with 67 additions and 47 deletions:

  dags/app_conf.yml                                         +3   -3
  dags/components/S3Route.py                                +3   -3
  dags/dag_transformacion_tacomventas_promoresidencial.py  +17  -11
  deploy-k8/airflow-envvars-configmap.yaml                  +4   -0
  deploy-k8/airflow-scheduler-deployment.yaml               +1   -5
  deploy-k8/airflow-secrets.yaml                           +14   -0
  deploy-k8/airflow-webserver-deployment.yaml               +1   -5
  deploy-k8/pod_template.yaml                               +4  -13
  deploy-k8/script-apply.sh                                 +1   -0
  deploy-k8/script-delete.sh                                +1   -0
  deploy-k8/sync-dags-deployment.yaml                      +18   -7
dags/app_conf.yml

@@ -2,7 +2,7 @@
 general:
   s3_parameters:
     s3_conn_id: "bcom_tp_connection"
-    bucket: "prueba1234568"
+    bucket: "prueba-ca"
 dags:
   dag1:
@@ -41,14 +41,14 @@ dags:
   relacion3pa2p:
     type: "INSUMO"
     pattern: "temporal_relacion3pa2p*.txt"
-    prefix: "pruebas_qa"
+    prefix: ""
    key_field: "TRESP"
     value_field: "DOSP"
     delimiter: ","
   relacionpoidpaquete:
     type: "INSUMO"
     pattern: "temporal_relacion_Paquete*.txt"
-    prefix: "pruebas_qa"
+    prefix: ""
    key_field: "POID_PRODUCT"
     value_field: "CD_PAQUETE"
     delimiter: ","
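
Note: the renamed bucket and the emptied prefixes are consumed through the YAML config that set_dag_1() loads (see the hunk for dags/dag_transformacion_tacomventas_promoresidencial.py below). A minimal sketch of that lookup, assuming the nesting shown in the hunk above:

# Minimal sketch (not project code) of how the renamed bucket is read,
# assuming the app_conf.yml layout shown in the hunk above.
import yaml
from yaml.loader import SafeLoader

conf_path = "/opt/airflow/dags/app_conf.yml"  # production path, as set in the DAG hunk below
with open(conf_path) as f:
    data = yaml.load(f, Loader=SafeLoader)

s3_params = data["general"]["s3_parameters"]
print(s3_params["s3_conn_id"], s3_params["bucket"])  # expected: bcom_tp_connection prueba-ca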
dags/components/S3Route.py

@@ -27,13 +27,13 @@ def get_df_from_s3(conn: str, bucket: str, key: str, period: str, delimiter: str
         response.update({'filename': s3_data["filename"]})
         file_type = get_type_file(s3_data["filename"])
         if file_type == FileTypeEnum.EXCEL:
-            dataframe = pd.read_excel(s3_data["data"], engine="openpyxl")
+            dataframe = pd.read_excel(s3_data["data"], engine="openpyxl", dtype='object')
         elif file_type == FileTypeEnum.OLD_EXCEL:
-            dataframe = pd.read_excel(s3_data["data"], engine="xlrd")
+            dataframe = pd.read_excel(s3_data["data"], engine="xlrd", dtype='object')
         elif file_type == FileTypeEnum.TEXT or file_type == FileTypeEnum.CSV:
             str_data = str(s3_data["data"].getvalue(), encoding='UTF-8', errors='ignore')
             data = StringIO(str_data)
-            dataframe = pd.read_csv(data, sep=delimiter)
+            dataframe = pd.read_csv(data, sep=delimiter, dtype='object')
         response.update({'df': dataframe})
     except Exception as e:
         logger.error(f"Error trayendo y transformando a DataFrame desde S3 con periodo {period}. {e}")
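
The only functional change here is the added dtype='object' argument, which keeps every column as raw strings instead of letting pandas infer numeric types (relevant for code columns such as CD_PAQUETE, where leading zeros and exact text matter). A standalone illustration with made-up sample data:

# Standalone illustration (made-up data) of the effect of dtype='object'.
from io import StringIO
import pandas as pd

raw = "CD_PAQUETE,CUENTA\n00123,7\n00456,8\n"

inferred = pd.read_csv(StringIO(raw), sep=",")                   # CD_PAQUETE inferred as int64
as_object = pd.read_csv(StringIO(raw), sep=",", dtype='object')  # CD_PAQUETE kept as strings

print(inferred["CD_PAQUETE"].tolist())   # [123, 456]
print(as_object["CD_PAQUETE"].tolist())  # ['00123', '00456']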
dags/dag_transformacion_tacomventas_promoresidencial.py

@@ -18,6 +18,11 @@ import logging
 logger = logging.getLogger()
 DAG_NAME = "BCOM_DAG_TRANSFORMACION_TACOMVENTAS_PROMOCIONESRESIDENCIAL"
+PROMOCION_3PA2P = "3P a 2P"
+PROMOCION_PARRILLA = "Parrilla"
+PROMOCION_HBO = "Promocion HBO"
+PROMOCION_DEFAULT = "Promocion"
+PROMOCIONES_NO_CONSIDERADAS_TV_CANALES = "Adicional|Soundbox|SOUNDBOX"
 DEFAULT_ARGS = {
     'owner': 'airflow',
@@ -56,25 +61,26 @@ def dag1_id2(promo: pd.DataFrame, relation_poid: pd.DataFrame, key_field: str, v
 def dag1_id3(tacom: pd.DataFrame, promo: pd.DataFrame) -> pd.DataFrame:
     result = pd.DataFrame()
     try:
         promo["CUENTA"] = promo["CUENTA"].astype(int, errors='ignore')
         promo["CD_PAQUETE_PROMO"] = promo["CD_PAQUETE"].astype(int, errors='ignore')
         promo.drop("CD_PAQUETE", axis=1, inplace=True)
         promo = promo.drop_duplicates(["CUENTA", "CD_PAQUETE_PROMO"])
         result = tacom.merge(promo, how='left', left_on=["CD_CUENTA", "CD_PAQUETE"], right_on=["CUENTA", "CD_PAQUETE_PROMO"])
         result["CD_PAQUETE"] = result["CD_PAQUETE"].astype(int).astype(str)
-        no_consider = "Adicional|Soundbox|SOUNDBOX"
         result["PROMOCION"] = np.where((result["CD_PAQUETE_PROMO"].isna()) | (result["CD_PAQUETE_PROMO"] == "None") | (result["CD_PAQUETE_PROMO"] == "nan"), None,
-                              np.where((result["CD_PAQUETE"].notna()) & (result["CD_PAQUETE"].str.len() <= 5), "3P a 2P",
-                              np.where((result["NOMBRE_PRODUCTO"].str.contains("TV", na=False)) & (~result["NOMBRE_PRODUCTO"].str.contains(no_consider, na=False)), "Parrilla",
-                              np.where((result["NOMBRE_PRODUCTO"].str.contains("CANALES", na=False)) & (~result["NOMBRE_PRODUCTO"].str.contains(no_consider, na=False)), "Parrilla",
-                              np.where(result["NOMBRE_PRODUCTO"].str.contains("HBO MAX", na=False), "Promocion HBO",
-                              np.where(result["NOMBRE_PRODUCTO"].str.contains("PAQUETE HBO", na=False), "Promocion HBO",
-                              np.where(result["NOMBRE_PRODUCTO"].str.contains("STAR PREMIUM", na=False), "Promocion STAR PREMIUM",
-                              "PROMOCION")))))))
+                              np.where((result["CD_PAQUETE"].notna()) & (result["CD_PAQUETE"].str.len() <= 5), PROMOCION_3PA2P,
+                              np.where((result["NOMBRE_PRODUCTO"].str.contains("TV", na=False)) & (~result["NOMBRE_PRODUCTO"].str.contains(PROMOCIONES_NO_CONSIDERADAS_TV_CANALES, na=False)), PROMOCION_PARRILLA,
+                              np.where((result["NOMBRE_PRODUCTO"].str.contains("CANALES", na=False)) & (~result["NOMBRE_PRODUCTO"].str.contains(PROMOCIONES_NO_CONSIDERADAS_TV_CANALES, na=False)), PROMOCION_PARRILLA,
+                              np.where(result["NOMBRE_PRODUCTO"].str.contains("HBO MAX", na=False), PROMOCION_HBO,
+                              np.where(result["NOMBRE_PRODUCTO"].str.contains("PAQUETE HBO", na=False), PROMOCION_HBO,
+                              np.where(result["NOMBRE_PRODUCTO"].str.contains("STAR PREMIUM", na=False), "Promocion STAR PREMIUM",
+                              PROMOCION_DEFAULT)))))))
         result["CD_PAQUETE_PROMO"] = np.where((result["CD_PAQUETE_PROMO"] == 'nan') | (result["CD_PAQUETE_PROMO"] == 'None'), None, result["CD_PAQUETE"])
@@ -91,7 +97,7 @@ def dag1_id4(df: pd.DataFrame, df_promo: pd.DataFrame, key_field: str, value_fie
         df_promo[value_field] = df_promo[value_field].astype(str, errors='ignore')
         df = df.merge(df_promo, how='outer', left_on="CD_PAQUETE", right_on=key_field)
         df = df.dropna(how='all', subset=["CD_EMPRESA", "CD_FOLIO", "CD_CUENTA"])
-        df["CD_PAQUETE"] = np.where((df["PROMOCION"] == "3P a 2P") & (df[key_field].notna()), df[value_field], df["CD_PAQUETE"])
+        df["CD_PAQUETE"] = np.where((df["PROMOCION"] == PROMOCION_3PA2P) & (df[key_field].notna()), df[value_field], df["CD_PAQUETE"])
         df = df.drop([key_field, value_field], axis=1)
     except Exception as e:
         logger.error(f"Error DAG1_ID4. {e}")
@@ -240,7 +246,7 @@ def set_dag_1():
     from yaml.loader import SafeLoader
     # Cambiar conf_path dependiendo del ambiente, en prod usando k8 y contenedores usar /opt/airflow/dags/app_conf.yml
     # En desarrollo, cualquiera que apunte a su carpeta dags
-    conf_path = "/root/airflow/dags/app_conf.yml"
+    conf_path = "/opt/airflow/dags/app_conf.yml"
     with open(conf_path) as f:
         data = yaml.load(f, Loader=SafeLoader)
     general_cnf = data["general"]
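
For readability, the nested np.where chain in dag1_id3 can be restated as a priority list with np.select. The sketch below is an illustration only, not code from this commit; classify_promocion is a hypothetical helper name, and it reuses the constants introduced at the top of the file:

import numpy as np
import pandas as pd

# Constants as added at the top of the DAG in this commit.
PROMOCION_3PA2P = "3P a 2P"
PROMOCION_PARRILLA = "Parrilla"
PROMOCION_HBO = "Promocion HBO"
PROMOCION_DEFAULT = "Promocion"
PROMOCIONES_NO_CONSIDERADAS_TV_CANALES = "Adicional|Soundbox|SOUNDBOX"

def classify_promocion(result: pd.DataFrame) -> pd.Series:
    """Hypothetical restatement of the nested np.where chain; conditions are
    evaluated top-down, so list order preserves the original priority."""
    producto = result["NOMBRE_PRODUCTO"]
    excluded = producto.str.contains(PROMOCIONES_NO_CONSIDERADAS_TV_CANALES, na=False)
    conditions = [
        result["CD_PAQUETE_PROMO"].isna() | result["CD_PAQUETE_PROMO"].isin(["None", "nan"]),
        result["CD_PAQUETE"].notna() & (result["CD_PAQUETE"].str.len() <= 5),
        producto.str.contains("TV|CANALES", na=False) & ~excluded,
        producto.str.contains("HBO MAX|PAQUETE HBO", na=False),
        producto.str.contains("STAR PREMIUM", na=False),
    ]
    choices = [None, PROMOCION_3PA2P, PROMOCION_PARRILLA, PROMOCION_HBO, "Promocion STAR PREMIUM"]
    return pd.Series(np.select(conditions, choices, default=PROMOCION_DEFAULT), index=result.index)

# Tiny demo frame (made-up values) to show the priority order in action.
demo = pd.DataFrame({
    "CD_PAQUETE_PROMO": ["123", None, "456", "789"],
    "CD_PAQUETE": ["12345", "1234567", "1234567", "1234567"],
    "NOMBRE_PRODUCTO": ["PLAN TV", "PLAN TV", "PAQUETE HBO MAX", "SOUNDBOX CANALES"],
})
print(classify_promocion(demo).tolist())  # ['3P a 2P', None, 'Promocion HBO', 'Promocion']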
deploy-k8/airflow-envvars-configmap.yaml

@@ -24,5 +24,9 @@ data:
   _AIRFLOW_WWW_USER_CREATE: 'true'
   _AIRFLOW_WWW_USER_USERNAME: admin
   _AIRFLOW_WWW_USER_PASSWORD: admin
+  S3_DAGS_DIR: 's3://prueba1234568/dags'
+  SYNCHRONYZE_DAG_DIR: '30'
+  MINIO_SERVER: 'http://192.168.49.2:9000'
+  MINIO_DAGS_DIR: '/prueba-ca/dags'
deploy-k8/airflow-scheduler-deployment.yaml

@@ -20,15 +20,11 @@ spec:
     spec:
       containers:
       - name: airflow-scheduler
-        image: cristianfernando/airflow_custom:0.0.1
+        image: apache/airflow:2.5.3
         args: ["scheduler"]
         envFrom:
         - configMapRef:
             name: airflow-envvars-configmap
-        resources:
-          limits:
-            memory: "512Mi"
-            # cpu: "100"
         volumeMounts:
         - name: dags-host-volume
           mountPath: /opt/airflow/dags
deploy-k8/airflow-secrets.yaml (new file, mode 100644)

+apiVersion: v1
+kind: Secret
+metadata:
+  name: credentials
+type: Opaque
+data:
+  AWS_ACCESS_KEY: bWluaW9hZG1pbg==
+  AWS_SECRET_KEY: bWluaW9hZG1pbg==
+  MINIO_USER: bWluaW9hZG1pbg==
+  MINIO_PASSWORD: bWluaW9hZG1pbg==
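
All four values in the new Secret are the same base64 string, which decodes to the MinIO default credential minioadmin; a quick check:

# Decode one of the Opaque secret values shown above.
import base64

print(base64.b64decode("bWluaW9hZG1pbg==").decode("utf-8"))  # minioadmin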
deploy-k8/airflow-webserver-deployment.yaml

@@ -20,15 +20,11 @@ spec:
     spec:
      containers:
       - name: airflow-webserver
-        image: cristianfernando/airflow_custom:0.0.1
+        image: apache/airflow:2.5.3
         args: ["webserver"]
         envFrom:
         - configMapRef:
             name: airflow-envvars-configmap
-        resources:
-          limits:
-            memory: "512Mi"
-            # cpu: "100"
         ports:
         - containerPort: 8080
         volumeMounts:
deploy-k8/pod_template.yaml

@@ -6,21 +6,12 @@ spec:
   containers:
   - args: []
     command: []
+    envFrom:
+    - configMapRef:
+        name: airflow-envvars-configmap
     env:
-    - name: AIRFLOW__CORE__EXECUTOR
-      value: LocalExecutor
-    - name: DB_HOST
-      value: postgres
-    - name: DB_DATABASE
-      value: airflow
-    - name: DB_USER
-      value: airflow
-    - name: DB_PASSWORD
-      value: airflow
-    - name: AIRFLOW__DATABASE__SQL_ALCHEMY_CONN
-      value: postgresql+psycopg2://airflow:airflow@postgres/airflow
     - name: AIRFLOW__LOGGING__LOGGING_LEVEL
       value: INFO
     image: dumy-image
     imagePullPolicy: IfNotPresent
     name: base
@@ -30,7 +21,7 @@ spec:
    - name: logs-persistent-storage
      mountPath: /opt/airflow/logs
   hostNetwork: false
-  restartPolicy: Never
+  restartPolicy: IfNotPresent
   securityContext:
     runAsUser: 50000
   nodeSelector: {}
deploy-k8/script-apply.sh

@@ -3,6 +3,7 @@ kubectl apply -f airflow-rbac.yaml
 kubectl apply -f postgres-deployment.yaml
 kubectl apply -f postgres-service.yaml
 kubectl apply -f airflow-envvars-configmap.yaml
+kubectl apply -f airflow-secrets.yaml
 kubectl apply -f airflow-webserver-deployment.yaml
 kubectl apply -f airflow-webserver-service.yaml
 kubectl apply -f airflow-scheduler-deployment.yaml
deploy-k8/script-delete.sh

 kubectl delete -f airflow-rbac.yaml
 kubectl delete -f postgres-service.yaml
 kubectl delete -f postgres-deployment.yaml
+kubectl delete -f airflow-secrets.yaml
 kubectl delete -f airflow-envvars-configmap.yaml
 kubectl delete -f airflow-webserver-service.yaml
 kubectl delete -f airflow-webserver-deployment.yaml
deploy-k8/sync-dags-deployment.yaml

@@ -16,18 +16,29 @@ spec:
     spec:
       containers:
       - args:
-        - while true; aws s3 sync --exact-timestamps --delete 's3://prueba1234568/dags' '/dags'; do sleep 30; done;
+        - mc alias set minio ${MINIO_SERVER:-http://192.168.49.2:9000} ${MINIO_USER:-minioadmin}
+          ${MINIO_PASSWORD:-minioadmin}; while true; mc mirror --remove --overwrite minio${MINIO_DAGS_DIR:-/prueba-ca/dags} /dags;
+          do sleep ${SYNCHRONYZE_DAG_DIR:-30}; done;
         command:
         - /bin/bash
         - -c
         - --
-        name: sync-dags
-        image: amazon/aws-cli:2.1.34
+        name: sync-dags-minio
+        image: minio/mc
+        envFrom:
+        - configMapRef:
+            name: airflow-envvars-configmap
         env:
-        - name: AWS_ACCESS_KEY_ID
-          value: AKIAQAAMXO3Z4BHNKEIE
-        - name: AWS_SECRET_ACCESS_KEY
-          value: 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
+        - name: MINIO_USER
+          valueFrom:
+            secretKeyRef:
+              key: MINIO_USER
+              name: credentials
+        - name: MINIO_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              key: MINIO_PASSWORD
+              name: credentials
         volumeMounts:
         - name: dags-host-volume
           mountPath: /dags
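
The new sidecar registers an mc alias from the configmap and secret values, then mirrors the MinIO DAGs prefix into the shared /dags volume on a fixed interval. A hypothetical Python rendering of that loop, for readability only (the actual container runs the /bin/bash -c command shown above; names and defaults mirror the manifest):

# Hypothetical Python equivalent of the sync-dags-minio loop; not part of the commit.
import os
import subprocess
import time

minio_server = os.getenv("MINIO_SERVER", "http://192.168.49.2:9000")
minio_user = os.getenv("MINIO_USER", "minioadmin")
minio_password = os.getenv("MINIO_PASSWORD", "minioadmin")
dags_dir = os.getenv("MINIO_DAGS_DIR", "/prueba-ca/dags")
interval = int(os.getenv("SYNCHRONYZE_DAG_DIR", "30"))

# Register the MinIO endpoint once, then mirror the bucket prefix into /dags forever.
subprocess.run(["mc", "alias", "set", "minio", minio_server, minio_user, minio_password], check=True)
while True:
    subprocess.run(["mc", "mirror", "--remove", "--overwrite", f"minio{dags_dir}", "/dags"], check=True)
    time.sleep(interval)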