general / bcom-tp-etl-transformation-pipelines / Commits / 05e1b5a3

Commit 05e1b5a3 authored Oct 06, 2023 by Erly Villaroel

    Definition of procedure and connections

parent 2c9764fa

Showing 8 changed files with 110 additions and 47 deletions (+110 / -47)
dags/components/Extractor.py         +4   -0
dags/components/Generation.py        +12  -1
dags/components/Transformation.py    +27  -4
dags/components/Utils.py             +5   -1
dags/components/Xcom.py              +3   -0
dags/dag_conf.yml                    +31  -19
dags/dag_transformacion_bcom.py      +7   -3
dags/procedure_prueba.json           +21  -19
dags/components/Extractor.py

@@ -66,8 +66,12 @@ def on_failure_extractor(context) -> None:
     ti = context["ti"]
     task_name = f"{ti.task_id}_{ti.map_index}"
     selects = Variable.get('SELECTS', default_var=[], deserialize_json=True)
+    logger.info(f"TASK_NAME_EXTRACTOR: {task_name}")
+    logger.info(f"SELECTS_EXTRACTOR: {selects}")
     command = selects[ti.map_index]
+    logger.info(f"COMAND_EXTRACTOR: {command}")
     tablename = select_multiple(command[1])["tablename"]
+    logger.info(f"TABLENAME_EXTRACTOR: {tablename}")
     exception = str(context["exception"])
     status = ProcessStatusEnum.FAIL.value
     init_process = ti.xcom_pull(task_ids="EXTRACTORS", key="INIT_PROCESS_DATETIME_" + str(ti.map_index))[0]
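The four added logger calls trace how the failure callback resolves which mapped extraction failed. Below is a minimal plain-Python sketch of that lookup, with hypothetical Variable contents and map index (no Airflow deployment required):

# The SELECTS Variable holds a JSON list; ti.map_index picks the entry
# belonging to the expanded task instance that failed. All values here are
# hypothetical stand-ins for the real Variable and task instance.
selects = ["select * FROM estudiantes", "select * FROM profesores"]
map_index = 1                              # would come from ti.map_index
task_name = f"EXTRACTORS_{map_index}"      # mirrors f"{ti.task_id}_{ti.map_index}"
command = selects[map_index]
print(task_name, "->", command)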
dags/components/Generation.py

@@ -78,6 +78,9 @@ def on_success_generator(context) -> None:
     ti = context["ti"]
     task_name = f"{ti.task_id}_{ti.map_index}"
     selects = Variable.get('GENERATES', default_var=[], deserialize_json=True)
+    logger.info(f"TASK_NAME: {task_name}")
+    logger.info(f"SELECTS: {selects}")
+    logger.info(f"TI_MAP_INDEX: {ti.map_index}")
     table = selects[ti.map_index]
     table = select_multiple(table)["tablename"]
     status = ProcessStatusEnum.SUCCESS.value

@@ -142,7 +145,7 @@ def generate_and_deploy(command: str, intern_conn, params: Dict[str, Any], timez
         except StopIteration:
             break
-    conn_id = params["s3_params"]["connection_id"]
     list_outputs = params["s3_params"]
     size = os.path.getsize(tmp_file)
     for output in list_outputs:

@@ -150,6 +153,7 @@ def generate_and_deploy(command: str, intern_conn, params: Dict[str, Any], timez
             continue
         bucket = list_outputs[output]["bucket"]
         prefix = list_outputs[output]["prefix"]
+        conn_id = list_outputs[output]["connection_id"]
         if not prefix.endswith("/"):
             prefix += "/"
         file_key = prefix + tmp_file[tmp_file.rfind("/") + 1:]

@@ -170,7 +174,9 @@ def get_generate_from_xcom(**kwargs):
     task = kwargs['ti']
     final_outputs = []
     conf = task.xcom_pull(task_ids="VALIDATE_TRANSFORMATION", key="CONTROL-CONFIG")
+    logger.info(f"CONF_GENERATION: {conf}")
     tasks = get_tasks_from_control(conf, "generator")
+    logger.info(f"TASKS_GENERATION: {tasks}")
     tasks_with_save = []
     definition = task.xcom_pull(task_ids="SCRIPTS-EXTRACTOR", key="EXTRACTION-DEFINITION-JSON")

@@ -184,12 +190,17 @@ def get_generate_from_xcom(**kwargs):
     xcom_keys = task.xcom_pull(task_ids="SCRIPTS-EXTRACTOR", key="XCOM-EXTRACTION-NAMES")
     logger.debug(xcom_keys)
     for key in xcom_keys:
+        logger.info(f"KEY: {key}")
         if not key.startswith(OperationTypeEnum.SELECT.value) and not key.startswith(OperationTypeEnum.PROCEDURE.value):
             continue
         xcom_outputs = task.xcom_pull(task_ids="SCRIPTS-EXTRACTOR", key=key)
         logger.info(f"Trayendo tablas {xcom_outputs}")
         for select in xcom_outputs:
+            logger.info(f"SELECT: {select}")
             tablename = select_multiple(select)["tablename"]
+            logger.info(f"TABLENAME {tablename}")
+            logger.info(f"TASKS: {tasks}")
+            logger.info(f"TASKS_WITH_SAVE: {tasks_with_save}")
             if (tasks["reset"] or tasks["status"] == ProcessStatusEnum.SUCCESS.value or select not in success_tasks) and tablename in tasks_with_save:
                 final_outputs.append(select)
     logger.info(f"Final outputs: {final_outputs}")
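The net effect of the second and third hunks is that the S3 connection is no longer read once from params["s3_params"]["connection_id"] but per output entry, so each result file can target its own bucket and connection. A minimal sketch of the resulting key construction, with hypothetical output entries and file name:

# Each output entry now carries its own bucket/prefix/connection_id.
list_outputs = {
    "ESTUDIANTES_1": {"bucket": "pruebairflow", "prefix": "bcom_results",
                      "connection_id": "prueba_af"},
}
tmp_file = "/tmp/ESTUDIANTES_1.csv"    # hypothetical generated file
for output in list_outputs:
    prefix = list_outputs[output]["prefix"]
    if not prefix.endswith("/"):
        prefix += "/"                  # normalize the prefix, as the existing code does
    file_key = prefix + tmp_file[tmp_file.rfind("/") + 1:]
    print(list_outputs[output]["connection_id"],
          list_outputs[output]["bucket"], file_key)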
dags/components/Transformation.py

@@ -5,7 +5,9 @@ from airflow.operators.python import PythonOperator
 from airflow.models import Variable
 from airflow.decorators import task
 from airflow.exceptions import AirflowSkipException
+from components.S3Route import get_files_from_prefix, get_file_from_prefix
+from components.Xcom import save_commands_to_xcom
+from components.Utils import update_sql_commands_2
 from components.Control import get_tasks_from_control, update_new_process
 from components.S3Route import load_control_to_s3
 from components.Xcom import delete_all_xcom_tasks, delete_task_instances

@@ -91,8 +93,12 @@ def transformations(xcom_commands: str, intern_conn, timezone: str, **kwargs):
     script_name = xcom_commands[0]
     commands = xcom_commands[1]
     logger.info(f"Ejecutando transformaciones del script {script_name}")
+    not_procedure = ["UPDATE", "SELECT", "CREATE", "ALTER", "DROP", "DELETE", "INSERT",
+                     "GRANT", "REVOKE", "TRUNCATE", "COPY", "COMMIT", "ROLLBACK", "USE"]
     with engine.connect() as connection:
         for command in commands:
+            if any(command.startswith(palabra) or command.startswith(palabra.lower()) for palabra in not_procedure):
+                print(2)
             logger.info(f"Ejecutando comando de transformación: {command}")
             _ = connection.execute(command)
     end_process_datetime = datetime_by_tzone(timezone).strftime('%d/%m/%Y %H:%M:%S')

@@ -100,8 +106,23 @@ def transformations(xcom_commands: str, intern_conn, timezone: str, **kwargs):
 @task(task_id="MASTER_TRANSFORMATION", trigger_rule='none_skipped')
-def get_trans_from_xcom(**kwargs):
+def get_trans_from_xcom(provider, store_procedure, procedure_mask, label_tablename, transform_mask, order_delimiter, db_intern_conn, **kwargs):
     task = kwargs['ti']
+    #CAMBIOS PARA TRAER LOS SP
+    engine = db_intern_conn.engine
+    conn_id = store_procedure["s3_params"]["connection_id"]
+    bucket = store_procedure["s3_params"]["bucket"]
+    prefix = store_procedure["s3_params"]["prefix"]
+    procedures = get_files_from_prefix(conn_id, bucket, prefix, provider)
+    with engine.connect() as connection:
+        for procedure in procedures:
+            procedure = procedure[1]
+            logger.info(f"Ejecutando creacion de procedure: {procedure}")
+            _ = connection.execute(procedure)
+    save_commands_to_xcom(procedures, kwargs['ti'], procedure_mask, transform_mask, procedure_mask, order_delimiter)
+    logger.debug(f"Procedures cargados en Xcom: {procedures}")
     transforms_per_file = []
     conf = task.xcom_pull(task_ids="VALIDATE_EXTRACTION", key="CONTROL-CONFIG")
     tasks = get_tasks_from_control(conf, "transformation")

@@ -129,11 +150,13 @@ def get_trans_from_xcom(**kwargs):
 def get_transform_task_group(db_intern_conn, timezone: str, control_s3: Dict[str, Any],
-                             provider: str) -> TaskGroup or None:
+                             provider: str, store_procedure: Dict, procedure_mask: str, label_tablename: str,
+                             transform_mask: str, order_delimiter: str) -> TaskGroup or None:
     group = None
     try:
         with TaskGroup(group_id="TransformacionDeDatos", prefix_group_id=False) as group:
-            transforms = get_trans_from_xcom()
+            transforms = get_trans_from_xcom(provider, store_procedure, procedure_mask, label_tablename,
+                                             transform_mask, order_delimiter, db_intern_conn)
             tasks = PythonOperator.partial(
                 task_id="TRANSFORMATIONS",
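The main change here is that MASTER_TRANSFORMATION now pulls stored-procedure scripts from S3 and executes them against the intern database before the regular transformations run. A minimal sketch of that deployment loop, assuming an in-memory SQLite engine in place of db_intern_conn.engine and hypothetical (name, body) tuples in place of what get_files_from_prefix returns:

from sqlalchemy import create_engine, text

# Stand-ins: SQLite instead of the MySQL intern connection, and an inlined
# list instead of the S3-fetched scripts. Only the loop shape mirrors the diff.
engine = create_engine("sqlite://")
procedures = [("1.procedure_estudiantes.sql", "CREATE TABLE estudiantes (id INTEGER)")]
with engine.connect() as connection:
    for procedure in procedures:
        body = procedure[1]                 # keep only the SQL body, as the diff does
        _ = connection.execute(text(body))  # deploy each procedure/DDL statement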
dags/components/Utils.py

@@ -97,10 +97,14 @@ def update_sql_commands_2(dataset: List[Tuple[str, str]], label_tablename: str,
                           extraction_mask: str) -> List[Tuple[str, List[str]]]:
     result = []
     comments = [CommentsScriptEnum[item].value for item in CommentsScriptEnum._member_names_ if item != CommentsScriptEnum.EXTENDED.name]
+    logger.info(f"COMENTS: {comments}")
     try:
         for row in dataset:
+            logger.info(f"DATASET: {dataset}")
             data = row[1].split("\n")
+            logger.info(f"data inicial: {data}")
             data = [item.replace("\r", "") for item in data if item.strip() != '']
+            logger.info(f"data final: {data}")
             final_data = []
             start_sentence = True
             add_next = False

@@ -108,6 +112,7 @@ def update_sql_commands_2(dataset: List[Tuple[str, str]], label_tablename: str,
             tablename = ""
             extend_comment = False
             for item in data:
+                logger.info(f"Item in data: {item}")
                 if not extend_comment and item.strip().startswith(CommentsScriptEnum.EXTENDED.value):
                     extend_comment = True
                     continue

@@ -286,7 +291,6 @@ def generateModel(tablename: str, attributes: List[Dict[str, Any]], indexes: Lis
     model = type(modelName, (InsumoModel,), model_args)
     try:
         for attribute in attributes:
-            logger.debug(f"attribute: {attribute}")
             if attribute["datatype"] == DataTypeEnum.TEXT.name and "maxLength" in attribute.keys():
                 setattr(model, attribute["name"],
                         Column(DataTypeOrmEnum[attribute["datatype"]].value(attribute["maxLength"])))
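For context on the first hunk: the comments list collects every single-line comment marker from CommentsScriptEnum, leaving the EXTENDED (block-comment) marker to the stateful extend_comment check added in the second hunk. A runnable sketch with hypothetical enum values (the real markers live in the repo's enums module):

from enum import Enum

class CommentsScriptEnum(Enum):
    DASHES = "--"    # hypothetical placeholder values
    NUMERAL = "#"
    EXTENDED = "/*"

# Same comprehension as the code under diff: every marker except EXTENDED.
comments = [CommentsScriptEnum[item].value
            for item in CommentsScriptEnum._member_names_
            if item != CommentsScriptEnum.EXTENDED.name]
print(comments)  # ['--', '#']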
dags/components/Xcom.py

@@ -14,11 +14,14 @@ def save_commands_to_xcom(dataset: List[Tuple[str, List[str]]], task, extract_ma
     final_names_xcom = []
     try:
         for data in dataset:
+            logger.info(f"DATASET: {dataset}")
             logger.info(f"Guardando Xcom en llave {data[0]}")
             name = data[0]
             base_name = name
             if order_delimiter == ".":
                 base_name = base_name[:base_name.rfind(".")]
+            logger.info(f"BASE_NAME: {base_name}")
+            logger.info(f"ORDER DELIMITER: {order_delimiter}")
             order = base_name.split(order_delimiter)
             if len(order) < 2:
                 raise AssertionError(f"Script {name} no tiene prefijo de orden. Validar nombre de script")
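The new BASE_NAME / ORDER DELIMITER logging sits around the order-prefix check: with a "." delimiter the file extension is stripped first, and the remaining name must still split into at least two parts ("<order><delimiter><name>") or an AssertionError is raised. A standalone sketch of that rule, with hypothetical script names:

# Pure-Python restatement of the validation in save_commands_to_xcom.
def has_order_prefix(name: str, order_delimiter: str = ".") -> bool:
    base_name = name
    if order_delimiter == ".":
        base_name = base_name[:base_name.rfind(".")]  # drop the extension first
    return len(base_name.split(order_delimiter)) >= 2

print(has_order_prefix("1.procedure_estudiantes.sql"))  # True: "1" + name
print(has_order_prefix("procedure_estudiantes.sql"))    # False: no order prefix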
dags/dag_conf.yml

@@ -6,24 +6,25 @@ app:
   sources:
     source1:
       type: mysql
-      host: 192.168.21.52
+      host: 192.168.1.13
       port: 13306
       username: root
-      password: root1234
-      database: bcom_tp_res_bk
+      password: root
+      database: prueba
       service:
       schema: sources
   transformation:
     type: mysql
-    host: 192.168.1.4
+    host: 192.168.1.13
     port: 13306
     username: root
     password: root
-    database: prueba_bcom2
+    database: prueba_ca
     service:
     schema: intern_db
   chunksize: 4000
   label_multiple_select: TABLENAME
+  label_transform_procedure: STORE
   source_mask: select  # Sufijo (S)
   procedure_mask: procedure  # S
   transformation_mask: transform  # S

@@ -31,14 +32,20 @@ app:
   cloud_provider: local
   scripts:
     s3_params:
-      bucket: prueba1234568
-      prefix: bcom_scripts
+      bucket: prueba-id
+      prefix: prueba_bcom/bcom_scripts
       connection_id: conn_script
+  store_procedures:
+    s3_params:
+      bucket: prueba-id
+      prefix: prueba_bcom/bcom_store_procedures
+      connection_id: conn_script
   control:
     s3_params:
       connection_id: conn_script
-      bucket: prueba1234568
-      prefix: bcom_control
+      bucket: prueba-id
+      prefix: prueba_bcom/bcom_control
       filename: control_<period>.json
       timezone: 'GMT-5'
   outputs:

@@ -48,21 +55,26 @@ app:
     delimiter: '|'
     tmp_path: /tmp
     s3_params:
-      TACOMVENTAS:
-        bucket: prueba1234568
-        prefix: bcom_results
-      RANGO_VENTAS_CON_PROMOCION:
-        bucket: prueba-id
-        prefix: prueba_bcom/bcom_results
-        connection_id: conn_script
-      CATALOGO_PROMOCIONES:
-        bucket: pruebairflow
-        prefix: bcom_results
-        connection_id: prueba_af
+      ESTUDIANTES_1:
+        bucket: pruebairflow
+        prefix: bcom_results
+        connection_id: prueba_af
+      ESTUDIANTES_11:
+        bucket: pruebairflow
+        prefix: bcom_results
+        connection_id: prueba_af
   report:
     s3_params:
-      bucket: prueba1234568
-      prefix: bcom_report
+      bucket: prueba-id
+      prefix: prueba_bcom/bcom_report
       connection_id: conn_script
       filename: report_<datetime>.xlsx
       datetime_pattern: '%Y-%m-%d %H:%M:%S'
   procedure:
-    filepath: "/opt/airflow/dags/procedure_definition.json"
+    filepath: "/opt/airflow/dags/procedure_prueba.json"
dags/dag_transformacion_bcom.py

@@ -59,10 +59,11 @@ def generate_and_deploy_results(intern_conn, parameters: Dict[str, Any], timezon
     return groups


-def transformation(intern_conn, timezone: str, control_s3: Dict[str, Any], provider: str) -> TaskGroup:
+def transformation(intern_conn, timezone: str, control_s3: Dict[str, Any], provider: str, store_procedure: Dict,
+                   procedure_mask: str, label_tablename: str, transform_mask: str, order_delimiter: str) -> TaskGroup:
     groups = None
     try:
-        groups = get_transform_task_group(intern_conn, timezone, control_s3, provider)
+        groups = get_transform_task_group(intern_conn, timezone, control_s3, provider, store_procedure, procedure_mask, label_tablename, transform_mask, order_delimiter)
     except Exception as e:
         logger.error(f"Error general de transformación de datos. {e}")
     finally:

@@ -190,10 +191,13 @@ def set_dag():
     extractions = extraction(source_db, intern_db, timezone, control_s3, conf["cloud_provider"], chunksize)
     # Creación de grupo de tasks para las transformaciones
-    transformations = transformation(intern_db, timezone, control_s3, conf["cloud_provider"])
+    store_procedure = conf["store_procedures"]
+    transformations = transformation(intern_db, timezone, control_s3, conf["cloud_provider"], store_procedure,
+                                     procedure_mask, conf["label_transform_procedure"], transform_mask, order_delimiter)
     # Creación de grupo de tasks para la generación y despliegue de archivos resultados
     outputs_conf = conf["outputs"]
     result = generate_and_deploy_results(intern_db, outputs_conf, timezone, control_s3, conf["cloud_provider"])
     # Creación de tasks de limpiadores
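set_dag() now resolves conf["store_procedures"] and threads it, together with the masks and label_transform_procedure, down to transformation() and from there to the task group. A stubbed sketch of that wiring; transformation() is replaced by a stand-in and the conf dict inlines only the keys this commit touches:

# Stub mirroring the new transformation() signature; returns the values it
# would forward, just to show the parameter threading.
def transformation(intern_conn, timezone, control_s3, provider, store_procedure,
                   procedure_mask, label_tablename, transform_mask, order_delimiter):
    return (provider, store_procedure["s3_params"]["prefix"], label_tablename)

conf = {
    "cloud_provider": "local",
    "label_transform_procedure": "STORE",
    "store_procedures": {"s3_params": {"bucket": "prueba-id",
                                       "prefix": "prueba_bcom/bcom_store_procedures",
                                       "connection_id": "conn_script"}},
}
store_procedure = conf["store_procedures"]
print(transformation(None, "GMT-5", {}, conf["cloud_provider"], store_procedure,
                     "procedure", conf["label_transform_procedure"], "transform", "."))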
dags/procedure_prueba.json

 [
+  {
+    "identifier": "obtenerEstudiantes",
+    "transformation_store_procedure": true
+  },
   {
     "identifier": "ESTUDIANTES_11",
     "fields": [

@@ -29,7 +33,12 @@
       }
     ],
     "indexes": [
-      "ID"
+      {
+        "name": "indice1",
+        "index_fields": [
+          "ID"
+        ]
+      }
     ],
     "save_output": true
   },

@@ -37,33 +46,26 @@
     "identifier": "ESTUDIANTES_1",
     "fields": [
       {
-        "name": "ID",
+        "name": "id",
         "datatype": "NUMBER",
         "decimal_precision": 0
       },
       {
-        "name": "Nombre",
-        "datatype": "TEXT",
-        "maxLength": 50
+        "name": "fecha",
+        "datatype": "DATE"
       },
       {
-        "name": "Apellido",
-        "datatype": "TEXT",
-        "maxLength": 50
-      },
-      {
-        "name": "Edad",
-        "datatype": "NUMBER",
-        "decimal_precision": 0
-      },
-      {
-        "name": "CorreoElectronico",
-        "datatype": "TEXT",
-        "maxLength": 100
+        "name": "fecha_tiempo",
+        "datatype": "DATETIME"
       }
     ],
     "indexes": [
-      "ID"
+      {
+        "name": "indice1",
+        "index_fields": [
+          "id"
+        ]
+      }
     ],
     "save_output": true
   }
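Index entries are no longer bare column names but objects carrying a name and an index_fields list. A short sketch of consuming the new shape, inlining a fragment of the definition above:

import json

# Parse a fragment matching the new procedure-definition format and walk
# its indexes; only the keys shown in the diff are used.
fragment = json.loads('''
{
  "identifier": "ESTUDIANTES_1",
  "indexes": [
    {"name": "indice1", "index_fields": ["id"]}
  ]
}
''')
for index in fragment["indexes"]:
    print(index["name"], "covers", ", ".join(index["index_fields"]))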