Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
bcom-tp-etl-transformation-pipelines
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
general
bcom-tp-etl-transformation-pipelines
Commits
b4835849
Commit
b4835849
authored
Jan 11, 2024
by
Cristian Aguirre
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/developer_ev' into developer_ca
# Conflicts: # dags/procedure_prueba.json
parents
c4edffad
852ec9f6
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
112 additions
and
64 deletions
+112
-64
Cleaning.py
dags/components/Cleaning.py
+24
-11
DatabaseTransformation.py
dags/components/DatabaseOperation/DatabaseTransformation.py
+52
-19
Oracle.py
dags/components/Databases/Oracle.py
+7
-5
dag_conf.yml
dags/dag_conf.yml
+21
-21
requirements.txt
deploy-k8/requirements.txt
+8
-8
No files found.
dags/components/Cleaning.py
View file @
b4835849
...
...
@@ -18,13 +18,16 @@ import logging
logger
=
logging
.
getLogger
()
def
validate_clean
(
control_params
:
Dict
[
str
,
Any
],
provider
:
str
,
timezone
:
str
,
engine
,
**
kwargs
)
->
None
:
def
validate_clean
(
control_params
:
Dict
[
str
,
Any
],
provider
:
str
,
timezone
:
str
,
intern_conn
,
**
kwargs
)
->
None
:
engine
=
intern_conn
.
engine
delete_task_instances
()
ti
=
kwargs
[
"ti"
]
#created_Tables = ti.xcom_pull(task_ids="VALIDATE_GENERATOR", key="CREATED_TABLES")
procedures
=
ti
.
xcom_pull
(
task_ids
=
"MASTER_TRANSFORMATION"
,
key
=
"PROC_CREATED"
)
if
procedures
:
for
procedure
in
procedures
:
with
engine
.
connect
()
as
conn
:
result_p
=
bool
(
intern_conn
.
check_procedure
(
procedure
,
conn
))
if
result_p
:
logger
.
info
(
f
"Borrando procedures {procedure}"
)
delete
=
delete_procedure
(
procedure
,
engine
)
if
delete
:
...
...
@@ -60,12 +63,22 @@ def validate_clean(control_params: Dict[str, Any], provider: str, timezone: str,
def
clean
(
command
:
str
,
intern_conn
):
engine
=
intern_conn
.
engine
#tablename = select_multiple(command)["tablename"]
tablename
=
command
logger
.
info
(
f
"Borrando tabla {tablename}"
)
delete
=
delete_table
(
tablename
,
engine
)
if
delete
:
# with engine.connect() as conn:
# result_t = bool(intern_conn.check_table(tablename, conn))
# result_p = bool(intern_conn.check_procedure(tablename, conn))
# if result_t:
# logger.info(f"Borrando tabla {tablename}")
# delete_t = delete_table(tablename, engine)
# elif result_p:
# logger.info(f"Borrando procedure {tablename}")
# delete_p = delete_procedure(tablename, engine)
delete_p
=
delete_procedure
(
tablename
,
engine
)
delete_t
=
delete_table
(
tablename
,
engine
)
if
delete_t
:
logger
.
info
(
f
"Borrado correctamente la tabla {tablename}"
)
elif
delete_p
:
logger
.
info
(
f
"Borrado correctamente el procedure {tablename}"
)
logger
.
info
(
f
"Borrado todas las variables xcom"
)
...
...
@@ -110,7 +123,7 @@ def get_cleaning_task_group(db_intern_conn, control_s3: Dict[str, Any], provider
validate_task
=
PythonOperator
(
task_id
=
"VALIDATE_CLEANER"
,
python_callable
=
validate_clean
,
op_kwargs
=
{
'control_params'
:
control_s3
,
'provider'
:
provider
,
'timezone'
:
timezone
,
'
engine'
:
db_intern_conn
.
engine
},
op_kwargs
=
{
'control_params'
:
control_s3
,
'provider'
:
provider
,
'timezone'
:
timezone
,
'
intern_conn'
:
db_intern_conn
},
trigger_rule
=
'none_skipped'
)
cleaners
>>
tasks
>>
validate_task
...
...
dags/components/DatabaseOperation/DatabaseTransformation.py
View file @
b4835849
import
logging
import
time
from
typing
import
List
import
re
logger
=
logging
.
getLogger
()
...
...
@@ -16,17 +16,33 @@ def execute_transformations(commands: List[str], engine):
def
delete_table
(
tablename
:
str
,
engine
)
->
bool
:
base_Datos
=
engine
.
dialect
.
name
delete
=
False
try
:
command
=
f
'DROP TABLE IF EXISTS {tablename}'
if
base_Datos
==
"oracle"
:
command
=
f
'DROP TABLE {tablename}'
start_time
=
time
.
time
()
with
engine
.
connect
()
as
conn
:
try
:
_
=
conn
.
execute
(
command
)
check_query
=
f
"SELECT table_name FROM all_tables WHERE table_name = '{tablename.upper()}'"
result
=
conn
.
execute
(
check_query
)
exists
=
bool
(
result
.
fetchone
())
except
Exception
as
e
:
logger
.
error
(
f
"Tabla no encontrada. {e}"
)
if
exists
:
_
=
conn
.
execute
(
command
)
delete
=
True
logger
.
debug
(
f
"Duración de borrado: {time.time() - start_time}"
)
elif
base_Datos
==
"mysql"
:
command
=
f
'DROP TABLE IF EXISTS {tablename}'
start_time
=
time
.
time
()
with
engine
.
connect
()
as
conn
:
try
:
_
=
conn
.
execute
(
command
)
delete
=
True
logger
.
debug
(
f
"Duración de borrado: {time.time() - start_time}"
)
except
Exception
as
e
:
logger
.
error
(
f
"Tabla no encontrada. {e}"
)
except
Exception
as
e
:
logger
.
error
(
f
"Error borrando tabla {tablename}. {e}"
)
finally
:
...
...
@@ -34,17 +50,34 @@ def delete_table(tablename: str, engine) -> bool:
def
delete_procedure
(
procedure
:
str
,
engine
)
->
bool
:
base_Datos
=
engine
.
dialect
.
name
delete
=
False
try
:
command
=
f
"DROP PROCEDURE IF EXISTS {procedure}"
if
base_Datos
==
"oracle"
:
command
=
f
"DROP PROCEDURE {procedure}"
start_time
=
time
.
time
()
with
engine
.
connect
()
as
conn
:
try
:
_
=
conn
.
execute
(
command
)
proc
=
re
.
match
(
r'([^(]+)'
,
procedure
)
.
group
(
1
)
check_query
=
f
"SELECT object_name FROM all_procedures WHERE object_name = '{proc.upper()}'"
result
=
conn
.
execute
(
check_query
)
exists
=
bool
(
result
.
fetchone
())
except
Exception
as
e
:
logger
.
error
(
f
"Procedure no encontrado. {e}"
)
if
exists
:
_
=
conn
.
execute
(
command
)
delete
=
True
logger
.
debug
(
f
"Duración de borrado: {time.time() - start_time}"
)
elif
base_Datos
==
"mysql"
:
command
=
f
'DROP PROCEDURE IF EXISTS {procedure}'
start_time
=
time
.
time
()
with
engine
.
connect
()
as
conn
:
try
:
_
=
conn
.
execute
(
command
)
delete
=
True
logger
.
debug
(
f
"Duración de borrado: {time.time() - start_time}"
)
except
Exception
as
e
:
logger
.
error
(
f
"Procedure no encontrada. {e}"
)
except
Exception
as
e
:
logger
.
error
(
f
"Error borrando procedure {procedure}. {e}"
)
finally
:
...
...
dags/components/Databases/Oracle.py
View file @
b4835849
...
...
@@ -2,7 +2,7 @@ from typing import List, Tuple
from
sqlalchemy
import
create_engine
import
oracledb
import
re
from
enums.DatabaseDialectEnum
import
DatabaseDialectEnum
from
enums.DatabaseTypeEnum
import
DatabaseTypeEnum
from
components.Databases.Enums.OracleDataTypeEnum
import
OracleDataTypeEnum
...
...
@@ -90,8 +90,9 @@ class Oracle:
def
generate_sql_procedure
(
self
,
command
:
str
,
reserved_word
:
str
=
"execute"
)
->
str
:
response
=
""
try
:
logger
.
info
(
"COMANDO"
,
command
)
command
=
command
.
replace
(
reserved_word
,
""
)
.
replace
(
";"
,
""
)
response
=
f
"
begin {command}; end
;"
response
=
f
"
BEGIN {command}; END
;"
logger
.
debug
(
"COMANDO ORACLE:"
,
response
)
except
Exception
as
e
:
logger
.
error
(
f
"Error generando comando sql para procedure Oracle. Comando: {command}. {e}"
)
...
...
@@ -113,7 +114,8 @@ class Oracle:
def
check_procedure
(
self
,
procedure_name
,
connection
)
->
bool
:
exists
=
False
try
:
check_query
=
f
"SELECT text FROM all_source WHERE name = '{procedure_name}'"
procedure_name
=
re
.
match
(
r'([^(]+)'
,
procedure_name
)
.
group
(
1
)
check_query
=
f
"SELECT object_name FROM all_procedures WHERE object_name = '{procedure_name.upper()}'"
result
=
connection
.
execute
(
check_query
)
exists
=
bool
(
result
.
fetchone
())
except
Exception
as
e
:
...
...
@@ -124,7 +126,7 @@ class Oracle:
def
check_table
(
self
,
table_name
,
connection
)
->
bool
:
exists
=
False
try
:
check_query
=
f
"
DESCRIBE {table_name}
"
check_query
=
f
"
SELECT table_name FROM all_tables WHERE table_name = '{table_name.upper()}'
"
result
=
connection
.
execute
(
check_query
)
exists
=
bool
(
result
.
fetchone
())
except
Exception
as
e
:
...
...
@@ -135,7 +137,7 @@ class Oracle:
def
verify_table
(
self
,
table_name
,
connection
)
->
bool
:
exists
=
False
try
:
check_query
=
f
"SELECT COUNT(*) FROM {table_name}"
check_query
=
f
"SELECT COUNT(*) FROM {table_name
.upper()
}"
result
=
connection
.
execute
(
check_query
)
if
result
>
0
:
exists
=
True
...
...
dags/dag_conf.yml
View file @
b4835849
...
...
@@ -5,23 +5,23 @@ app:
database
:
sources
:
source1
:
type
:
mysql
host
:
192.168.
1.13
port
:
13306
username
:
root
password
:
root
database
:
prueba
service
:
schema
:
sources
type
:
oracle
host
:
192.168.
27.22
port
:
21521
username
:
PRUEBABCOM2
password
:
admin
database
:
service
:
ORCLPDB1
schema
:
transformation
:
type
:
mysql
host
:
192.168.
1.13
port
:
13306
username
:
root
password
:
root
database
:
prueba_ca
service
:
schema
:
intern_db
type
:
oracle
host
:
192.168.
27.22
port
:
21521
username
:
RLQA_AIR
password
:
RLQA_AIR99
database
:
service
:
ORCLPDB1
schema
:
chunksize
:
4000
label_multiple_select
:
TABLENAME
label_transform_procedure
:
STORE
...
...
@@ -55,14 +55,14 @@ app:
delimiter
:
'
|'
tmp_path
:
/tmp
s3_params
:
tabla
4
:
bucket
:
prueba
irflow
tabla
1
:
bucket
:
prueba
-id
prefix
:
bcom_results
connection_id
:
prueba_af
connection_id
:
conn_script
tabla5
:
bucket
:
prueba
irflow
bucket
:
prueba
-id
prefix
:
bcom_results
connection_id
:
prueba_af
connection_id
:
conn_script
report
:
s3_params
:
bucket
:
prueba-id
...
...
deploy-k8/requirements.txt
View file @
b4835849
openpyxl==3.1.2
XlsxWriter==3.1.2
pymysql==1.1.0
oracledb==1.3.2
apache-airflow-providers-google
apache-airflow-providers-amazon
apache-airflow-providers-postgres
apache-airflow-providers-oracle
\ No newline at end of file
pip install openpyxl==3.1.2
pip install XlsxWriter==3.1.2
pip install pymysql==1.1.0
pip install oracledb==1.3.2
pip install apache-airflow-providers-google
pip install apache-airflow-providers-amazon
pip install apache-airflow-providers-postgres
pip install apache-airflow-providers-oracle
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment