Commit 8f6a5a31 authored by Cristian Aguirre's avatar Cristian Aguirre

Merge branch 'developer_ca' into 'developer'

Developer ca

See merge request !14
parents 8ebc05c1 122df81f
...@@ -18,17 +18,20 @@ import logging ...@@ -18,17 +18,20 @@ import logging
logger = logging.getLogger() logger = logging.getLogger()
def validate_clean(control_params: Dict[str, Any], provider: str, timezone: str, engine, **kwargs) -> None: def validate_clean(control_params: Dict[str, Any], provider: str, timezone: str, intern_conn, **kwargs) -> None:
engine = intern_conn.engine
delete_task_instances() delete_task_instances()
ti = kwargs["ti"] ti = kwargs["ti"]
#created_Tables = ti.xcom_pull(task_ids="VALIDATE_GENERATOR", key="CREATED_TABLES")
procedures = ti.xcom_pull(task_ids="MASTER_TRANSFORMATION", key="PROC_CREATED") procedures = ti.xcom_pull(task_ids="MASTER_TRANSFORMATION", key="PROC_CREATED")
if procedures: if procedures:
for procedure in procedures: for procedure in procedures:
logger.info(f"Borrando procedures {procedure}") with engine.connect() as conn:
delete = delete_procedure(procedure, engine) result_p = bool(intern_conn.check_procedure(procedure, conn))
if delete: if result_p:
logger.info(f"Borrado correctamente el procedure {procedure}") logger.info(f"Borrando procedures {procedure}")
delete = delete_procedure(procedure, intern_conn)
if delete:
logger.info(f"Borrado correctamente el procedure {procedure}")
success_tasks = ti.xcom_pull(task_ids="GENERATORS", key="SUCCESS_TASKS") success_tasks = ti.xcom_pull(task_ids="GENERATORS", key="SUCCESS_TASKS")
failed_tasks = ti.xcom_pull(task_ids="GENERATORS", key="FAILED_TASKS") failed_tasks = ti.xcom_pull(task_ids="GENERATORS", key="FAILED_TASKS")
conf = ti.xcom_pull(task_ids="VALIDATE_GENERATOR", key="CONTROL-CONFIG") conf = ti.xcom_pull(task_ids="VALIDATE_GENERATOR", key="CONTROL-CONFIG")
...@@ -38,7 +41,6 @@ def validate_clean(control_params: Dict[str, Any], provider: str, timezone: str, ...@@ -38,7 +41,6 @@ def validate_clean(control_params: Dict[str, Any], provider: str, timezone: str,
if not prefix.endswith("/"): if not prefix.endswith("/"):
prefix += "/" prefix += "/"
key = prefix + control_params["filename"] key = prefix + control_params["filename"]
#conf = json.dumps(conf, indent=2, default=str)
final_dict = {} final_dict = {}
status = ProcessStatusEnum.SUCCESS.value status = ProcessStatusEnum.SUCCESS.value
if not isinstance(success_tasks, type(None)) and len(success_tasks) > 0: if not isinstance(success_tasks, type(None)) and len(success_tasks) > 0:
...@@ -59,13 +61,13 @@ def validate_clean(control_params: Dict[str, Any], provider: str, timezone: str, ...@@ -59,13 +61,13 @@ def validate_clean(control_params: Dict[str, Any], provider: str, timezone: str,
def clean(command: str, intern_conn): def clean(command: str, intern_conn):
engine = intern_conn.engine
#tablename = select_multiple(command)["tablename"]
tablename = command tablename = command
logger.info(f"Borrando tabla {tablename}") delete_p = delete_procedure(tablename, intern_conn)
delete = delete_table(tablename, engine) delete_t = delete_table(tablename, intern_conn)
if delete: if delete_t:
logger.info(f"Borrado correctamente la tabla {tablename}") logger.info(f"Borrado correctamente la tabla {tablename}")
elif delete_p:
logger.info(f"Borrado correctamente el procedure {tablename}")
logger.info(f"Borrado todas las variables xcom") logger.info(f"Borrado todas las variables xcom")
...@@ -110,7 +112,7 @@ def get_cleaning_task_group(db_intern_conn, control_s3: Dict[str, Any], provider ...@@ -110,7 +112,7 @@ def get_cleaning_task_group(db_intern_conn, control_s3: Dict[str, Any], provider
validate_task = PythonOperator( validate_task = PythonOperator(
task_id="VALIDATE_CLEANER", task_id="VALIDATE_CLEANER",
python_callable=validate_clean, python_callable=validate_clean,
op_kwargs={'control_params': control_s3, 'provider': provider, 'timezone': timezone, 'engine':db_intern_conn.engine}, op_kwargs={'control_params': control_s3, 'provider': provider, 'timezone': timezone, 'intern_conn':db_intern_conn},
trigger_rule='none_skipped' trigger_rule='none_skipped'
) )
cleaners >> tasks >> validate_task cleaners >> tasks >> validate_task
......
...@@ -7,29 +7,18 @@ import logging ...@@ -7,29 +7,18 @@ import logging
logger = logging.getLogger() logger = logging.getLogger()
def save_from_dataframe(df: pd.DataFrame, tablename: str, connection) -> bool: def save_from_dataframe(df: pd.DataFrame, tablename: str, connection, is_first: bool = False) -> bool:
save = False save = False
try: try:
chunksize = 2000 chunksize = 2000
# db_type = connection.db_type db_type = connection.db_type
connection = connection.engine connection = connection.engine
# print(df["CREACION_PRODUCTO"].value_counts())
with connection.connect() as conn: with connection.connect() as conn:
# if db_type == DatabaseTypeEnum.ORACLE.value: if db_type == DatabaseTypeEnum.ORACLE.value:
# df.info() dtypes = {c: VARCHAR(df[c].str.len().max()) for c in df.columns[df.dtypes == 'object'].tolist()}
# aux = df.columns[df.dtypes == 'object'].tolist() df.to_sql(tablename, conn, if_exists='append', dtype=dtypes, index=False, chunksize=chunksize)
# print(aux) else:
# dtyp = {} df.to_sql(tablename, conn, if_exists='append', index=False, chunksize=chunksize)
# for col in aux:
# print(col)
# print(df[col].dtype)
# df[col] = df[col].astype(str)
# dtyp.update({col: VARCHAR(df[col].str.len().max())})
# # dtyp = {c: VARCHAR(df[c].str.len().max()) for c in aux}
# print(dtyp)
# df.to_sql(tablename, conn, if_exists='append', dtype=dtyp, index=False, chunksize=chunksize)
# else:
df.to_sql(tablename, conn, if_exists='append', index=False, chunksize=chunksize)
save = True save = True
except Exception as e: except Exception as e:
logger.error(f"Error guardando resultados desde dataframe. {e}") logger.error(f"Error guardando resultados desde dataframe. {e}")
......
import logging import logging
import time import time
from typing import List from typing import List
import re
from enums.DatabaseTypeEnum import DatabaseTypeEnum
logger = logging.getLogger() logger = logging.getLogger()
...@@ -15,36 +18,69 @@ def execute_transformations(commands: List[str], engine): ...@@ -15,36 +18,69 @@ def execute_transformations(commands: List[str], engine):
logger.error(f"Error ejecutando comando de transformación. {e}") logger.error(f"Error ejecutando comando de transformación. {e}")
def delete_table(tablename: str, engine) -> bool: def delete_table(tablename: str, connection) -> bool:
engine = connection.engine
delete = False delete = False
try: try:
command = f'DROP TABLE IF EXISTS {tablename}' if connection.db_type == DatabaseTypeEnum.ORACLE.value:
start_time = time.time() command = f'DROP TABLE {tablename}'
with engine.connect() as conn: start_time = time.time()
try: with engine.connect() as conn:
_ = conn.execute(command) try:
except Exception as e: check_query = f"SELECT table_name FROM all_tables WHERE table_name = '{tablename.upper()}'"
logger.error(f"Tabla no encontrada. {e}") result = conn.execute(check_query)
delete = True exists = bool(result.fetchone())
logger.debug(f"Duración de borrado: {time.time() - start_time}") except Exception as e:
logger.error(f"Tabla no encontrada. {e}")
if exists:
_ = conn.execute(command)
delete = True
logger.debug(f"Duración de borrado: {time.time() - start_time}")
else:
command = f'DROP TABLE IF EXISTS {tablename}'
start_time = time.time()
with engine.connect() as conn:
try:
_ = conn.execute(command)
delete = True
logger.debug(f"Duración de borrado: {time.time() - start_time}")
except Exception as e:
logger.error(f"Tabla no encontrada. {e}")
except Exception as e: except Exception as e:
logger.error(f"Error borrando tabla {tablename}. {e}") logger.error(f"Error borrando tabla {tablename}. {e}")
finally: finally:
return delete return delete
def delete_procedure(procedure: str, engine) -> bool: def delete_procedure(procedure: str, connection) -> bool:
engine = connection.engine
delete = False delete = False
try: try:
command = f"DROP PROCEDURE IF EXISTS {procedure}" if connection.db_type == DatabaseTypeEnum.ORACLE.value:
start_time = time.time() command = f"DROP PROCEDURE {procedure}"
with engine.connect() as conn: start_time = time.time()
try: with engine.connect() as conn:
_ = conn.execute(command) try:
except Exception as e: proc = re.match(r'([^(]+)', procedure).group(1)
logger.error(f"Procedure no encontrado. {e}") check_query = f"SELECT object_name FROM all_procedures WHERE object_name = '{proc.upper()}'"
delete = True result = conn.execute(check_query)
logger.debug(f"Duración de borrado: {time.time() - start_time}") exists = bool(result.fetchone())
except Exception as e:
logger.error(f"Procedure no encontrado. {e}")
if exists:
_ = conn.execute(command)
delete = True
logger.debug(f"Duración de borrado: {time.time() - start_time}")
else:
command = f'DROP PROCEDURE IF EXISTS {procedure}'
start_time = time.time()
with engine.connect() as conn:
try:
_ = conn.execute(command)
delete = True
logger.debug(f"Duración de borrado: {time.time() - start_time}")
except Exception as e:
logger.error(f"Procedure no encontrada. {e}")
except Exception as e: except Exception as e:
logger.error(f"Error borrando procedure {procedure}. {e}") logger.error(f"Error borrando procedure {procedure}. {e}")
finally: finally:
......
...@@ -121,7 +121,7 @@ class Mysql: ...@@ -121,7 +121,7 @@ class Mysql:
result = connection.execute(check_query) result = connection.execute(check_query)
exists = bool(result.fetchone()) exists = bool(result.fetchone())
except Exception as e: except Exception as e:
logger.error(f"Error obteniendo existencia de tabla. {e}") logger.info(f"No se encuentra la tabla {table_name} en la BD interna")
finally: finally:
return exists return exists
...@@ -130,9 +130,10 @@ class Mysql: ...@@ -130,9 +130,10 @@ class Mysql:
try: try:
check_query = f"SELECT COUNT(*) FROM {table_name}" check_query = f"SELECT COUNT(*) FROM {table_name}"
result = connection.execute(check_query) result = connection.execute(check_query)
result = result.fetchone()[0]
if result > 0: if result > 0:
exists = True exists = True
except Exception as e: except Exception as e:
logger.error(f"Error obteniendo counts de tabla. {e}") logger.info(f"No se encuentra la tabla {table_name} en la BD interna")
finally: finally:
return exists return exists
\ No newline at end of file
...@@ -2,7 +2,7 @@ from typing import List, Tuple ...@@ -2,7 +2,7 @@ from typing import List, Tuple
from sqlalchemy import create_engine from sqlalchemy import create_engine
import oracledb import oracledb
import re
from enums.DatabaseDialectEnum import DatabaseDialectEnum from enums.DatabaseDialectEnum import DatabaseDialectEnum
from enums.DatabaseTypeEnum import DatabaseTypeEnum from enums.DatabaseTypeEnum import DatabaseTypeEnum
from components.Databases.Enums.OracleDataTypeEnum import OracleDataTypeEnum from components.Databases.Enums.OracleDataTypeEnum import OracleDataTypeEnum
...@@ -91,7 +91,7 @@ class Oracle: ...@@ -91,7 +91,7 @@ class Oracle:
response = "" response = ""
try: try:
command = command.replace(reserved_word, "").replace(";", "") command = command.replace(reserved_word, "").replace(";", "")
response = f"begin {command}; end;" response = f"BEGIN {command}; END;"
logger.debug("COMANDO ORACLE:", response) logger.debug("COMANDO ORACLE:", response)
except Exception as e: except Exception as e:
logger.error(f"Error generando comando sql para procedure Oracle. Comando: {command}. {e}") logger.error(f"Error generando comando sql para procedure Oracle. Comando: {command}. {e}")
...@@ -113,7 +113,8 @@ class Oracle: ...@@ -113,7 +113,8 @@ class Oracle:
def check_procedure(self, procedure_name, connection) -> bool: def check_procedure(self, procedure_name, connection) -> bool:
exists = False exists = False
try: try:
check_query = f"SELECT text FROM all_source WHERE name = '{procedure_name}'" procedure_name = re.match(r'([^(]+)', procedure_name).group(1)
check_query = f"SELECT object_name FROM all_procedures WHERE object_name = '{procedure_name.upper()}'"
result = connection.execute(check_query) result = connection.execute(check_query)
exists = bool(result.fetchone()) exists = bool(result.fetchone())
except Exception as e: except Exception as e:
...@@ -124,22 +125,23 @@ class Oracle: ...@@ -124,22 +125,23 @@ class Oracle:
def check_table(self, table_name, connection) -> bool: def check_table(self, table_name, connection) -> bool:
exists = False exists = False
try: try:
check_query = f"DESCRIBE {table_name}" check_query = f"SELECT table_name FROM all_tables WHERE table_name = '{table_name.upper()}'"
result = connection.execute(check_query) result = connection.execute(check_query)
exists = bool(result.fetchone()) exists = bool(result.fetchone())
except Exception as e: except Exception as e:
logger.error(f"Error obteniendo existencia de tabla. {e}") logger.info(f"No se encuentra la tabla {table_name} en la BD interna")
finally: finally:
return exists return exists
def verify_table(self, table_name, connection) -> bool: def verify_table(self, table_name, connection) -> bool:
exists = False exists = False
try: try:
check_query = f"SELECT COUNT(*) FROM {table_name}" check_query = f"SELECT COUNT(*) FROM {table_name.upper()}"
result = connection.execute(check_query) result = connection.execute(check_query)
result = result.fetchone()[0]
if result > 0: if result > 0:
exists = True exists = True
except Exception as e: except Exception as e:
logger.error(f"Error obteniendo counts de tabla. {e}") logger.info(f"No se encuentra la tabla {table_name} en la BD interna.")
finally: finally:
return exists return exists
\ No newline at end of file
...@@ -122,7 +122,7 @@ class Postgres: ...@@ -122,7 +122,7 @@ class Postgres:
result = connection.execute(check_query) result = connection.execute(check_query)
exists = bool(result.fetchone()) exists = bool(result.fetchone())
except Exception as e: except Exception as e:
logger.error(f"Error obteniendo existencia de tabla. {e}") logger.info(f"No se encuentra la tabla {table_name} en la BD interna")
finally: finally:
return exists return exists
...@@ -131,9 +131,10 @@ class Postgres: ...@@ -131,9 +131,10 @@ class Postgres:
try: try:
check_query = f"SELECT COUNT(*) FROM {table_name}" check_query = f"SELECT COUNT(*) FROM {table_name}"
result = connection.execute(check_query) result = connection.execute(check_query)
result = result.fetchone()[0]
if result > 0: if result > 0:
exists = True exists = True
except Exception as e: except Exception as e:
logger.error(f"Error obteniendo counts de tabla. {e}") logger.info(f"No se encuentra la tabla {table_name} en la BD interna")
finally: finally:
return exists return exists
\ No newline at end of file
...@@ -90,6 +90,8 @@ def on_success_extractor(context) -> None: ...@@ -90,6 +90,8 @@ def on_success_extractor(context) -> None:
ti = context["ti"] ti = context["ti"]
task_name = f"{ti.task_id}_{ti.map_index}" task_name = f"{ti.task_id}_{ti.map_index}"
selects = Variable.get('SELECTS', default_var=[], deserialize_json=True) selects = Variable.get('SELECTS', default_var=[], deserialize_json=True)
if len(selects) == 0:
raise AirflowSkipException(f"No se encontraron operaciones de extraccion")
command = selects[ti.map_index] command = selects[ti.map_index]
tablename = select_multiple(command[1])["tablename"] tablename = select_multiple(command[1])["tablename"]
status = ProcessStatusEnum.SUCCESS.value status = ProcessStatusEnum.SUCCESS.value
...@@ -142,11 +144,10 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, timez ...@@ -142,11 +144,10 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, timez
result = bool(intern_conn.check_table(tablename, connection)) result = bool(intern_conn.check_table(tablename, connection))
resultado = intern_conn.verify_table(tablename, connection) resultado = intern_conn.verify_table(tablename, connection)
if not result or not resultado: if not result or not resultado:
_ = delete_table(tablename, intern_conn)
create = intern_conn.create_table(model) create = intern_conn.create_table(model)
if create: if create:
logger.info(f"Creado correctamente la tabla {tablename}. Creado?: {create}") logger.info(f"Creado correctamente la tabla {tablename}. Creado?: {create}")
#extract_tables.append(tablename)
#task.xcom_push(key="TABLES_CREATED", value=tablename)
else: else:
raise AssertionError(f"Error creando tabla {tablename}") raise AssertionError(f"Error creando tabla {tablename}")
if is_procedure: if is_procedure:
...@@ -163,13 +164,13 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, timez ...@@ -163,13 +164,13 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, timez
data.append(row) data.append(row)
if len(data) == chunksize: if len(data) == chunksize:
dataframe = pd.DataFrame(data, columns=columns_name) dataframe = pd.DataFrame(data, columns=columns_name)
save = save_from_dataframe(dataframe, tablename, intern_conn.engine) save = save_from_dataframe(dataframe, tablename, intern_conn)
if save: if save:
logger.debug(f"Guardado correctamente dataframe. Procesando más bloques") logger.debug(f"Guardado correctamente dataframe. Procesando más bloques")
data.clear() data.clear()
if len(data) > 0: if len(data) > 0:
dataframe = pd.DataFrame(data, columns=columns_name) dataframe = pd.DataFrame(data, columns=columns_name)
save = save_from_dataframe(dataframe, tablename, intern_conn.engine) save = save_from_dataframe(dataframe, tablename, intern_conn)
if save: if save:
logger.debug(f"Migrado correctamente todos los datos") logger.debug(f"Migrado correctamente todos los datos")
extract_tables.append(tablename) extract_tables.append(tablename)
...@@ -183,13 +184,13 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, timez ...@@ -183,13 +184,13 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, timez
data.append(row) data.append(row)
if len(data) == chunksize: if len(data) == chunksize:
dataframe = pd.DataFrame(data, columns=columns_name) dataframe = pd.DataFrame(data, columns=columns_name)
save = save_from_dataframe(dataframe, tablename, intern_conn.engine) save = save_from_dataframe(dataframe, tablename, intern_conn)
if save: if save:
logger.debug(f"Guardado correctamente dataframe. Procesando más bloques") logger.debug(f"Guardado correctamente dataframe. Procesando más bloques")
data.clear() data.clear()
if len(data) > 0: if len(data) > 0:
dataframe = pd.DataFrame(data, columns=columns_name) dataframe = pd.DataFrame(data, columns=columns_name)
save = save_from_dataframe(dataframe, tablename, intern_conn.engine) save = save_from_dataframe(dataframe, tablename, intern_conn)
if save: if save:
logger.debug(f"Migrado correctamente todos los datos") logger.debug(f"Migrado correctamente todos los datos")
...@@ -225,7 +226,7 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, timez ...@@ -225,7 +226,7 @@ def extract_from_source(command, source_conn, intern_conn, chunksize: int, timez
task.xcom_push(key="TABLES_CREATED", value=tablename) task.xcom_push(key="TABLES_CREATED", value=tablename)
task.xcom_push(key="END_PROCESS_DATETIME_" + str(task.map_index), value=end_process_datetime) task.xcom_push(key="END_PROCESS_DATETIME_" + str(task.map_index), value=end_process_datetime)
except Exception as e: except Exception as e:
delete = delete_table(tablename, intern_conn.engine) delete = delete_table(tablename, intern_conn)
if delete: if delete:
logger.info(f"Se borró correctamente la tabla {tablename}") logger.info(f"Se borró correctamente la tabla {tablename}")
raise AssertionError(f"Error creando la tabla y migrando datos. {type(e)}. {e}") raise AssertionError(f"Error creando la tabla y migrando datos. {type(e)}. {e}")
...@@ -240,7 +241,6 @@ def get_select_from_xcom(db_intern_conn, **kwargs): ...@@ -240,7 +241,6 @@ def get_select_from_xcom(db_intern_conn, **kwargs):
conf = task.xcom_pull(task_ids="SCRIPTS-EXTRACTOR", key="CONTROL-CONFIG") conf = task.xcom_pull(task_ids="SCRIPTS-EXTRACTOR", key="CONTROL-CONFIG")
definitions = task.xcom_pull(task_ids="SCRIPTS-EXTRACTOR", key="EXTRACTION-DEFINITION-JSON") definitions = task.xcom_pull(task_ids="SCRIPTS-EXTRACTOR", key="EXTRACTION-DEFINITION-JSON")
temp_conf = conf[-1]["objects_created"] if "objects_created" in conf[-1].keys() else [] temp_conf = conf[-1]["objects_created"] if "objects_created" in conf[-1].keys() else []
engine = db_intern_conn.engine
logger.info(f"OBJETOS CREADOS: {temp_conf}") logger.info(f"OBJETOS CREADOS: {temp_conf}")
identificadores = [item["identifier"] for item in definitions] identificadores = [item["identifier"] for item in definitions]
if temp_conf: if temp_conf:
...@@ -258,8 +258,8 @@ def get_select_from_xcom(db_intern_conn, **kwargs): ...@@ -258,8 +258,8 @@ def get_select_from_xcom(db_intern_conn, **kwargs):
tablas_temp = list(tablas_temp) tablas_temp = list(tablas_temp)
if tablas_temp: if tablas_temp:
for i in tablas_temp: for i in tablas_temp:
delete = delete_table(i, engine) delete = delete_table(i, db_intern_conn)
delete2 = delete_procedure(i, engine) delete2 = delete_procedure(i, db_intern_conn)
if delete: if delete:
logger.info(f"Borrado correctamente la tabla temporal {i}") logger.info(f"Borrado correctamente la tabla temporal {i}")
if delete2: if delete2:
......
...@@ -115,7 +115,6 @@ def save_df_to_s3(data: pd.DataFrame or str, conn: str, bucket: str, key: str, p ...@@ -115,7 +115,6 @@ def save_df_to_s3(data: pd.DataFrame or str, conn: str, bucket: str, key: str, p
else: else:
hook.load_bytes(buffer.getvalue(), key, bucket, True) hook.load_bytes(buffer.getvalue(), key, bucket, True)
else: else:
logger.info(f"Llegue aca, para guardar")
logger.info(f"DATA: {data}. BUCKET: {bucket}. KEY: {key}") logger.info(f"DATA: {data}. BUCKET: {bucket}. KEY: {key}")
# convert_df = pd.read_csv() # convert_df = pd.read_csv()
if gcp_cloud: if gcp_cloud:
......
...@@ -8,12 +8,12 @@ from airflow.exceptions import AirflowSkipException ...@@ -8,12 +8,12 @@ from airflow.exceptions import AirflowSkipException
from components.DatabaseOperation.DatabaseTransformation import delete_table, delete_procedure from components.DatabaseOperation.DatabaseTransformation import delete_table, delete_procedure
from components.S3Route import get_files_from_prefix, get_file_from_prefix from components.S3Route import get_files_from_prefix, get_file_from_prefix
from components.Xcom import save_commands_to_xcom from components.Xcom import save_commands_to_xcom
from components.Utils import update_sql_commands_2
from components.Control import get_tasks_from_control, update_new_process from components.Control import get_tasks_from_control, update_new_process
from components.S3Route import load_control_to_s3 from components.S3Route import load_control_to_s3
from components.Xcom import delete_all_xcom_tasks, delete_task_instances from components.Xcom import delete_all_xcom_tasks, delete_task_instances
from enums.ProcessStatusEnum import ProcessStatusEnum from enums.ProcessStatusEnum import ProcessStatusEnum
from enums.OperationTypeEnum import OperationTypeEnum from enums.OperationTypeEnum import OperationTypeEnum
from enums.DatabaseTypeEnum import DatabaseTypeEnum
from components.Timezone import datetime_by_tzone from components.Timezone import datetime_by_tzone
import logging import logging
...@@ -99,12 +99,16 @@ def transformations(xcom_commands: str, intern_conn, timezone: str, **kwargs): ...@@ -99,12 +99,16 @@ def transformations(xcom_commands: str, intern_conn, timezone: str, **kwargs):
script_name = xcom_commands[0] script_name = xcom_commands[0]
commands = xcom_commands[1] commands = xcom_commands[1]
logger.info(f"Ejecutando transformaciones del script {script_name}") logger.info(f"Ejecutando transformaciones del script {script_name}")
not_procedure = ["UPDATE","SELECT","CREATE","ALTER","DROP","DELETE","INSERT","GRANT","REVOKE","TRUNCATE","COPY", logger.info(f"XCOM_COMMANDS: {xcom_commands}")
"COMMIT","ROLLBACK","USE","BEGIN"] not_procedure = ["UPDATE", "SELECT", "CREATE", "ALTER", "DROP", "DELETE", "INSERT"," GRANT", "REVOKE", "TRUNCATE",
"COPY", "COMMIT", "ROLLBACK", "USE", "BEGIN"]
with engine.connect() as connection: with engine.connect() as connection:
for command in commands: for command in commands:
logger.info(f"Commando: {command}")
if any(command.startswith(palabra) or command.startswith(palabra.lower()) for palabra in not_procedure): if any(command.startswith(palabra) or command.startswith(palabra.lower()) for palabra in not_procedure):
logger.info(f"Ejecutando comando de transformación: {command}") logger.info(f"Ejecutando comando de transformación en {intern_conn.db_type} : {command}")
if intern_conn.db_type == DatabaseTypeEnum.ORACLE.value:
command = command[:-1]
_ = connection.execute(command) _ = connection.execute(command)
else: else:
logger.info(f"Generando llamada al procedure según bd para: {command}") logger.info(f"Generando llamada al procedure según bd para: {command}")
...@@ -143,14 +147,14 @@ def get_trans_from_xcom(provider, store_procedure, procedure_mask, label_tablena ...@@ -143,14 +147,14 @@ def get_trans_from_xcom(provider, store_procedure, procedure_mask, label_tablena
index = procedure.find(label_tablename + ":") index = procedure.find(label_tablename + ":")
label_lenght = len(label_tablename + ":") label_lenght = len(label_tablename + ":")
tablename = procedure[index + label_lenght:].strip().split("\n")[0] tablename = procedure[index + label_lenght:].strip().split("\n")[0]
delete_procedure(tablename, engine) delete_procedure(tablename, db_intern_conn)
for element in definitions: for element in definitions:
if element["identifier"] == tablename and "transformation_store_procedure" in element.keys() and element["transformation_store_procedure"] == True: if element["identifier"] == tablename and "transformation_store_procedure" in element.keys() and element["transformation_store_procedure"] == True:
logger.info(f"Ejecutando creacion de procedure: {procedure}") logger.info(f"Ejecutando creacion de procedure: {procedure}")
# result = db_intern_conn.check_procedure(tablename, connection)
# if not result:
try: try:
logger.info("1")
_ = connection.execute(procedure) _ = connection.execute(procedure)
logger.info("2")
except Exception as e: except Exception as e:
logger.error(f" Error: {e}") logger.error(f" Error: {e}")
raise AirflowSkipException raise AirflowSkipException
...@@ -159,7 +163,7 @@ def get_trans_from_xcom(provider, store_procedure, procedure_mask, label_tablena ...@@ -159,7 +163,7 @@ def get_trans_from_xcom(provider, store_procedure, procedure_mask, label_tablena
else: else:
logger.debug(f"No se encontró el label en {procedure} por ende no se creará") logger.debug(f"No se encontró el label en {procedure} por ende no se creará")
save_commands_to_xcom(procedures, kwargs['ti'], procedure_mask, transform_mask, procedure_mask, order_delimiter) save_commands_to_xcom(procedures, kwargs['ti'], procedure_mask, transform_mask, procedure_mask, order_delimiter)
logger.debug(f"Procedures cargados en Xcom: {procedures}") logger.info(f"Procedures cargados en Xcom: {procedures}")
transforms_per_file = [] transforms_per_file = []
conf = task.xcom_pull(task_ids="VALIDATE_EXTRACTION", key="CONTROL-CONFIG") conf = task.xcom_pull(task_ids="VALIDATE_EXTRACTION", key="CONTROL-CONFIG")
......
...@@ -166,13 +166,6 @@ def update_sql_commands(dataset: List[Tuple[str, str]], label_tablename: str, ...@@ -166,13 +166,6 @@ def update_sql_commands(dataset: List[Tuple[str, str]], label_tablename: str,
for item in data: for item in data:
if item.lower().strip() == "end": if item.lower().strip() == "end":
final_data[-1] = final_data[-1] + "; end;" final_data[-1] = final_data[-1] + "; end;"
# parts = item.split(CommentsScriptEnum.DASHES.value)
# parts = [part for part in parts if len(part.strip()) > 0]
# print(parts)
# if len(parts) > 1:
# for part in parts:
# if not part.strip().lower().startswith(label_tablename.lower()):
# continue
final_item = item final_item = item
if item.lower().strip().find(label_tablename.lower().strip() + ":") != -1: if item.lower().strip().find(label_tablename.lower().strip() + ":") != -1:
...@@ -222,14 +215,6 @@ def select_multiple(command: str) -> Dict[str, Any]: ...@@ -222,14 +215,6 @@ def select_multiple(command: str) -> Dict[str, Any]:
if command.lower().replace(" ", "").find(no_procedure_init) != -1: if command.lower().replace(" ", "").find(no_procedure_init) != -1:
response["is_multiple"] = True response["is_multiple"] = True
tablename = command[:command.index("|")].strip() tablename = command[:command.index("|")].strip()
# init_index = command.lower().find("from")
# if init_index == -1:
# raise AssertionError("Query malformed")
# else:
# from_command = command[init_index + 4:]
# tablename_base = from_command.strip().split(" ")
# if len(tablename_base) > 0 and tablename == "":
# tablename = tablename_base[0]
response["tablename"] = tablename response["tablename"] = tablename
except Exception as e: except Exception as e:
raise AssertionError(f"Error validando si es múltiple select y nombre de tabla. {e}") raise AssertionError(f"Error validando si es múltiple select y nombre de tabla. {e}")
......
...@@ -5,23 +5,23 @@ app: ...@@ -5,23 +5,23 @@ app:
database: database:
sources: sources:
source1: source1:
type: mysql type: oracle
host: 192.168.1.13 host: 192.168.27.22
port: 13306 port: 21521
username: root username: PRUEBABCOM2
password: root password: admin
database: prueba database:
service: service: ORCLPDB1
schema: sources schema:
transformation: transformation:
type: mysql type: oracle
host: 192.168.1.13 host: 192.168.27.22
port: 13306 port: 21521
username: root username: RLQA_AIR
password: root password: RLQA_AIR99
database: prueba_ca database:
service: service: ORCLPDB1
schema: intern_db schema:
chunksize: 4000 chunksize: 4000
label_multiple_select: TABLENAME label_multiple_select: TABLENAME
label_transform_procedure: STORE label_transform_procedure: STORE
...@@ -55,14 +55,14 @@ app: ...@@ -55,14 +55,14 @@ app:
delimiter: '|' delimiter: '|'
tmp_path: /tmp tmp_path: /tmp
s3_params: s3_params:
tabla4: tabla1:
bucket: pruebairflow bucket: prueba-id
prefix: bcom_results prefix: bcom_results
connection_id: prueba_af connection_id: conn_script
tabla5: tabla5:
bucket: pruebairflow bucket: prueba-id
prefix: bcom_results prefix: bcom_results
connection_id: prueba_af connection_id: conn_script
report: report:
s3_params: s3_params:
bucket: prueba-id bucket: prueba-id
......
This diff is collapsed.
...@@ -5,4 +5,4 @@ oracledb==1.3.2 ...@@ -5,4 +5,4 @@ oracledb==1.3.2
apache-airflow-providers-google apache-airflow-providers-google
apache-airflow-providers-amazon apache-airflow-providers-amazon
apache-airflow-providers-postgres apache-airflow-providers-postgres
apache-airflow-providers-oracle apache-airflow-providers-oracle
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment