Update 25-07-23. Solve issues 351, 352

82505e1b · Cristian Aguirre · bbfda096 · 82505e1b · 82505e1b · 82505e1b
Commit 82505e1b authored Jul 26, 2023 by Cristian Aguirre
5 changed files
--- a/dags/components/Databases/Mysql.py
+++ b/dags/components/Databases/Mysql.py
 from typing import List, Tuple
+import pymysql
 from sqlalchemy import create_engine
 from enums.DatabaseDialectEnum import DatabaseDialectEnum
 from components.Model.InsumoModel import InsumoModel
@@ -22,6 +23,17 @@ class Mysql:
        self.password = password
        self.database = database
        self.engine = None
+        self.connection = None
+    def get_basic_connection(self):
+        """Return the cached raw pymysql connection, opening it on first use.
+
+        The connection is created from this object's host, port, user,
+        password and database attributes and stored in ``self.connection``
+        so later calls reuse it.
+
+        Connection errors are logged instead of raised; in that case the
+        stored value is returned unchanged (``None`` if no connection was
+        ever opened).
+        """
+        if self.connection is None:
+            try:
+                self.connection = pymysql.connect(host=self.host, port=self.port, user=self.user,
+                                                  password=self.password, db=self.database)
+            except Exception as e:
+                logger.error(f"Error obteniendo conexion básica de MySQL. {e}")
+        # Return after the try block rather than from a `finally` clause, so
+        # KeyboardInterrupt/SystemExit raised while connecting still propagate.
+        return self.connection
    def create_engine(self) -> None:
        try:

--- a/dags/components/Databases/Postgres.py
+++ b/dags/components/Databases/Postgres.py
 from typing import List, Tuple
+import psycopg2
 from sqlalchemy import create_engine
 from components.Model.InsumoModel import InsumoModel
 from enums.DatabaseDialectEnum import DatabaseDialectEnum
@@ -23,6 +24,18 @@ class Postgres:
        self.schema = schema
        self.engine = None
        self.DEFAULT_VAR_LENGHT = 100
+        self.connection = None
+    def get_basic_connection(self):
+        """Return the cached raw psycopg2 connection, opening it on first use.
+
+        The connection is created from this object's host, port, user,
+        password and database attributes, with ``search_path`` set to
+        ``self.schema`` through the connection options, and stored in
+        ``self.connection`` so later calls reuse it.
+
+        Connection errors are logged instead of raised; in that case the
+        stored value is returned unchanged (``None`` if no connection was
+        ever opened).
+        """
+        if self.connection is None:
+            try:
+                self.connection = psycopg2.connect(host=self.host, port=self.port, user=self.user,
+                                                   password=self.password, database=self.database,
+                                                   options="-c search_path="+self.schema)
+            except Exception as e:
+                logger.error(f"Error obteniendo conexion básica de Postgres. {e}")
+        # Return after the try block rather than from a `finally` clause, so
+        # KeyboardInterrupt/SystemExit raised while connecting still propagate.
+        return self.connection
    def create_engine(self) -> None:
        try:

--- a/dags/components/Extractor.py
+++ b/dags/components/Extractor.py
@@ -94,9 +94,10 @@ def extract_from_source(command: str, source_conn, intern_conn, chunksize: int,
        # Create the model with the procedure descriptor
        if command.replace(" ", "").lower().find("|begin") != -1:
            tablename = command[:command.find("|")]
+        elif command.replace(" ", "").lower().find("|call") != -1:
+            tablename = command[:command.find("|")]
        else:
-            proc_name = command[len("begin"):command.rfind("end")]
+            raise AssertionError("Procedure mal formed")
-            tablename = proc_name.strip().replace(";", "")
        task = kwargs['ti']
        procedures = task.xcom_pull(task_ids="SCRIPTS-EXTRACTOR", key="PROCEDURE-JSON")
        model = None
@@ -121,7 +122,7 @@ def extract_from_source(command: str, source_conn, intern_conn, chunksize: int,
        with source_engine.connect() as connection:
            final_command = command_for_create
            if final_command.replace(" ", "").lower().find("|select") != -1:
-                final_command = final_command[final_command.find("select"):]
+                final_command = final_command[final_command.lower().find("select"):]
            result = connection.execute(final_command)
            fields = result.cursor.description
        for field in fields:
@@ -141,6 +142,7 @@ def extract_from_source(command: str, source_conn, intern_conn, chunksize: int,
        if is_tablename:
            command = command[len(tablename+":"):]
            temp_connection = source_conn.get_basic_connection()
+            if source_conn.db_type == DatabaseTypeEnum.ORACLE.value:
                cursor = temp_connection.cursor()
                cursor.execute(command)
                for resultSet in cursor.getimplicitresults():
@@ -159,11 +161,30 @@ def extract_from_source(command: str, source_conn, intern_conn, chunksize: int,
                        if save:
                            logger.debug(f"Migrado correctamente todos los datos")
                        data.clear()
+            elif source_conn.db_type == DatabaseTypeEnum.MYSQL.value or \
+                    source_conn.db_type == DatabaseTypeEnum.POSTGRES.value:
+                cursor = temp_connection.cursor()
+                cursor.execute(command)
+                data = []
+                for row in cursor._rows:
+                    data.append(row)
+                    if len(data) == chunksize:
+                        dataframe = pd.DataFrame(data, columns=columns_name)
+                        save = save_from_dataframe(dataframe, tablename, intern_conn.engine)
+                        if save:
+                            logger.debug(f"Guardado correctamente dataframe. Procesando más bloques")
+                        data.clear()
+                if len(data) > 0:
+                    dataframe = pd.DataFrame(data, columns=columns_name)
+                    save = save_from_dataframe(dataframe, tablename, intern_conn.engine)
+                    if save:
+                        logger.debug(f"Migrado correctamente todos los datos")
+                    data.clear()
            logger.info("Guardado correctamente todos los datos")
            source_conn.close_basic_connection()
        else:
-            if command.replace(" ", "").lower().find("|select"):
+            if command.replace(" ", "").lower().find("|select") != -1:
-                command = command[command.find("select"):]
+                command = command[command.lower().find("select"):]
            steps = get_steps(command, chunksize, source_engine)
            # Traemos el iterator
            iterator = get_iterator(command, chunksize, source_engine)

--- a/dags/components/Utils.py
+++ b/dags/components/Utils.py
@@ -22,7 +22,7 @@ def get_type_file(key: str) -> FileTypeEnum:
    result = FileTypeEnum.EXCEL
    try:
        file_type_sufix = key.rfind(".")
-        file_type = key[file_type_sufix+1:]
+        file_type = key[file_type_sufix + 1:]
        result = FileTypeEnum(file_type)
    except Exception as e:
        logger.error(f"Error obteniedo el tipo de archivo de {key}. {e}")
@@ -81,10 +81,10 @@ def update_dict_with_catalogs(data_dict: Dict[str, Any], data: Dict[str, Any], c
        else:
            catalog_prefix = default_prefix
        s3_catalog = catalog_prefix + catalog["pattern"]
-        data_dict.update({'s3_'+catalog_name: s3_catalog, catalog_name+'_key': catalog["key_field"],
+        data_dict.update({'s3_' + catalog_name: s3_catalog, catalog_name + '_key': catalog["key_field"],
-                          catalog_name+'_value': catalog["value_field"]})
+                          catalog_name + '_value': catalog["value_field"]})
        if "delimiter" in catalog.keys():
-            data_dict.update({catalog_name+'_delimiter': catalog["delimiter"]})
+            data_dict.update({catalog_name + '_delimiter': catalog["delimiter"]})
    except Exception as e:
        raise AssertionError(f"Error actualizando dict de catalogos. {e}")
    finally:
@@ -107,9 +107,9 @@ def update_sql_commands(dataset: List[Tuple[str, str]], label_tablename: str) ->
                if item.lower().strip() == "end":
                    final_data[-1] = final_data[-1] + "; end;"
                final_item = item
-                if item.lower().strip().find(label_tablename.lower().strip()+":") != -1:
+                if item.lower().strip().find(label_tablename.lower().strip() + ":") != -1:
-                    init_index = item.lower().strip().index(label_tablename.lower().strip()+":")
+                    init_index = item.lower().strip().index(label_tablename.lower().strip() + ":")
-                    table_name = item.replace(" ", "").strip()[init_index+5:].strip()
+                    table_name = item.replace(" ", "").strip()[init_index + len(label_tablename + ":"):].strip()
                    add_next = True
                elif item != "":
                    if add_next:
@@ -117,7 +117,8 @@ def update_sql_commands(dataset: List[Tuple[str, str]], label_tablename: str) ->
                        add_next = False
                    final_item = item.strip()
                    table_name = ""
-                if final_item.strip()[:2] in comments and ("update " in final_item.lower() or "delete " in final_item.lower() or
+                if final_item.strip()[:2] in comments and (
+                        "update " in final_item.lower() or "delete " in final_item.lower() or
                        "alter" in final_item.lower() or "create" in final_item.lower() or
                        "drop" in final_item.lower()):
                    trans_index = final_item.lower().find("update")
@@ -135,6 +136,9 @@ def update_sql_commands(dataset: List[Tuple[str, str]], label_tablename: str) ->
                    drop_index = final_item.lower().find("drop")
                    if drop_index != -1:
                        final_item = final_item[drop_index:]
+                    call_index = final_item.lower().find("call")
+                    if call_index != -1:
+                        final_item = final_item[call_index:]
                final_item = final_item.replace("%", "%%")
                final_data.append(final_item)
            final_data = [item.replace("\t", "") for item in final_data if item != "" and ("select" in item.lower() or
@@ -144,7 +148,8 @@ def update_sql_commands(dataset: List[Tuple[str, str]], label_tablename: str) ->
                                                                                           "alter" in item.lower() or
                                                                                           "create" in item.lower() or
                                                                                           "drop" in item.lower() or
-                                                                                           "commit" in item.lower())]
+                                                                                           "commit" in item.lower() or
+                                                                                           "call" in item.lower())]
            result.append((row[0], final_data))
            logger.info(f"Lista de comandos: {result}")
    except Exception as e:
@@ -157,9 +162,10 @@ def select_multiple(command: str) -> Dict[str, Any]:
    response = {'is_multiple': False, 'tablename': ''}
    tablename = ""
    no_procedure_init = "|select"
-    procedure_init = ["|begin"]
+    procedure_init = ["|begin", "|call"]
    try:
-        if command.lower().replace(" ", "").find(procedure_init[0]) != -1:
+        if command.lower().replace(" ", "").find(procedure_init[0]) != -1 or \
+                command.lower().replace(" ", "").find(procedure_init[1]) != -1:
            response["is_multiple"] = True
            tablename = command[:command.index("|")].strip()
            response["tablename"] = tablename
@@ -171,7 +177,7 @@ def select_multiple(command: str) -> Dict[str, Any]:
            if init_index == -1:
                raise AssertionError("Query malformed")
            else:
-                from_command = command[init_index+4:]
+                from_command = command[init_index + 4:]
                tablename_base = from_command.strip().split(" ")
                if len(tablename_base) > 0 and tablename == "":
                    tablename = tablename_base[0]

--- a/dags/procedure_definition.json
+++ b/dags/procedure_definition.json
@@ -2,29 +2,28 @@
  "procedure_identifier": "PROCEDURE_1",
  "fields": [
    {
-      "identifier": "CD_EMPRESA",
+      "identifier": "CD_FOLIO",
-      "datatype": "NUMBER",
+      "datatype": "TEXT",
      "decimal_precision": 0,
-      "maxLength": null
+      "maxLength": 100
    },
    {
-      "identifier": "CD_FOLIO",
+      "identifier": "CD_CUENTA",
      "datatype": "TEXT",
      "decimal_precision": null,
      "maxLength": 100
    },
    {
-      "identifier": "CD_CLIENTE",
+      "identifier": "CD_PAQUETE",
      "datatype": "TEXT",
      "decimal_precision": null,
-      "maxLength": 50
+      "maxLength": 100
    },
    {
-      "identifier": "FH_CONTRATACION",
+      "identifier": "NB_PAQUETE",
-      "datatype": "DATE",
+      "datatype": "TEXT",
-      "pattern": "%d-%m-%y",
      "decimal_precision": null,
-      "maxLength": null
+      "maxLength": 200
    }
  ]
 },