Commit 805f468e authored by Cristian Aguirre

fix bug commission

parent a78f4f94
from enum import Enum
class DatabaseTypeEnum(Enum):
MONGODB = "mongodb"
MYSQL = "mysql"
REDSHIFT = "redshift"
STARROKS = "starroks"
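The member values are plain strings, so the target engine could be parsed straight from configuration rather than hard-coded in each flow; a minimal sketch, assuming a hypothetical "db_type" config key (the flows in this commit currently set a DB_TYPE constant instead):
from Enum.DatabaseTypeEnum import DatabaseTypeEnum
conf = {"db_type": "redshift"}  # "db_type" is a hypothetical key, not part of this commit
db_type = DatabaseTypeEnum(conf["db_type"])
assert db_type is DatabaseTypeEnum.REDSHIFT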
from typing import Dict, Any
import logging
from pyspark.sql.functions import col, when, lit, to_date, date_format, date_add
from pyspark.sql.types import StructType, StructField, StringType
from prefect import task
from Enum.DataTypeEnum import DataTypeEnum
from Enum.DatabaseTypeEnum import DatabaseTypeEnum
from Enum.InputTypeEnum import InputTypeEnum
from Utils.SparkUtils import createSession
from Input.Source import Input
......@@ -20,8 +21,8 @@ class ETLProcess:
self.inputs = {}
def init(self, spark_jars: Dict[str, str]) -> None:
self.session = createSession(self.identifier, spark_jars)
def init(self, spark_jars: Dict[str, str], source_type: InputTypeEnum = InputTypeEnum.BUCKET) -> None:
self.session = createSession(self.identifier, spark_jars, source_type)
@task
def reader(self) -> None:
......@@ -124,19 +125,41 @@ class ETLProcess:
return success
@task
def write(self, identifier: str, starroks_jdbc: str, starroks_fe: str, prev_status: bool = True) -> None:
def write(self, identifier: str, starroks_jdbc: str, starroks_fe: str, prev_status: bool = True,
db_type: DatabaseTypeEnum = DatabaseTypeEnum.REDSHIFT, redshift_url: str = "", mysql_url: str = "") -> None:
try:
database = starroks_jdbc[starroks_jdbc.rfind("/")+1:]
starroks_user = self.conf["starroks"]["user"]
starroks_pass = self.conf["starroks"]["password"]
self.inputs[identifier].write.format("starrocks") \
.option("starrocks.fe.http.url", starroks_fe) \
.option("starrocks.fe.jdbc.url", starroks_jdbc) \
.option("starrocks.table.identifier", database+"."+identifier) \
.option("starrocks.user", starroks_user) \
.option("starrocks.password", starroks_pass) \
.mode("append") \
.save()
if db_type == DatabaseTypeEnum.REDSHIFT:
self.inputs[identifier].coalesce(45).write \
.format("jdbc") \
.option("driver", "com.amazon.redshift.jdbc42.Driver") \
.option("url", redshift_url) \
.option("dbtable", identifier) \
.option("user", "awsuser") \
.option("password", "Awsuser123") \
.mode("append") \
.save()
elif db_type == DatabaseTypeEnum.MYSQL:
self.inputs[identifier].write \
.format("jdbc") \
.option("driver", "com.mysql.cj.jdbc.Driver") \
.option("url", mysql_url) \
.option("dbtable", identifier) \
.option("user", "root") \
.option("password", "root") \
.mode("append") \
.save()
else:
database = starroks_jdbc[starroks_jdbc.rfind("/")+1:]
starroks_user = self.conf["starroks"]["user"]
starroks_pass = self.conf["starroks"]["password"]
self.inputs[identifier].write.format("starrocks") \
.option("starrocks.fe.http.url", starroks_fe) \
.option("starrocks.fe.jdbc.url", starroks_jdbc) \
.option("starrocks.table.identifier", database+"."+identifier) \
.option("starrocks.user", starroks_user) \
.option("starrocks.password", starroks_pass) \
.mode("append") \
.save()
except Exception as e:
logger.error(f"Error saving results. {e}")
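The Redshift and MySQL branches above embed their credentials in the writer, while the StarRocks branch reads them from self.conf; a minimal sketch, inside ETLProcess, of resolving the two JDBC branches from the "redshift" / "mysql" config blocks added further down in this commit (the helper name _jdbc_options is hypothetical):
def _jdbc_options(self, db_type, redshift_url, mysql_url):
    # Resolve driver, URL and credentials from the config sections shipped with this commit.
    if db_type == DatabaseTypeEnum.REDSHIFT:
        creds = self.conf["redshift"]
        return {"driver": "com.amazon.redshift.jdbc42.Driver", "url": redshift_url,
                "user": creds["user"], "password": creds["password"]}
    creds = self.conf["mysql"]
    return {"driver": "com.mysql.cj.jdbc.Driver", "url": mysql_url,
            "user": creds["user"], "password": creds["password"]}
# Possible usage inside write(): self.inputs[identifier].write.format("jdbc") \
#     .options(**self._jdbc_options(db_type, redshift_url, mysql_url)) \
#     .option("dbtable", identifier).mode("append").save()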
from typing import Dict
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf
from pyspark.sql.types import ArrayType, StringType
import logging
from Enum.InputTypeEnum import InputTypeEnum
logger = logging.getLogger()
def createSession(name: str, spark_jars: Dict[str, str]) -> SparkSession:
def createSession(name: str, spark_jars: Dict[str, str], source_type: InputTypeEnum) -> SparkSession:
session = None
try:
jars = list(spark_jars.values())
......@@ -18,17 +18,20 @@ def createSession(name: str, spark_jars: Dict[str, str]) -> SparkSession:
.appName(name) \
.config("spark.jars", jars) \
.config("spark.jars.packages", "graphframes:graphframes:0.8.3-spark3.4-s_2.12") \
.config("spark.hadoop.fs.s3a.aws.credentials.provider",
"com.amazonaws.auth.DefaultAWSCredentialsProviderChain") \
.config("spark.executor.extraClassPath", jars) \
.config("spark.driver.extraClassPath", jars) \
.config("spark.starrocks.driver", "com.starroks.jdbc.Driver") \
.config("spark.sql.catalogImplementation", "in-memory") \
.getOrCreate()
session._jsc.hadoopConfiguration().set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
session._jsc.hadoopConfiguration().set("fs.s3a.endpoint", "http://192.168.21.47:9000")
session._jsc.hadoopConfiguration().set("fs.s3a.connection.ssl.enabled", "false")
session._jsc.hadoopConfiguration().set("fs.s3a.path.style.access", "true")
session._jsc.hadoopConfiguration().set("fs.s3a.access.key", "minioadmin")
session._jsc.hadoopConfiguration().set("fs.s3a.secret.key", "minioadmin")
if source_type == InputTypeEnum.LOCAL:
session._jsc.hadoopConfiguration().set("fs.s3a.endpoint", "http://192.168.21.47:9000")
session._jsc.hadoopConfiguration().set("fs.s3a.connection.ssl.enabled", "false")
session._jsc.hadoopConfiguration().set("fs.s3a.path.style.access", "true")
session._jsc.hadoopConfiguration().set("fs.s3a.access.key", "minioadmin")
session._jsc.hadoopConfiguration().set("fs.s3a.secret.key", "minioadmin")
except Exception as e:
logger.error(f"Error creating session. {e}")
finally:
......@@ -70,5 +73,5 @@ def find_related_vertices(graph):
dfs(vertex_id, related_vertices)
# Add the related vertices to the dictionary
related_vertices_dict[vertex_id] = list(related_vertices)
related_vertices_dict[vertex_id].remove(vertex_id)
return related_vertices_dict
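A short usage sketch of the new createSession signature; the jar path follows the SPARK_JARS maps used by the flows below, the app name is illustrative, and InputTypeEnum.LOCAL is what routes fs.s3a to the MinIO endpoint configured above (BUCKET leaves the default AWS credential chain in place):
from Enum.InputTypeEnum import InputTypeEnum
from Utils.SparkUtils import createSession
# Minimal jar map for a local run; extend with the STARROK/REDSHIFT jars as needed.
spark_jars = {"MYSQL": "/opt/spark-jars/mysql-connector-java-8.0.30.jar"}
session = createSession("etl-local-test", spark_jars, InputTypeEnum.LOCAL)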
......@@ -5,6 +5,7 @@ from typing import Any, Dict
from prefect import flow, get_run_logger
from Pipeline.CommissionProcess import CommissionProcess
from Enum.DatabaseTypeEnum import DatabaseTypeEnum
SPARK_JARS = {
"STARROK": "/opt/spark-jars/starrocks-spark-connector-3.2_2.12-1.1.2.jar",
......@@ -14,6 +15,10 @@ SPARK_JARS = {
STARROK_JDBC = "jdbc:mysql://192.168.1.37:9030/bcom_spark"
STARROK_FE_NODE = "192.168.1.37:8030"
REDSHIFT_JDBC = "jdbc:redshift://redshift-cluster-1.cumpswji5bs3.us-east-1.redshift.amazonaws.com:5439/dev?currentSchema=prueba_ca"
DB_TYPE = DatabaseTypeEnum.REDSHIFT
@flow()
def run_commission(config: Dict[str, Any]) -> None:
......@@ -29,7 +34,7 @@ def run_commission(config: Dict[str, Any]) -> None:
# First task - Extract the data - REMEMBER: SPARK IS LAZY!!!
start_reader = time.time()
commission_process.get_inputs(commission_process, STARROK_JDBC, STARROK_FE_NODE)
commission_process.get_inputs(commission_process, DB_TYPE, STARROK_JDBC, STARROK_FE_NODE, REDSHIFT_JDBC)
logger.info(f"Data extraction time from the DB: {time.time() - start_reader}")
# Third task - Get goals
......@@ -37,7 +42,7 @@ def run_commission(config: Dict[str, Any]) -> None:
goals = commission_process.get_goals(commission_process, "VENTAS", "GOALS")
# Fifth task - Get executed values - Should the FLAG_COMISIONABLE and ACTIVE_USER_TRAFFIC filters also be applied?
executes = commission_process.get_executed(commission_process, "VENTAS", "TEAMS")
executes = commission_process.get_executed(commission_process, "VENTAS", "DEVICES")
# Sixth task - Get source amount
base = commission_process.get_source_value(commission_process, "VENTAS", "COMERCIAL_BASE")
......
......@@ -5,6 +5,7 @@ from typing import Any, Dict
from prefect import flow, get_run_logger
from Pipeline.CommissionProcess import CommissionProcess
from Enum.DatabaseTypeEnum import DatabaseTypeEnum
SPARK_JARS = {
"STARROK": "/opt/spark-jars/starrocks-spark-connector-3.2_2.12-1.1.2.jar",
......@@ -14,6 +15,12 @@ SPARK_JARS = {
STARROK_JDBC = "jdbc:mysql://192.168.1.37:9030/bcom_spark"
STARROK_FE_NODE = "192.168.1.37:8030"
REDSHIFT_JDBC = "jdbc:redshift://redshift-cluster-1.cumpswji5bs3.us-east-1.redshift.amazonaws.com:5439/dev?currentSchema=prueba_ca"
MYSQL_JDBC = "jdbc:mysql://localhost:13306/bcom_spark"
DB_TYPE = DatabaseTypeEnum.MYSQL
@flow()
def run_commission(config: Dict[str, Any]) -> None:
......@@ -29,7 +36,8 @@ def run_commission(config: Dict[str, Any]) -> None:
# First task - Extract the data - REMEMBER: SPARK IS LAZY!!!
start_reader = time.time()
commission_process.get_inputs(commission_process, STARROK_JDBC, STARROK_FE_NODE)
commission_process.get_inputs(commission_process, DB_TYPE, STARROK_JDBC, STARROK_FE_NODE, REDSHIFT_JDBC,
MYSQL_JDBC)
logger.info(f"Data extraction time from the DB: {time.time() - start_reader}")
# Third task - Get goals
......@@ -37,20 +45,15 @@ def run_commission(config: Dict[str, Any]) -> None:
goals = commission_process.get_goals_2(commission_process, "GOALS", "ESTRUCTURA_ORGANIZACIONAL")
# Fifth task - Get executed values - Should the FLAG_COMISIONABLE and ACTIVE_USER_TRAFFIC filters also be applied?
executes = commission_process.get_executed_2(commission_process, "ESTRUCTURA_ORGANIZACIONAL", "TEAMS", "VENTAS")
#
executes = commission_process.get_executed_2(commission_process, "ESTRUCTURA_ORGANIZACIONAL", "DEVICES", "VENTAS")
# Sixth task - Get source amount
base = commission_process.get_source_value_2(commission_process, "ESTRUCTURA_ORGANIZACIONAL", "COMERCIAL_BASE")
# Second task - Build the hierarchy
start_process = time.time()
# ["AGENTES", "ESTRUCTURA", "UO", "OGRANIZACIONES"]
identifiers = ["INDIVIDUOS", "ESTRUCTURA_ORGANIZACIONAL", "UNIDAD", "ORGANIZACION"]
jerarquia_graph = commission_process.create_jerarquia(commission_process, identifiers, goals, executes, base)
logger.info(f"Time to build graph dataframes (hierarchy): {time.time() - start_process}")
result = commission_process.update_executes(commission_process, jerarquia_graph, goals, executes, base)
result = commission_process.get_commission_per_agent_2(commission_process, result)
......@@ -58,7 +61,8 @@ def run_commission(config: Dict[str, Any]) -> None:
# Write task
start_load = time.time()
_ = commission_process.write_result(commission_process, result, "REPORT_SUMMARY", STARROK_JDBC, STARROK_FE_NODE)
_ = commission_process.write_result(commission_process, result, "REPORT_SUMMARY", DB_TYPE, STARROK_JDBC,
STARROK_FE_NODE, REDSHIFT_JDBC, MYSQL_JDBC)
logger.info(f"Report load time to the DB: {time.time() - start_load}")
logger.info(f"Commission process execution time: {time.time() - start_time}")
......
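Each run now receives all three connection strings even though only one is consumed for the configured DB_TYPE; a small hedged sketch making that selection explicit (the helper name active_jdbc_url is hypothetical):
from Enum.DatabaseTypeEnum import DatabaseTypeEnum
def active_jdbc_url(db_type: DatabaseTypeEnum, starroks_jdbc: str,
                    redshift_url: str = "", mysql_url: str = "") -> str:
    # Only one URL is used per run; the remaining ones are passed along but ignored.
    if db_type == DatabaseTypeEnum.REDSHIFT:
        return redshift_url
    if db_type == DatabaseTypeEnum.MYSQL:
        return mysql_url
    return starroks_jdbc
# With DB_TYPE = DatabaseTypeEnum.MYSQL above, this resolves to MYSQL_JDBC.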
......@@ -9,7 +9,7 @@
"data": [
{
"identifier": "VENTAS",
"path": "s3a://prueba-id/inputs_spark/gross_202311.txt",
"path": "s3a://prueba-id2/bcom-tests/inputs/gross_202311.txt",
"input_type": "txt",
"separator": "|",
"schema": {
......@@ -29,8 +29,8 @@
}
},
{
"identifier": "TEAMS",
"path": "s3a://prueba-id/inputs_spark/equipos_202311.txt",
"identifier": "DEVICES",
"path": "s3a://prueba-id2/bcom-tests/inputs/equipos_202311.txt",
"input_type": "txt",
"separator": "|",
"schema": {
......@@ -45,7 +45,7 @@
},
{
"identifier": "GOALS",
"path": "s3a://prueba-id/inputs_spark/metas_202311.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/metas_202311.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -58,7 +58,7 @@
},
{
"identifier": "COMERCIAL_BASE",
"path": "s3a://prueba-id/inputs_spark/planta_comercial_202311.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/planta_comercial_202311.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -70,7 +70,7 @@
},
{
"identifier": "INDIVIDUOS",
"path": "s3a://prueba-id/inputs_spark/individuos_2023111813.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/individuos_2023111813.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -89,7 +89,7 @@
},
{
"identifier": "ROLES",
"path": "s3a://prueba-id/inputs_spark/roles_2023111812.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/roles_2023111812.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -99,7 +99,7 @@
},
{
"identifier": "ORGANIZACION",
"path": "s3a://prueba-id/inputs_spark/organizaciones_2023111813.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/organizaciones_2023111813.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -121,7 +121,7 @@
},
{
"identifier": "UNIDAD",
"path": "s3a://prueba-id/inputs_spark/unidades_organizacionales_2023111812.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/unidades_organizacionales_2023111812.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -139,7 +139,7 @@
},
{
"identifier": "ESTRUCTURA_ORGANIZACIONAL",
"path": "s3a://prueba-id/inputs_spark/estructura_organizacional_2023111812.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/estructura_organizacional_2023111812.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -154,5 +154,13 @@
"starroks": {
"user": "root",
"password": ""
},
"redshift": {
"user": "awsuser",
"password": "Awsuser123"
},
"mysql": {
"user": "root",
"password": "root"
}
}
\ No newline at end of file
......@@ -8,7 +8,7 @@
"data": [
{
"identifier": "FACTURACION",
"path": "s3a://prueba-id/bcom-tests/inputs/Facturacion_20240320.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/Facturacion_20240320.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -24,7 +24,7 @@
},
{
"identifier": "ENDING",
"path": "s3a://prueba-id/bcom-tests/inputs/Ending_20240320.csv",
"path": "s3a://prueba-id2/bcom-tests/inputs/Ending_20240320.csv",
"input_type": "csv",
"separator": ";",
"schema": {
......@@ -42,5 +42,13 @@
"starroks": {
"user": "root",
"password": ""
},
"redshift": {
"user": "awsuser",
"password": "Awsuser123"
},
"mysql": {
"user": "admin",
"password": "awsuser123"
}
}
\ No newline at end of file
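Both JSON files now carry "redshift" and "mysql" credential blocks next to "starroks" (note the MySQL credentials differ between the two files: root/root versus admin/awsuser123); a minimal sketch of reading them once the config is loaded (the file name is illustrative):
import json
# Illustrative file name; any of the flow configs changed in this commit has the same layout.
with open("config_commission.json") as f:
    conf = json.load(f)
redshift_creds = conf["redshift"]  # {"user": "awsuser", "password": "Awsuser123"}
mysql_creds = conf["mysql"]        # differs per file: root/root or admin/awsuser123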
......@@ -2,6 +2,8 @@ import time
import json
from typing import Any, Dict
from prefect import flow, get_run_logger
from Enum.DatabaseTypeEnum import DatabaseTypeEnum
from Enum.InputTypeEnum import InputTypeEnum
from Pipeline.ETLProcess import ETLProcess
......@@ -12,12 +14,21 @@ SPARK_JARS = {
"COMMON": "/opt/spark-jars/hadoop-common-3.3.4.jar",
"AWS_CLIENT": "/opt/spark-jars/hadoop-client-3.3.4.jar",
"STARROK": "/opt/spark-jars/starrocks-spark-connector-3.2_2.12-1.1.2.jar",
"MYSQL": "/opt/spark-jars/mysql-connector-java-8.0.30.jar"
"MYSQL": "/opt/spark-jars/mysql-connector-java-8.0.30.jar",
"REDSHIFT": "/opt/spark-jars/redshift-jdbc42-2.1.0.12.jar"
}
STARROK_JDBC = "jdbc:mysql://192.168.1.37:9030/bcom_spark"
STARROK_FE_NODE = "192.168.1.37:8030"
REDSHIFT_JDBC = "jdbc:redshift://redshift-cluster-1.cumpswji5bs3.us-east-1.redshift.amazonaws.com:5439/dev?currentSchema=prueba_ca"
MYSQL_JDBC = "jdbc:mysql://localhost:13306/bcom_spark"
DB_TYPE = DatabaseTypeEnum.MYSQL
SOURCE_TYPE = InputTypeEnum.BUCKET
@flow
def run_etl(config: Dict[str, Any]) -> None:
......@@ -29,7 +40,7 @@ def run_etl(config: Dict[str, Any]) -> None:
# Spark connection (LocalMode, StandAlone or Cluster)
start_init = time.time()
etl_process.init(SPARK_JARS)
etl_process.init(SPARK_JARS, SOURCE_TYPE)
logger.info(f"Spark session creation time: {time.time() - start_init}")
# First task - (Reader) - Extract the files
......@@ -42,31 +53,40 @@ def run_etl(config: Dict[str, Any]) -> None:
etl_process.set_schema(etl_process)
# Process - Gross input (Ventas)
ventas_flag = etl_process.process_gross.submit(etl_process, "VENTAS")
ventas_flag = etl_process.process_gross(etl_process, "VENTAS")
# Process - Team input (Equipos)
teams_flag = etl_process.process_teams.submit(etl_process, "TEAMS")
teams_flag = etl_process.process_teams(etl_process, "DEVICES")
logger.info(f"Data transformation and cleaning time: {time.time() - start_transform}")
# Write - GROSS input
start_load = time.time()
etl_process.write.submit(etl_process, "VENTAS", STARROK_JDBC, STARROK_FE_NODE, ventas_flag)
# Write - TEAMS input
etl_process.write.submit(etl_process, "TEAMS", STARROK_JDBC, STARROK_FE_NODE, teams_flag)
etl_process.write(etl_process, "VENTAS", STARROK_JDBC, STARROK_FE_NODE, ventas_flag, DB_TYPE,
REDSHIFT_JDBC, MYSQL_JDBC)
# Write - DEVICES input
etl_process.write(etl_process, "DEVICES", STARROK_JDBC, STARROK_FE_NODE, teams_flag, DB_TYPE,
REDSHIFT_JDBC, MYSQL_JDBC)
# Write - GOALS input
etl_process.write.submit(etl_process, "GOALS", STARROK_JDBC, STARROK_FE_NODE)
etl_process.write(etl_process, "GOALS", STARROK_JDBC, STARROK_FE_NODE, db_type=DB_TYPE,
redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
# Write - PLANTA input
etl_process.write.submit(etl_process, "COMERCIAL_BASE", STARROK_JDBC, STARROK_FE_NODE)
etl_process.write(etl_process, "COMERCIAL_BASE", STARROK_JDBC, STARROK_FE_NODE, db_type=DB_TYPE,
redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
# Write - INDIVIDUOS input
etl_process.write.submit(etl_process, "INDIVIDUOS", STARROK_JDBC, STARROK_FE_NODE)
etl_process.write(etl_process, "INDIVIDUOS", STARROK_JDBC, STARROK_FE_NODE, db_type=DB_TYPE,
redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
# Write - ROLES input
etl_process.write.submit(etl_process, "ROLES", STARROK_JDBC, STARROK_FE_NODE)
etl_process.write(etl_process, "ROLES", STARROK_JDBC, STARROK_FE_NODE, db_type=DB_TYPE,
redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
# Write - ORGANIZACION input
etl_process.write.submit(etl_process, "ORGANIZACION", STARROK_JDBC, STARROK_FE_NODE)
etl_process.write(etl_process, "ORGANIZACION", STARROK_JDBC, STARROK_FE_NODE, db_type=DB_TYPE,
redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
# Write - UNIDADES input
etl_process.write.submit(etl_process, "UNIDAD", STARROK_JDBC, STARROK_FE_NODE)
etl_process.write(etl_process, "UNIDAD", STARROK_JDBC, STARROK_FE_NODE, db_type=DB_TYPE,
redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
# Write - ESTRUCTURA input
etl_process.write.submit(etl_process, "ESTRUCTURA_ORGANIZACIONAL", STARROK_JDBC, STARROK_FE_NODE)
etl_process.write(etl_process, "ESTRUCTURA_ORGANIZACIONAL", STARROK_JDBC, STARROK_FE_NODE, db_type=DB_TYPE,
redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
logger.info(f"Data load time to the DB: {time.time() - start_load}")
logger.info(f"Overall ETL process execution time: {time.time() - start_time}")
......
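The loads above switched from etl_process.write.submit(...) to plain calls, so they now run one after another inside the flow. If the previous concurrent behaviour is still wanted with the new keyword arguments, a hedged sketch reusing the Prefect task futures the old .submit calls already relied on:
futures = [
    etl_process.write.submit(etl_process, name, STARROK_JDBC, STARROK_FE_NODE,
                             db_type=DB_TYPE, redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
    for name in ["GOALS", "COMERCIAL_BASE", "INDIVIDUOS", "ROLES",
                 "ORGANIZACION", "UNIDAD", "ESTRUCTURA_ORGANIZACIONAL"]
]
for future in futures:
    future.result()  # wait for every load before logging the total duration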
......@@ -4,6 +4,8 @@ from typing import Any, Dict
from prefect import flow, get_run_logger
from Pipeline.ETLProcess import ETLProcess
from Enum.DatabaseTypeEnum import DatabaseTypeEnum
from Enum.InputTypeEnum import InputTypeEnum
SPARK_JARS = {
......@@ -12,12 +14,21 @@ SPARK_JARS = {
"COMMON": "/opt/spark-jars/hadoop-common-3.3.4.jar",
"AWS_CLIENT": "/opt/spark-jars/hadoop-client-3.3.4.jar",
"STARROK": "/opt/spark-jars/starrocks-spark-connector-3.2_2.12-1.1.2.jar",
"MYSQL": "/opt/spark-jars/mysql-connector-java-8.0.30.jar"
"MYSQL": "/opt/spark-jars/mysql-connector-java-8.0.30.jar",
"REDSHIFT": "/opt/spark-jars/redshift-jdbc42-2.1.0.12.jar"
}
STARROK_JDBC = "jdbc:mysql://192.168.1.37:9030/bcom_spark"
STARROK_FE_NODE = "192.168.1.37:8030"
REDSHIFT_JDBC = "jdbc:redshift://redshift-cluster-1.cumpswji5bs3.us-east-1.redshift.amazonaws.com:5439/dev?currentSchema=prueba_ca"
MYSQL_JDBC = "jdbc:mysql://localhost:13306/bcom_spark"
DB_TYPE = DatabaseTypeEnum.MYSQL
SOURCE_TYPE = InputTypeEnum.BUCKET
@flow
def run_etl(config: Dict[str, Any]) -> None:
......@@ -29,7 +40,7 @@ def run_etl(config: Dict[str, Any]) -> None:
# Spark connection (LocalMode, StandAlone or Cluster)
start_init = time.time()
etl_process.init(SPARK_JARS)
etl_process.init(SPARK_JARS, SOURCE_TYPE)
logger.info(f"Spark session creation time: {time.time() - start_init}")
# First task - (Reader) - Extract the files
......@@ -46,10 +57,12 @@ def run_etl(config: Dict[str, Any]) -> None:
logger.info(f"Data transformation and cleaning time: {time.time() - start_transform}")
start_load = time.time()
# Write - TEAMS input
etl_process.write(etl_process, "FACTURACION", STARROK_JDBC, STARROK_FE_NODE, teams_fact)
# Write - DEVICES input
etl_process.write(etl_process, "FACTURACION", STARROK_JDBC, STARROK_FE_NODE, teams_fact, DB_TYPE,
REDSHIFT_JDBC, MYSQL_JDBC)
# Write - GOALS input
etl_process.write(etl_process, "ENDING", STARROK_JDBC, STARROK_FE_NODE)
etl_process.write(etl_process, "ENDING", STARROK_JDBC, STARROK_FE_NODE, db_type=DB_TYPE,
redshift_url=REDSHIFT_JDBC, mysql_url=MYSQL_JDBC)
logger.info(f"Data load time to the DB: {time.time() - start_load}")
logger.info(f"Overall ETL process execution time: {time.time() - start_time}")
......