Commit e82162be authored by Cristian Aguirre's avatar Cristian Aguirre

Merge branch 'developer_ca' into 'developer'

Actualización de script exclude-and-match con dask, pandas y librería dpss

See merge request !5
parents 287ace11 8ac5263c
# Image for the Flask application, started through gunicorn (see CMD).
FROM python:3.10-slim-buster
ENV FLASK_APP=run.py
# Entry point, gunicorn settings, dependency list and runtime config are
# copied to the image root; the application package and scripts keep their
# directory names.
COPY run.py flask_app.py gunicorn-cfg.py requirements.txt config.py conf.yml /
COPY app app
COPY scripts scripts
# Upgrade pip and install the pinned dependencies in a single layer.
# --no-cache-dir keeps pip's download/wheel cache out of the image; purging
# the cache *before* the requirements install left that install's cache in
# the layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
# gunicorn reads its settings from gunicorn-cfg.py and serves `app` from run.py.
CMD ["gunicorn", "--config", "gunicorn-cfg.py", "run:app"]
...@@ -44,6 +44,15 @@ class Database: ...@@ -44,6 +44,15 @@ class Database:
except Exception as e: except Exception as e:
self.app.logger.error(f"Error cerrando básica conexión. {e}") self.app.logger.error(f"Error cerrando básica conexión. {e}")
def get_dialect(self) -> str:
    """Return the dialect reported by the underlying connection factory.

    Delegates to ``self.factory.get_dialect()``. Any ``Exception`` raised by
    the factory is logged through ``self.app.logger`` and swallowed, in which
    case an empty string is returned.

    Returns:
        The value returned by the factory, or ``""`` if the factory failed.
    """
    dialect = ""
    try:
        dialect = self.factory.get_dialect()
    except Exception as e:
        self.app.logger.error(f"Error obteniendo dialect. {e}")
    # Return after the try/except rather than from a ``finally`` clause: a
    # ``return`` inside ``finally`` would also discard KeyboardInterrupt,
    # SystemExit and any other BaseException still propagating.
    return dialect
def create_engine(self) -> None: def create_engine(self) -> None:
try: try:
if isinstance(self.engine, type(None)): if isinstance(self.engine, type(None)):
......
...@@ -23,11 +23,12 @@ class Mysql: ...@@ -23,11 +23,12 @@ class Mysql:
self.params = params self.params = params
self.engine = None self.engine = None
self.connection = None self.connection = None
self.dialect = None
def create_spark_connection(self): def create_spark_connection(self):
params = {} params = {}
try: try:
url = "jdbc:mysql://"+self.host+":"+str(self.port)+"/"+self.database url = "jdbc:mysql://"+self.user+":"+self.password+"@"+self.host+":"+str(self.port)+"/"+self.database
properties = {"user": self.user, "password": self.password, "driver": "com.mysql.cj.jdbc.Driver"} properties = {"user": self.user, "password": self.password, "driver": "com.mysql.cj.jdbc.Driver"}
params["url"] = url params["url"] = url
params["properties"] = properties params["properties"] = properties
...@@ -46,10 +47,17 @@ class Mysql: ...@@ -46,10 +47,17 @@ class Mysql:
finally: finally:
return self.connection return self.connection
def create_engine(self) -> None: def get_dialect(self) -> str:
try: try:
dialect = DatabaseDialectEnum.MYSQL.value dialect = DatabaseDialectEnum.MYSQL.value
url = f"{dialect}://{self.user}:{self.password}@{self.host}:{str(self.port)}/{self.database}?charset=utf8mb4" self.dialect = f"{dialect}://{self.user}:{self.password}@{self.host}:{str(self.port)}/{self.database}?charset=utf8mb4"
except Exception as e:
self.app.logger.error(f"Error obteniendo dialect de Mysql. {e}")
return self.dialect
def create_engine(self) -> None:
try:
url = self.get_dialect()
self.engine = create_engine(url, pool_recycle=3600, pool_pre_ping=True, **self.params) self.engine = create_engine(url, pool_recycle=3600, pool_pre_ping=True, **self.params)
except Exception as e: except Exception as e:
self.app.logger.error(f"Error creando engine de Mysql. {e}") self.app.logger.error(f"Error creando engine de Mysql. {e}")
......
...@@ -11,3 +11,4 @@ class CodeResponseEnum(Enum): ...@@ -11,3 +11,4 @@ class CodeResponseEnum(Enum):
OUTPUT_ERROR = 606 OUTPUT_ERROR = 606
EMPTY_DATASET = 607 EMPTY_DATASET = 607
ERROR = 609 ERROR = 609
TIMEOUT = 610
...@@ -4,3 +4,4 @@ from enum import Enum ...@@ -4,3 +4,4 @@ from enum import Enum
class StatusEnum(Enum): class StatusEnum(Enum):
OK = 200 OK = 200
ERROR = 609 ERROR = 609
TIMEOUT = 610
from typing import Dict, Any from typing import Dict, Any
import time
import traceback as traceback_lib import traceback as traceback_lib
import importlib import importlib
...@@ -46,17 +45,23 @@ class Process: ...@@ -46,17 +45,23 @@ class Process:
obj_script.parser(self.descriptor) obj_script.parser(self.descriptor)
# Iniciando process # Iniciando process
self.app.logger.info(f"Iniciando procesamiento de script") self.app.logger.info(f"Iniciando procesamiento de script - {self.descriptor['idProcess']}")
obj_script.process(source) obj_script.process(source)
# Guardando resultado # Guardando resultado
self.app.logger.info(f"Generado y guardando resultado") self.app.logger.info(f"Generado y guardando resultado - {self.descriptor['idProcess']}")
response = obj_script.response() response = obj_script.response()
# response.show()
result = self.utils.create_result(response, self.descriptor) result = self.utils.create_result(response, self.descriptor)
del response
save = self.utils.save_result(result, self.descriptor, db_session) save = self.utils.save_result(result, self.descriptor, db_session)
if save["status"] == StatusEnum.ERROR.name: if save["status"] == StatusEnum.ERROR.name:
raise InterruptedError(save["message"]) raise InterruptedError(save["message"])
del result
except TimeoutError as e:
self.app.logger.error(f"Error de Timeout. Error: {e}")
status, status_description = CodeResponseEnum.TIMEOUT, str(e)
except IndexError as e: except IndexError as e:
self.app.logger.error(f"Error extrayendo insumos. Vacío. Error: {e}") self.app.logger.error(f"Error extrayendo insumos. Vacío. Error: {e}")
status, status_description = CodeResponseEnum.EMPTY_DATASET, str(e) status, status_description = CodeResponseEnum.EMPTY_DATASET, str(e)
...@@ -77,7 +82,7 @@ class Process: ...@@ -77,7 +82,7 @@ class Process:
status, status_description = CodeResponseEnum.PARAMETERS_ERROR, str(e) status, status_description = CodeResponseEnum.PARAMETERS_ERROR, str(e)
except Exception as e: except Exception as e:
traceback_lib.print_exc() traceback_lib.print_exc()
self.app.logger.error(f"Error procesando engine. {e}") self.app.logger.error(f"Error procesando engine - {self.descriptor['idProcess']}. {e}")
status, status_description = StatusEnum.ERROR, str(e) status, status_description = StatusEnum.ERROR, str(e)
finally: finally:
return self.utils.create_response(status, status_description) return self.utils.create_response(status, status_description)
...@@ -5,6 +5,8 @@ import shutil ...@@ -5,6 +5,8 @@ import shutil
from enum import Enum from enum import Enum
# from pyspark.sql import SparkSession # from pyspark.sql import SparkSession
import json import json
from app.main.engine.enum.CodeResponseEnum import CodeResponseEnum
from app.main.engine.util.Timezone import Timezone from app.main.engine.util.Timezone import Timezone
# from config import Config as cfg # from config import Config as cfg
...@@ -52,8 +54,11 @@ class Utils: ...@@ -52,8 +54,11 @@ class Utils:
if codeEnum.value == StatusEnum.OK.value: if codeEnum.value == StatusEnum.OK.value:
response.update({'status': StatusEnum.OK.name, 'detail': detail}) response.update({'status': StatusEnum.OK.name, 'detail': detail})
else: else:
error = StatusEnum.ERROR.name
if codeEnum.value == CodeResponseEnum.TIMEOUT.value:
error = StatusEnum.TIMEOUT.name
description = DescResponseEnum[codeEnum.name].value description = DescResponseEnum[codeEnum.name].value
response.update({'status': StatusEnum.ERROR.name, 'message': description, response.update({'status': error, 'message': description,
'detail': detail}) 'detail': detail})
return response return response
...@@ -65,6 +70,14 @@ class Utils: ...@@ -65,6 +70,14 @@ class Utils:
pivot_params = descriptor["params-input"]["pivot-config"] pivot_params = descriptor["params-input"]["pivot-config"]
ctp_params = descriptor["params-input"]["counterpart-config"] ctp_params = descriptor["params-input"]["counterpart-config"]
for key_p, key_c in zip(pivot_params.keys(), ctp_params.keys()):
if isinstance(pivot_params[key_p], str):
pivot_params[key_p] = "PIVOT_" + pivot_params[key_p]
ctp_params[key_c] = "COUNTERPART_" + ctp_params[key_c]
else:
pivot_params[key_p] = ["PIVOT_" + column for column in pivot_params[key_p]]
ctp_params[key_c] = ["COUNTERPART_" + column for column in ctp_params[key_c]]
group_pivot_match = pivot_params["columns-group"] group_pivot_match = pivot_params["columns-group"]
transaction_pivot_match = pivot_params["columns-transaction"] transaction_pivot_match = pivot_params["columns-transaction"]
...@@ -73,7 +86,7 @@ class Utils: ...@@ -73,7 +86,7 @@ class Utils:
used_list = transaction_counterpart_match if exclude_pivot else transaction_pivot_match used_list = transaction_counterpart_match if exclude_pivot else transaction_pivot_match
if data.empty: if data is None or data.empty:
self.app.logger.info(f"El dataframe resultado esta vacio") self.app.logger.info(f"El dataframe resultado esta vacio")
else: else:
for idx, i in data.iterrows(): for idx, i in data.iterrows():
......
...@@ -23,16 +23,16 @@ app: ...@@ -23,16 +23,16 @@ app:
timezone: 'GMT-5' timezone: 'GMT-5'
time_pattern: '%Y-%m-%d %H:%M:%S' time_pattern: '%Y-%m-%d %H:%M:%S'
logging: 'INFO' logging: 'INFO'
max_engine_threads: 2 # threads (maximum) max_engine_threads: 50 # threads (maximum)
# Make the service in a production state # Make the service in a production state
# Manage connections to the REST Service published. Allow workers to receive the connections. # Manage connections to the REST Service published. Allow workers to receive the connections.
# https://docs.gunicorn.org/en/stable/ # https://docs.gunicorn.org/en/stable/
gunicorn: gunicorn:
bind: '0.0.0.0:7500' bind: '0.0.0.0:8000'
worker_class: 'gthread' worker_class: 'gthread'
threads: 8 threads: 51
worker_connections: 50 worker_connections: 100
loglevel: 'debug' loglevel: 'debug'
accesslog: '-' accesslog: '-'
capture_output: True capture_output: True
\ No newline at end of file
# Custom image built on the EMR Serverless Spark 7.0.0 base, adding a
# source-built CPython 3.10.0 and a set of Python data libraries.
FROM public.ecr.aws/emr-serverless/spark/emr-7.0.0:latest
# Switch to root for the yum install and build steps below.
USER root
# install python 3
# Compiler toolchain and development headers used by the CPython build.
RUN yum install -y gcc openssl-devel bzip2-devel libffi-devel tar gzip wget make zlib-devel
# Download, unpack, configure and install CPython 3.10.0 from source.
# The downloaded tarball and the build tree are not removed in this step,
# so they remain in the resulting layer.
RUN wget https://www.python.org/ftp/python/3.10.0/Python-3.10.0.tgz && \
tar xzf Python-3.10.0.tgz && cd Python-3.10.0 && \
./configure --enable-optimizations && \
make altinstall
# NOTE(review): `make altinstall` normally installs the interpreter as
# `python3.10` and leaves the existing `python3` command untouched, so this
# pip call presumably targets the base image's Python rather than the 3.10.0
# build above - confirm which interpreter is intended.
RUN python3 -m pip install numpy pandas py4j python-dateutil pytz six tzdata
# EMRS will run the image as hadoop
USER hadoop:hadoop
\ No newline at end of file
# Compose definition for the single css-cusca service container.
version: "3"
services:
css-cusca-scripts:
image: css_cuscatlan:0.0.2
container_name: css-cusca
# Upper limits and reserved share of CPU/memory for the container.
# NOTE(review): whether `deploy.resources` is enforced depends on the Compose
# version / swarm mode in use - confirm for the target runtime.
deploy:
resources:
limits:
cpus: '8'
memory: 24G
reservations:
cpus: '4'
memory: 8G
restart: always
networks: [ css-cusca-network ]
# Host port 9500 is mapped to container port 8000.
ports:
- "9500:8000"
# Bind-mount the local conf.yml and the match-and-exclude script into the
# container (presumably so they can be changed without rebuilding the image).
volumes:
- "./conf.yml:/conf.yml"
- "./scripts/match-and-exclude-records-actions_v1.py:/scripts/match-and-exclude-records-actions_v1.py"
# Dedicated bridge network the service is attached to.
networks:
css-cusca-network:
driver: bridge
"""
Copyright (c) 2019 - present AppSeed.us
"""
import yaml
# Load the application config once; the context manager closes the file
# handle as soon as parsing is done instead of leaving it to the garbage
# collector.
with open('conf.yml') as conf_file:
    conf = yaml.safe_load(conf_file)

# Only the `app.gunicorn` section is relevant here. Each module-level name
# below is assigned straight from the key of the same name in that section.
conf = conf["app"]["gunicorn"]
bind = conf["bind"]
worker_class = conf["worker_class"]
threads = conf["threads"]
worker_connections = conf["worker_connections"]
loglevel = conf["loglevel"]
accesslog = conf["accesslog"]
capture_output = conf["capture_output"]
arrow==1.3.0
blinker==1.8.2
cli-exit-tools==1.2.6
click==8.1.7
cloudpickle==3.0.0
dask==2024.1.1
dill==0.3.8
dpss==0.22.0
Flask==3.0.3
fsspec==2024.3.1
greenlet==3.0.3
gunicorn==22.0.0
importlib_metadata==7.1.0
itsdangerous==2.2.0
Jinja2==3.1.4
lib-detect-testenv==2.0.8
locket==1.0.0
MarkupSafe==2.1.5
multiprocess==0.70.16
numpy==1.26.4
packaging==24.0
pandas==2.2.2
partd==1.4.2
pillow==10.3.0
psutil==5.9.8
pyarrow==14.0.2
PyMySQL==1.1.0
python-dateutil==2.9.0.post0
python-decouple==3.8
pytz==2024.1
PyYAML==6.0.1
six==1.16.0
SQLAlchemy==2.0.30
toolz==0.12.1
types-python-dateutil==2.9.0.20240316
typing_extensions==4.11.0
tzdata==2024.1
Werkzeug==3.0.3
wrapt==1.16.0
wrapt_timeout_decorator==1.5.1
zipp==3.18.1
...@@ -6,4 +6,4 @@ base = MainApplication() ...@@ -6,4 +6,4 @@ base = MainApplication()
app = base.create_app() app = base.create_app()
if __name__ == "__main__": if __name__ == "__main__":
base.run(port=8000) base.run(port=7500)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment