Commit b0fd6670 authored by Cristian Aguirre

Update action-exclude-records-v1

parent 06da121b
...@@ -11,3 +11,4 @@ class CodeResponseEnum(Enum): ...@@ -11,3 +11,4 @@ class CodeResponseEnum(Enum):
OUTPUT_ERROR = 606 OUTPUT_ERROR = 606
EMPTY_DATASET = 607 EMPTY_DATASET = 607
ERROR = 609 ERROR = 609
TIMEOUT = 610
...@@ -4,3 +4,4 @@ from enum import Enum ...@@ -4,3 +4,4 @@ from enum import Enum
class StatusEnum(Enum): class StatusEnum(Enum):
OK = 200 OK = 200
ERROR = 609 ERROR = 609
TIMEOUT = 610
...@@ -57,6 +57,9 @@ class Process: ...@@ -57,6 +57,9 @@ class Process:
save = self.utils.save_result(result, self.descriptor, db_session) save = self.utils.save_result(result, self.descriptor, db_session)
if save["status"] == StatusEnum.ERROR.name: if save["status"] == StatusEnum.ERROR.name:
raise InterruptedError(save["message"]) raise InterruptedError(save["message"])
except TimeoutError as e:
self.app.logger.error(f"Error de Timeout. Error: {e}")
status, status_description = CodeResponseEnum.TIMEOUT, str(e)
except IndexError as e: except IndexError as e:
self.app.logger.error(f"Error extrayendo insumos. Vacío. Error: {e}") self.app.logger.error(f"Error extrayendo insumos. Vacío. Error: {e}")
status, status_description = CodeResponseEnum.EMPTY_DATASET, str(e) status, status_description = CodeResponseEnum.EMPTY_DATASET, str(e)
......
...@@ -5,6 +5,8 @@ import shutil ...@@ -5,6 +5,8 @@ import shutil
from enum import Enum from enum import Enum
# from pyspark.sql import SparkSession # from pyspark.sql import SparkSession
import json import json
from app.main.engine.enum.CodeResponseEnum import CodeResponseEnum
from app.main.engine.util.Timezone import Timezone from app.main.engine.util.Timezone import Timezone
# from config import Config as cfg # from config import Config as cfg
...@@ -52,8 +54,11 @@ class Utils: ...@@ -52,8 +54,11 @@ class Utils:
if codeEnum.value == StatusEnum.OK.value: if codeEnum.value == StatusEnum.OK.value:
response.update({'status': StatusEnum.OK.name, 'detail': detail}) response.update({'status': StatusEnum.OK.name, 'detail': detail})
else: else:
error = StatusEnum.ERROR.name
if codeEnum.value == CodeResponseEnum.TIMEOUT.value:
error = StatusEnum.TIMEOUT.name
description = DescResponseEnum[codeEnum.name].value description = DescResponseEnum[codeEnum.name].value
response.update({'status': StatusEnum.ERROR.name, 'message': description, response.update({'status': error, 'message': description,
'detail': detail}) 'detail': detail})
return response return response
...@@ -65,6 +70,14 @@ class Utils: ...@@ -65,6 +70,14 @@ class Utils:
pivot_params = descriptor["params-input"]["pivot-config"] pivot_params = descriptor["params-input"]["pivot-config"]
ctp_params = descriptor["params-input"]["counterpart-config"] ctp_params = descriptor["params-input"]["counterpart-config"]
for key_p, key_c in zip(pivot_params.keys(), ctp_params.keys()):
if isinstance(pivot_params[key_p], str):
pivot_params[key_p] = "PIVOT_" + pivot_params[key_p]
ctp_params[key_c] = "COUNTERPART_" + ctp_params[key_c]
else:
pivot_params[key_p] = ["PIVOT_" + column for column in pivot_params[key_p]]
ctp_params[key_c] = ["COUNTERPART_" + column for column in ctp_params[key_c]]
group_pivot_match = pivot_params["columns-group"] group_pivot_match = pivot_params["columns-group"]
transaction_pivot_match = pivot_params["columns-transaction"] transaction_pivot_match = pivot_params["columns-transaction"]
...@@ -73,7 +86,7 @@ class Utils: ...@@ -73,7 +86,7 @@ class Utils:
used_list = transaction_counterpart_match if exclude_pivot else transaction_pivot_match used_list = transaction_counterpart_match if exclude_pivot else transaction_pivot_match
if data.empty: if data is None or data.empty:
self.app.logger.info(f"El dataframe resultado esta vacio") self.app.logger.info(f"El dataframe resultado esta vacio")
else: else:
for idx, i in data.iterrows(): for idx, i in data.iterrows():
......
from typing import Any, Dict, List from typing import Any, Dict
import importlib.util import importlib.util
from itertools import combinations
import multiprocessing as mp
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from numba import njit import multiprocessing as mp
from parallel_pandas import ParallelPandas from parallel_pandas import ParallelPandas
from concurrent.futures import ThreadPoolExecutor from wrapt_timeout_decorator import timeout
from app.main.engine.action.ActionInterface import ActionInterface from app.main.engine.action.ActionInterface import ActionInterface
...@@ -35,12 +33,12 @@ class MatchAndExcludeRecordsAction(ActionInterface): ...@@ -35,12 +33,12 @@ class MatchAndExcludeRecordsAction(ActionInterface):
def __init__(self, app) -> None: def __init__(self, app) -> None:
super().__init__(app) super().__init__(app)
self.max_combinations = None self.max_combinations = None
self.comb_per_group = None self.timeout = None
self.exclude_pivot = None self.exclude_pivot = None
self.pivot_params = None self.pivot_params = None
self.ctp_params = None self.ctp_params = None
self.output = None self.output = None
self.config_params = ["max-records-per-combinations", "max-combinations-per-group", "exclude-entity-pivot"] self.config_params = ["max-records-per-combinations", "max-timeout-per-combinations", "exclude-entity-pivot"]
def parser(self, descriptor: Dict[str, Any]): def parser(self, descriptor: Dict[str, Any]):
# Validar si pyspark y su versión está instalada # Validar si pyspark y su versión está instalada
...@@ -76,13 +74,15 @@ class MatchAndExcludeRecordsAction(ActionInterface): ...@@ -76,13 +74,15 @@ class MatchAndExcludeRecordsAction(ActionInterface):
raise ReferenceError(f"Parámetro *{param}* no encontrado en pivot o contraparte") raise ReferenceError(f"Parámetro *{param}* no encontrado en pivot o contraparte")
self.max_combinations = configs["max-records-per-combinations"] self.max_combinations = configs["max-records-per-combinations"]
self.comb_per_group = configs["max-combinations-per-group"] self.timeout = configs["max-timeout-per-combinations"]
self.exclude_pivot = configs["exclude-entity-pivot"] self.exclude_pivot = configs["exclude-entity-pivot"]
self.pivot_params = pivot_params self.pivot_params = pivot_params
self.ctp_params = ctp_params self.ctp_params = ctp_params
def process(self, source_obj): def process(self, source_obs):
try:
@timeout(self.timeout)
def __process(source_obj):
# Inicializar la sesion de Spark # Inicializar la sesion de Spark
session = self.createSession() session = self.createSession()
...@@ -124,7 +124,6 @@ class MatchAndExcludeRecordsAction(ActionInterface): ...@@ -124,7 +124,6 @@ class MatchAndExcludeRecordsAction(ActionInterface):
if self.ctp_params["amount-column"] in ctp_cols: if self.ctp_params["amount-column"] in ctp_cols:
ctp_cols.remove(self.ctp_params["amount-column"]) ctp_cols.remove(self.ctp_params["amount-column"])
comb_per_group = self.comb_per_group
max_combinations = self.max_combinations max_combinations = self.max_combinations
# Ejecutamos lógica de excluir registros # Ejecutamos lógica de excluir registros
...@@ -188,20 +187,20 @@ class MatchAndExcludeRecordsAction(ActionInterface): ...@@ -188,20 +187,20 @@ class MatchAndExcludeRecordsAction(ActionInterface):
df3 = df3.toPandas() df3 = df3.toPandas()
total_cols = group_cols + [amount_col, id_col, EXCLUDE_ROWS_FIELD, "DIFF"] total_cols = group_cols + [amount_col, id_col, EXCLUDE_ROWS_FIELD, "DIFF"]
# ParallelPandas.initialize(n_cpu=mp.cpu_count(), split_factor=8, disable_pr_bar=True) ParallelPandas.initialize(n_cpu=mp.cpu_count(), split_factor=8, disable_pr_bar=True)
df3 = df3.sort_values(group_cols + [amount_col]) df3 = df3.sort_values(group_cols + [amount_col])
resultado = df3[total_cols].groupby(group_cols).apply(lambda x: custom_func(x, amount_col, id_col, max_combinations)) resultado = df3[total_cols].groupby(group_cols).p_apply(lambda x: custom_func(x, amount_col, id_col, max_combinations))
resultado = resultado.reset_index() resultado = resultado.reset_index()
if len(resultado.columns) == 1: if len(resultado.columns) == 1:
resultado = pd.DataFrame([], columns=group_cols + ["LISTA_DIFF"]) resultado = pd.DataFrame([], columns=group_cols + ["LISTA_DIFF"])
else: else:
resultado.columns = group_cols + ["LISTA_DIFF"] resultado.columns = group_cols + ["LISTA_DIFF"]
# print(resultado["LISTA_DIFF"].apply(lambda x: x if pd.notna(x) and x[0]!=-1 else x))
meged2 = resultado.merge(merged_df.toPandas(), 'left', group_cols) meged2 = resultado.merge(merged_df.toPandas(), 'left', group_cols)
print(meged2)
meged2["LISTA_DIFF"] = meged2["LISTA_DIFF"].apply(self.handle_array) meged2["LISTA_DIFF"] = meged2["LISTA_DIFF"].apply(self.handle_array)
meged2 = meged2[(meged2['DIFF'] == 0) | ((meged2['DIFF'] != 0) & (meged2['LISTA_DIFF'].apply(len) > 0))] meged2 = meged2[(meged2['DIFF'] == 0) | ((meged2['DIFF'] != 0) & (meged2['LISTA_DIFF'].apply(len) > 0))]
if meged2.empty: if meged2.empty:
pass pass
elif self.exclude_pivot: elif self.exclude_pivot:
...@@ -215,7 +214,12 @@ class MatchAndExcludeRecordsAction(ActionInterface): ...@@ -215,7 +214,12 @@ class MatchAndExcludeRecordsAction(ActionInterface):
if meged2['INTER_PIVOT_ID'].dtype == 'int64': if meged2['INTER_PIVOT_ID'].dtype == 'int64':
merged_df['INTER_PIVOT_ID'] = merged_df['INTER_PIVOT_ID'].apply(lambda x: [x]).astype('object') merged_df['INTER_PIVOT_ID'] = merged_df['INTER_PIVOT_ID'].apply(lambda x: [x]).astype('object')
self.output = meged2 return meged2
except TimeoutError as e:
raise TimeoutError(f"Tiempo límite superado. {e}")
self.output = __process(source_obs)
def response(self): def response(self):
return self.output return self.output
...@@ -251,91 +255,57 @@ class MatchAndExcludeRecordsAction(ActionInterface): ...@@ -251,91 +255,57 @@ class MatchAndExcludeRecordsAction(ActionInterface):
def custom_func(group, amount_field, id_field, max_combinations): def custom_func(group, amount_field, id_field, max_combinations):
diff = group["DIFF"].values[0] diff = int(group["DIFF"].values[0]*(10**ROUND_DECIMAL))
if pd.isna(diff) or diff == 0: if pd.isna(diff) or diff == 0:
return None return None
group = group[group[EXCLUDE_ROWS_FIELD] == 'S'] group = group[group[EXCLUDE_ROWS_FIELD] == 'S']
group[amount_field] = group[amount_field].astype(float) group[amount_field] = group[amount_field].astype(float)
group = group.reset_index(drop=True) group = group.reset_index(drop=True)
values = group[amount_field].values values = group[amount_field].values
values *= (10**ROUND_DECIMAL)
values = values.astype(np.int64)
ids = group[id_field].values ids = group[id_field].values
tam = len(values)
tam = tam if tam <= max_combinations else max_combinations
n = len(values) result = subset_sum_iter(values, diff, tam)
valores1 = encontrar_comb_1(values, diff) indices = ids[np.isin(values, result)]
if valores1[0] != -1:
indices = ids[valores1]
return indices
valores2 = encontrar_comb_2(values, diff, n)
if valores2[0] != -1:
indices = ids[valores2]
return indices
# Iterar sobre todos los índices posibles
# valores4 = encontrar_comb_4(values, diff, n)
# if valores4[0] != -1:
# indices = ids[valores4]
# return indices
valores5 = encontrar_comb_5(values, diff, n)
if valores5[0] != -1:
indices = ids[valores5]
return indices return indices
@njit def subset_sum_iter(numbers, target, num_elements):
def encontrar_comb_1(valores, target):
indice = [-1] # Initialize solutions list
for idx, value in enumerate(valores): solutions = []
suma = value for step in range(1, num_elements+1):
if round(suma, ROUND_DECIMAL) == target: # Build first index by taking the first num_elements from the numbers
indice = [idx for idx, val in enumerate(valores) if val in [value]] indices = list(range(step))
return indice solution = [numbers[i] for i in indices]
if sum(solution) == target:
return indice solutions.append(solution)
@njit # We iterate over the rest of the indices until we have tried all combinations
def encontrar_comb_2(valores, target, n): while True:
indice = [-1] for i in range(step):
for i in range(n): if indices[i] != i + len(numbers) - step:
array_except = np.delete(valores, i) break
for idx, value in enumerate(array_except): else:
suma = value + valores[i] # No combinations left
if round(suma, ROUND_DECIMAL) == target: break
indice = [idx for idx, val in enumerate(valores) if val in [value, valores[i]]]
return indice # Increase current index and all its following ones
indices[i] += 1
return indice for j in range(i + 1, step):
indices[j] = indices[j - 1] + 1
@njit
def encontrar_comb_4(valores, target, n): # Check current solution
indice = [-1] solution = [numbers[i] for i in indices]
for i in range(n): if round(sum(solution), ROUND_DECIMAL) == target:
a1 = np.delete(valores, i) solutions.append(solution)
for j in range(len(a1)): break
a2 = np.delete(a1, j) if len(solutions) > 0:
for k in range(len(a2)): solutions = solutions[0]
array_except = np.delete(a2, k) break
for idx, value in enumerate(array_except):
suma = value + valores[i] + a1[j] + a2[k] return solutions
if round(suma, ROUND_DECIMAL) == target:
indice = [idx for idx, val in enumerate(valores) if val in [value, valores[i], a1[j], a2[k]]]
return indice
return indice
@njit
def encontrar_comb_5(valores, target, n):
indice = [-1]
for i in range(n):
a1 = np.delete(valores, i)
for j in range(len(a1)):
a2 = np.delete(a1, j)
for k in range(len(a2)):
a3 = np.delete(a2, k)
for l in range(len(a3)):
array_except = np.delete(a2, l)
for idx, value in enumerate(array_except):
suma = value + valores[i] + a1[j] + a2[k] + a3[l]
if round(suma, ROUND_DECIMAL) == target:
indice = [idx for idx, val in enumerate(valores) if val in [value, valores[i], a1[j], a2[k], a3[l]]]
return indice
return indice
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment