Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
CSS-Engine-Python-Cusca
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Proyectos-Innovacion-2024
CSS-Engine-Python-Cusca
Commits
b0fd6670
Commit
b0fd6670
authored
May 06, 2024
by
Cristian Aguirre
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update action-exclude-records-v1
parent
06da121b
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
213 additions
and
225 deletions
+213
-225
CodeResponseEnum.py
app/main/engine/enum/CodeResponseEnum.py
+1
-0
StatusEnum.py
app/main/engine/enum/StatusEnum.py
+1
-0
Process.py
app/main/engine/service/Process.py
+3
-0
Utils.py
app/main/engine/util/Utils.py
+15
-2
match-and-exclude-records-actions_v1.py
scripts/match-and-exclude-records-actions_v1.py
+193
-223
No files found.
app/main/engine/enum/CodeResponseEnum.py
View file @
b0fd6670
...
@@ -11,3 +11,4 @@ class CodeResponseEnum(Enum):
...
@@ -11,3 +11,4 @@ class CodeResponseEnum(Enum):
OUTPUT_ERROR
=
606
OUTPUT_ERROR
=
606
EMPTY_DATASET
=
607
EMPTY_DATASET
=
607
ERROR
=
609
ERROR
=
609
TIMEOUT
=
610
app/main/engine/enum/StatusEnum.py
View file @
b0fd6670
...
@@ -4,3 +4,4 @@ from enum import Enum
...
@@ -4,3 +4,4 @@ from enum import Enum
class
StatusEnum
(
Enum
):
class
StatusEnum
(
Enum
):
OK
=
200
OK
=
200
ERROR
=
609
ERROR
=
609
TIMEOUT
=
610
app/main/engine/service/Process.py
View file @
b0fd6670
...
@@ -57,6 +57,9 @@ class Process:
...
@@ -57,6 +57,9 @@ class Process:
save
=
self
.
utils
.
save_result
(
result
,
self
.
descriptor
,
db_session
)
save
=
self
.
utils
.
save_result
(
result
,
self
.
descriptor
,
db_session
)
if
save
[
"status"
]
==
StatusEnum
.
ERROR
.
name
:
if
save
[
"status"
]
==
StatusEnum
.
ERROR
.
name
:
raise
InterruptedError
(
save
[
"message"
])
raise
InterruptedError
(
save
[
"message"
])
except
TimeoutError
as
e
:
self
.
app
.
logger
.
error
(
f
"Error de Timeout. Error: {e}"
)
status
,
status_description
=
CodeResponseEnum
.
TIMEOUT
,
str
(
e
)
except
IndexError
as
e
:
except
IndexError
as
e
:
self
.
app
.
logger
.
error
(
f
"Error extrayendo insumos. Vacío. Error: {e}"
)
self
.
app
.
logger
.
error
(
f
"Error extrayendo insumos. Vacío. Error: {e}"
)
status
,
status_description
=
CodeResponseEnum
.
EMPTY_DATASET
,
str
(
e
)
status
,
status_description
=
CodeResponseEnum
.
EMPTY_DATASET
,
str
(
e
)
...
...
app/main/engine/util/Utils.py
View file @
b0fd6670
...
@@ -5,6 +5,8 @@ import shutil
...
@@ -5,6 +5,8 @@ import shutil
from
enum
import
Enum
from
enum
import
Enum
# from pyspark.sql import SparkSession
# from pyspark.sql import SparkSession
import
json
import
json
from
app.main.engine.enum.CodeResponseEnum
import
CodeResponseEnum
from
app.main.engine.util.Timezone
import
Timezone
from
app.main.engine.util.Timezone
import
Timezone
# from config import Config as cfg
# from config import Config as cfg
...
@@ -52,8 +54,11 @@ class Utils:
...
@@ -52,8 +54,11 @@ class Utils:
if
codeEnum
.
value
==
StatusEnum
.
OK
.
value
:
if
codeEnum
.
value
==
StatusEnum
.
OK
.
value
:
response
.
update
({
'status'
:
StatusEnum
.
OK
.
name
,
'detail'
:
detail
})
response
.
update
({
'status'
:
StatusEnum
.
OK
.
name
,
'detail'
:
detail
})
else
:
else
:
error
=
StatusEnum
.
ERROR
.
name
if
codeEnum
.
value
==
CodeResponseEnum
.
TIMEOUT
.
value
:
error
=
StatusEnum
.
TIMEOUT
.
name
description
=
DescResponseEnum
[
codeEnum
.
name
]
.
value
description
=
DescResponseEnum
[
codeEnum
.
name
]
.
value
response
.
update
({
'status'
:
StatusEnum
.
ERROR
.
name
,
'message'
:
description
,
response
.
update
({
'status'
:
error
,
'message'
:
description
,
'detail'
:
detail
})
'detail'
:
detail
})
return
response
return
response
...
@@ -65,6 +70,14 @@ class Utils:
...
@@ -65,6 +70,14 @@ class Utils:
pivot_params
=
descriptor
[
"params-input"
][
"pivot-config"
]
pivot_params
=
descriptor
[
"params-input"
][
"pivot-config"
]
ctp_params
=
descriptor
[
"params-input"
][
"counterpart-config"
]
ctp_params
=
descriptor
[
"params-input"
][
"counterpart-config"
]
for
key_p
,
key_c
in
zip
(
pivot_params
.
keys
(),
ctp_params
.
keys
()):
if
isinstance
(
pivot_params
[
key_p
],
str
):
pivot_params
[
key_p
]
=
"PIVOT_"
+
pivot_params
[
key_p
]
ctp_params
[
key_c
]
=
"COUNTERPART_"
+
ctp_params
[
key_c
]
else
:
pivot_params
[
key_p
]
=
[
"PIVOT_"
+
column
for
column
in
pivot_params
[
key_p
]]
ctp_params
[
key_c
]
=
[
"COUNTERPART_"
+
column
for
column
in
ctp_params
[
key_c
]]
group_pivot_match
=
pivot_params
[
"columns-group"
]
group_pivot_match
=
pivot_params
[
"columns-group"
]
transaction_pivot_match
=
pivot_params
[
"columns-transaction"
]
transaction_pivot_match
=
pivot_params
[
"columns-transaction"
]
...
@@ -73,7 +86,7 @@ class Utils:
...
@@ -73,7 +86,7 @@ class Utils:
used_list
=
transaction_counterpart_match
if
exclude_pivot
else
transaction_pivot_match
used_list
=
transaction_counterpart_match
if
exclude_pivot
else
transaction_pivot_match
if
data
.
empty
:
if
data
is
None
or
data
.
empty
:
self
.
app
.
logger
.
info
(
f
"El dataframe resultado esta vacio"
)
self
.
app
.
logger
.
info
(
f
"El dataframe resultado esta vacio"
)
else
:
else
:
for
idx
,
i
in
data
.
iterrows
():
for
idx
,
i
in
data
.
iterrows
():
...
...
scripts/match-and-exclude-records-actions_v1.py
View file @
b0fd6670
from
typing
import
Any
,
Dict
,
List
from
typing
import
Any
,
Dict
import
importlib.util
import
importlib.util
from
itertools
import
combinations
import
multiprocessing
as
mp
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
numba
import
njit
import
multiprocessing
as
mp
from
parallel_pandas
import
ParallelPandas
from
parallel_pandas
import
ParallelPandas
from
concurrent.futures
import
ThreadPoolExecutor
from
wrapt_timeout_decorator
import
timeout
from
app.main.engine.action.ActionInterface
import
ActionInterface
from
app.main.engine.action.ActionInterface
import
ActionInterface
...
@@ -35,12 +33,12 @@ class MatchAndExcludeRecordsAction(ActionInterface):
...
@@ -35,12 +33,12 @@ class MatchAndExcludeRecordsAction(ActionInterface):
def
__init__
(
self
,
app
)
->
None
:
def
__init__
(
self
,
app
)
->
None
:
super
()
.
__init__
(
app
)
super
()
.
__init__
(
app
)
self
.
max_combinations
=
None
self
.
max_combinations
=
None
self
.
comb_per_group
=
None
self
.
timeout
=
None
self
.
exclude_pivot
=
None
self
.
exclude_pivot
=
None
self
.
pivot_params
=
None
self
.
pivot_params
=
None
self
.
ctp_params
=
None
self
.
ctp_params
=
None
self
.
output
=
None
self
.
output
=
None
self
.
config_params
=
[
"max-records-per-combinations"
,
"max-
combinations-per-group
"
,
"exclude-entity-pivot"
]
self
.
config_params
=
[
"max-records-per-combinations"
,
"max-
timeout-per-combinations
"
,
"exclude-entity-pivot"
]
def
parser
(
self
,
descriptor
:
Dict
[
str
,
Any
]):
def
parser
(
self
,
descriptor
:
Dict
[
str
,
Any
]):
# Validar si pyspark y su versión está instalada
# Validar si pyspark y su versión está instalada
...
@@ -76,13 +74,15 @@ class MatchAndExcludeRecordsAction(ActionInterface):
...
@@ -76,13 +74,15 @@ class MatchAndExcludeRecordsAction(ActionInterface):
raise
ReferenceError
(
f
"Parámetro *{param}* no encontrado en pivot o contraparte"
)
raise
ReferenceError
(
f
"Parámetro *{param}* no encontrado en pivot o contraparte"
)
self
.
max_combinations
=
configs
[
"max-records-per-combinations"
]
self
.
max_combinations
=
configs
[
"max-records-per-combinations"
]
self
.
comb_per_group
=
configs
[
"max-combinations-per-group
"
]
self
.
timeout
=
configs
[
"max-timeout-per-combinations
"
]
self
.
exclude_pivot
=
configs
[
"exclude-entity-pivot"
]
self
.
exclude_pivot
=
configs
[
"exclude-entity-pivot"
]
self
.
pivot_params
=
pivot_params
self
.
pivot_params
=
pivot_params
self
.
ctp_params
=
ctp_params
self
.
ctp_params
=
ctp_params
def
process
(
self
,
source_obj
):
def
process
(
self
,
source_obs
):
try
:
@
timeout
(
self
.
timeout
)
def
__process
(
source_obj
):
# Inicializar la sesion de Spark
# Inicializar la sesion de Spark
session
=
self
.
createSession
()
session
=
self
.
createSession
()
...
@@ -124,7 +124,6 @@ class MatchAndExcludeRecordsAction(ActionInterface):
...
@@ -124,7 +124,6 @@ class MatchAndExcludeRecordsAction(ActionInterface):
if
self
.
ctp_params
[
"amount-column"
]
in
ctp_cols
:
if
self
.
ctp_params
[
"amount-column"
]
in
ctp_cols
:
ctp_cols
.
remove
(
self
.
ctp_params
[
"amount-column"
])
ctp_cols
.
remove
(
self
.
ctp_params
[
"amount-column"
])
comb_per_group
=
self
.
comb_per_group
max_combinations
=
self
.
max_combinations
max_combinations
=
self
.
max_combinations
# Ejecutamos lógica de excluir registros
# Ejecutamos lógica de excluir registros
...
@@ -188,20 +187,20 @@ class MatchAndExcludeRecordsAction(ActionInterface):
...
@@ -188,20 +187,20 @@ class MatchAndExcludeRecordsAction(ActionInterface):
df3
=
df3
.
toPandas
()
df3
=
df3
.
toPandas
()
total_cols
=
group_cols
+
[
amount_col
,
id_col
,
EXCLUDE_ROWS_FIELD
,
"DIFF"
]
total_cols
=
group_cols
+
[
amount_col
,
id_col
,
EXCLUDE_ROWS_FIELD
,
"DIFF"
]
#
ParallelPandas.initialize(n_cpu=mp.cpu_count(), split_factor=8, disable_pr_bar=True)
ParallelPandas
.
initialize
(
n_cpu
=
mp
.
cpu_count
(),
split_factor
=
8
,
disable_pr_bar
=
True
)
df3
=
df3
.
sort_values
(
group_cols
+
[
amount_col
])
df3
=
df3
.
sort_values
(
group_cols
+
[
amount_col
])
resultado
=
df3
[
total_cols
]
.
groupby
(
group_cols
)
.
apply
(
lambda
x
:
custom_func
(
x
,
amount_col
,
id_col
,
max_combinations
))
resultado
=
df3
[
total_cols
]
.
groupby
(
group_cols
)
.
p_
apply
(
lambda
x
:
custom_func
(
x
,
amount_col
,
id_col
,
max_combinations
))
resultado
=
resultado
.
reset_index
()
resultado
=
resultado
.
reset_index
()
if
len
(
resultado
.
columns
)
==
1
:
if
len
(
resultado
.
columns
)
==
1
:
resultado
=
pd
.
DataFrame
([],
columns
=
group_cols
+
[
"LISTA_DIFF"
])
resultado
=
pd
.
DataFrame
([],
columns
=
group_cols
+
[
"LISTA_DIFF"
])
else
:
else
:
resultado
.
columns
=
group_cols
+
[
"LISTA_DIFF"
]
resultado
.
columns
=
group_cols
+
[
"LISTA_DIFF"
]
# print(resultado["LISTA_DIFF"].apply(lambda x: x if pd.notna(x) and x[0]!=-1 else x))
meged2
=
resultado
.
merge
(
merged_df
.
toPandas
(),
'left'
,
group_cols
)
meged2
=
resultado
.
merge
(
merged_df
.
toPandas
(),
'left'
,
group_cols
)
print
(
meged2
)
meged2
[
"LISTA_DIFF"
]
=
meged2
[
"LISTA_DIFF"
]
.
apply
(
self
.
handle_array
)
meged2
[
"LISTA_DIFF"
]
=
meged2
[
"LISTA_DIFF"
]
.
apply
(
self
.
handle_array
)
meged2
=
meged2
[(
meged2
[
'DIFF'
]
==
0
)
|
((
meged2
[
'DIFF'
]
!=
0
)
&
(
meged2
[
'LISTA_DIFF'
]
.
apply
(
len
)
>
0
))]
meged2
=
meged2
[(
meged2
[
'DIFF'
]
==
0
)
|
((
meged2
[
'DIFF'
]
!=
0
)
&
(
meged2
[
'LISTA_DIFF'
]
.
apply
(
len
)
>
0
))]
if
meged2
.
empty
:
if
meged2
.
empty
:
pass
pass
elif
self
.
exclude_pivot
:
elif
self
.
exclude_pivot
:
...
@@ -215,7 +214,12 @@ class MatchAndExcludeRecordsAction(ActionInterface):
...
@@ -215,7 +214,12 @@ class MatchAndExcludeRecordsAction(ActionInterface):
if
meged2
[
'INTER_PIVOT_ID'
]
.
dtype
==
'int64'
:
if
meged2
[
'INTER_PIVOT_ID'
]
.
dtype
==
'int64'
:
merged_df
[
'INTER_PIVOT_ID'
]
=
merged_df
[
'INTER_PIVOT_ID'
]
.
apply
(
lambda
x
:
[
x
])
.
astype
(
'object'
)
merged_df
[
'INTER_PIVOT_ID'
]
=
merged_df
[
'INTER_PIVOT_ID'
]
.
apply
(
lambda
x
:
[
x
])
.
astype
(
'object'
)
self
.
output
=
meged2
return
meged2
except
TimeoutError
as
e
:
raise
TimeoutError
(
f
"Tiempo límite superado. {e}"
)
self
.
output
=
__process
(
source_obs
)
def
response
(
self
):
def
response
(
self
):
return
self
.
output
return
self
.
output
...
@@ -251,91 +255,57 @@ class MatchAndExcludeRecordsAction(ActionInterface):
...
@@ -251,91 +255,57 @@ class MatchAndExcludeRecordsAction(ActionInterface):
def
custom_func
(
group
,
amount_field
,
id_field
,
max_combinations
):
def
custom_func
(
group
,
amount_field
,
id_field
,
max_combinations
):
diff
=
group
[
"DIFF"
]
.
values
[
0
]
diff
=
int
(
group
[
"DIFF"
]
.
values
[
0
]
*
(
10
**
ROUND_DECIMAL
))
if
pd
.
isna
(
diff
)
or
diff
==
0
:
if
pd
.
isna
(
diff
)
or
diff
==
0
:
return
None
return
None
group
=
group
[
group
[
EXCLUDE_ROWS_FIELD
]
==
'S'
]
group
=
group
[
group
[
EXCLUDE_ROWS_FIELD
]
==
'S'
]
group
[
amount_field
]
=
group
[
amount_field
]
.
astype
(
float
)
group
[
amount_field
]
=
group
[
amount_field
]
.
astype
(
float
)
group
=
group
.
reset_index
(
drop
=
True
)
group
=
group
.
reset_index
(
drop
=
True
)
values
=
group
[
amount_field
]
.
values
values
=
group
[
amount_field
]
.
values
values
*=
(
10
**
ROUND_DECIMAL
)
values
=
values
.
astype
(
np
.
int64
)
ids
=
group
[
id_field
]
.
values
ids
=
group
[
id_field
]
.
values
tam
=
len
(
values
)
tam
=
tam
if
tam
<=
max_combinations
else
max_combinations
n
=
len
(
values
)
result
=
subset_sum_iter
(
values
,
diff
,
tam
)
valores1
=
encontrar_comb_1
(
values
,
diff
)
indices
=
ids
[
np
.
isin
(
values
,
result
)]
if
valores1
[
0
]
!=
-
1
:
indices
=
ids
[
valores1
]
return
indices
valores2
=
encontrar_comb_2
(
values
,
diff
,
n
)
if
valores2
[
0
]
!=
-
1
:
indices
=
ids
[
valores2
]
return
indices
# Iterar sobre todos los índices posibles
# valores4 = encontrar_comb_4(values, diff, n)
# if valores4[0] != -1:
# indices = ids[valores4]
# return indices
valores5
=
encontrar_comb_5
(
values
,
diff
,
n
)
if
valores5
[
0
]
!=
-
1
:
indices
=
ids
[
valores5
]
return
indices
return
indices
@
njit
def
subset_sum_iter
(
numbers
,
target
,
num_elements
):
def
encontrar_comb_1
(
valores
,
target
):
indice
=
[
-
1
]
# Initialize solutions list
for
idx
,
value
in
enumerate
(
valores
):
solutions
=
[]
suma
=
value
for
step
in
range
(
1
,
num_elements
+
1
):
if
round
(
suma
,
ROUND_DECIMAL
)
==
target
:
# Build first index by taking the first num_elements from the numbers
indice
=
[
idx
for
idx
,
val
in
enumerate
(
valores
)
if
val
in
[
value
]]
indices
=
list
(
range
(
step
))
return
indice
solution
=
[
numbers
[
i
]
for
i
in
indices
]
if
sum
(
solution
)
==
target
:
return
indice
solutions
.
append
(
solution
)
@
njit
# We iterate over the rest of the indices until we have tried all combinations
def
encontrar_comb_2
(
valores
,
target
,
n
):
while
True
:
indice
=
[
-
1
]
for
i
in
range
(
step
):
for
i
in
range
(
n
):
if
indices
[
i
]
!=
i
+
len
(
numbers
)
-
step
:
array_except
=
np
.
delete
(
valores
,
i
)
break
for
idx
,
value
in
enumerate
(
array_except
):
else
:
suma
=
value
+
valores
[
i
]
# No combinations left
if
round
(
suma
,
ROUND_DECIMAL
)
==
target
:
break
indice
=
[
idx
for
idx
,
val
in
enumerate
(
valores
)
if
val
in
[
value
,
valores
[
i
]]]
return
indice
# Increase current index and all its following ones
indices
[
i
]
+=
1
return
indice
for
j
in
range
(
i
+
1
,
step
):
indices
[
j
]
=
indices
[
j
-
1
]
+
1
@
njit
def
encontrar_comb_4
(
valores
,
target
,
n
):
# Check current solution
indice
=
[
-
1
]
solution
=
[
numbers
[
i
]
for
i
in
indices
]
for
i
in
range
(
n
):
if
round
(
sum
(
solution
),
ROUND_DECIMAL
)
==
target
:
a1
=
np
.
delete
(
valores
,
i
)
solutions
.
append
(
solution
)
for
j
in
range
(
len
(
a1
)):
break
a2
=
np
.
delete
(
a1
,
j
)
if
len
(
solutions
)
>
0
:
for
k
in
range
(
len
(
a2
)):
solutions
=
solutions
[
0
]
array_except
=
np
.
delete
(
a2
,
k
)
break
for
idx
,
value
in
enumerate
(
array_except
):
suma
=
value
+
valores
[
i
]
+
a1
[
j
]
+
a2
[
k
]
return
solutions
if
round
(
suma
,
ROUND_DECIMAL
)
==
target
:
indice
=
[
idx
for
idx
,
val
in
enumerate
(
valores
)
if
val
in
[
value
,
valores
[
i
],
a1
[
j
],
a2
[
k
]]]
return
indice
return
indice
@
njit
def
encontrar_comb_5
(
valores
,
target
,
n
):
indice
=
[
-
1
]
for
i
in
range
(
n
):
a1
=
np
.
delete
(
valores
,
i
)
for
j
in
range
(
len
(
a1
)):
a2
=
np
.
delete
(
a1
,
j
)
for
k
in
range
(
len
(
a2
)):
a3
=
np
.
delete
(
a2
,
k
)
for
l
in
range
(
len
(
a3
)):
array_except
=
np
.
delete
(
a2
,
l
)
for
idx
,
value
in
enumerate
(
array_except
):
suma
=
value
+
valores
[
i
]
+
a1
[
j
]
+
a2
[
k
]
+
a3
[
l
]
if
round
(
suma
,
ROUND_DECIMAL
)
==
target
:
indice
=
[
idx
for
idx
,
val
in
enumerate
(
valores
)
if
val
in
[
value
,
valores
[
i
],
a1
[
j
],
a2
[
k
],
a3
[
l
]]]
return
indice
return
indice
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment