Commit 8ebc05c1 authored by Cristian Aguirre

Outputs can be saved as Excel files. The "xlsx" format is now supported in file_type.

parent 91b0963a
...@@ -2,6 +2,8 @@ from typing import Any, Dict ...@@ -2,6 +2,8 @@ from typing import Any, Dict
import os import os
import json import json
import pandas as pd import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from airflow.utils.task_group import TaskGroup from airflow.utils.task_group import TaskGroup
from airflow.operators.python import PythonOperator from airflow.operators.python import PythonOperator
from airflow.models import Variable from airflow.models import Variable
...@@ -134,6 +136,7 @@ def generate_and_deploy(command: str, intern_conn, params: Dict[str, Any], timez ...@@ -134,6 +136,7 @@ def generate_and_deploy(command: str, intern_conn, params: Dict[str, Any], timez
iterator = get_iterator(tablename, chunksize, engine) iterator = get_iterator(tablename, chunksize, engine)
iterator = iterator["iterator"] iterator = iterator["iterator"]
logger.info(f"Total de pasos para generar archivo resultado: {steps}") logger.info(f"Total de pasos para generar archivo resultado: {steps}")
workbook = None
for step in range(steps): for step in range(steps):
logger.debug(f"STEP: {step}") logger.debug(f"STEP: {step}")
header = True if step == 0 else False header = True if step == 0 else False
...@@ -151,11 +154,16 @@ def generate_and_deploy(command: str, intern_conn, params: Dict[str, Any], timez ...@@ -151,11 +154,16 @@ def generate_and_deploy(command: str, intern_conn, params: Dict[str, Any], timez
if header: if header:
with pd.ExcelWriter(tmp_file, engine="openpyxl", mode='w') as writer: with pd.ExcelWriter(tmp_file, engine="openpyxl", mode='w') as writer:
dataframe.to_excel(writer, index=False, header=True) dataframe.to_excel(writer, index=False, header=True)
workbook = load_workbook(tmp_file)
sheet = workbook['Sheet1']
else: else:
with pd.ExcelWriter(tmp_file, engine="openpyxl", mode='a', if_sheet_exists='overlay') as writer: for row in dataframe_to_rows(dataframe, header=False, index=False):
dataframe.to_excel(writer, index=False, header=False) sheet.append(row)
except StopIteration: except StopIteration:
break break
if not isinstance(workbook, type(None)):
workbook.save(tmp_file)
workbook.close()
list_outputs = params["s3_params"] list_outputs = params["s3_params"]
size = os.path.getsize(tmp_file) size = os.path.getsize(tmp_file)
for output in list_outputs: for output in list_outputs:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment