Commit 4f9592e7 authored by cristian Quezada's avatar cristian Quezada

Actualizando modelos

parent 8ba99638
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"## "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"file = open('vocabulary.txt','w') \n",
"with open('data/train.tsv',encoding='utf-8') as tsvfile:\n",
" reader = csv.DictReader(tsvfile, dialect='excel-tab')\n",
" for row in reader:\n",
" #print(row['sentence'])\n",
" file.write(row['sentence']+'\\n')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"file.close()"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Para el de español\n",
"# cambiar path\n",
"file = open('vocabulary_es.txt','w') \n",
"with open('D:/SpeechServices/data/spanish-single-speaker-speech-dataset/transcript.txt',encoding='utf-8') as txtfile:\n",
" for line in txtfile:\n",
" file.write(line.split('|')[1])\n",
"file.close()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# pruebas\n",
"string = '19demarzo/19demarzo_0000.wav|Durante nuestra conversación advertí que la multitud aumentaba, apretándose más.|Durante nuestra conversación advertí que la multitud aumentaba, apretándose más.|5.88'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Durante nuestra conversación advertí que la multitud aumentaba, apretándose más.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"string.split('|')[1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"## Archivo train"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"path ='D:/SpeechServices/data/spanish-single-speaker-speech-dataset/'\n",
"file = open('trains.csv','w') \n",
"with open(path+'transcript.txt',encoding='utf-8') as txtfile:\n",
" for line in txtfile:\n",
" cols = line.split('|')\n",
" tamanio = os.path.getsize(path+cols[0])\n",
" file.write(cols[0]+','+str(tamanio)+','+cols[1]+'\\n')\n",
"file.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Split data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"with open(path+'transcript.txt',encoding='utf-8') as txtfile:\n",
" data = txtfile.read()\n",
" data = data.split('\\n')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'19demarzo/19demarzo_0155.wav|fue preciso que pasaran catorce años para que Pujitos entrara con distinto nombre en el uso pleno de sus extraordinarias facultades.|fue preciso que pasaran catorce años para que Pujitos entrara con distinto nombre en el uso pleno de sus extraordinarias facultades.|8.92'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[155]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"## shuffle\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"random.shuffle(data)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'19demarzo/19demarzo_1670.wav|-Ella es habilidosilla -afirmó Restituta-, y sabe coser; sólo que le falta voluntad.|-Ella es habilidosilla -afirmó Restituta-, y sabe coser; sólo que le falta voluntad.|6.58'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[155]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"n = len(data)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"11110"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7777"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# train 70% , dev 20%, test 10%\n",
"n_train = 70*n//100\n",
"n_train"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2222"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n_dev = 20*n//100\n",
"n_dev"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1111"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n_test = 10*n//100\n",
"n_test"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"step = 0\n",
"file = open('train_es.csv','w') \n",
"file.write('wav_filename,wav_filesize,transcript'+'\\n')\n",
"for i in range(n_train):\n",
" cols = data[step].replace(',',';').split('|')\n",
" tamanio = os.path.getsize(path+cols[0])\n",
" file.write(cols[0]+','+str(tamanio)+','+cols[1]+'\\n')\n",
" step = step + 1\n",
"file.close()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7777"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"step"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"file = open('dev_es.csv','w') \n",
"file.write('wav_filenam,wav_filesize,transcript'+'\\n')\n",
"for i in range(n_dev):\n",
" cols = data[step].replace(',',';').split('|')\n",
" tamanio = os.path.getsize(path+cols[0])\n",
" file.write(cols[0]+','+str(tamanio)+','+cols[1]+'\\n')\n",
" step = step + 1\n",
"file.close()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"file = open('test_es.csv','w') \n",
"file.write('wav_filename,wav_filesize,transcript'+'\\n')\n",
"for i in range(n_test):\n",
" cols = data[step].replace(',',';').split('|')\n",
" tamanio = os.path.getsize(path+cols[0])\n",
" file.write(cols[0]+','+str(tamanio)+','+cols[1]+'\\n')\n",
" step = step + 1\n",
"file.close()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# comprobar en pandas\n",
"import pandas"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>wav_filename</th>\n",
" <th>wav_filesize</th>\n",
" <th>transcript</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1819.wav</td>\n",
" <td>702298</td>\n",
" <td>un lecho cuyo dosel sostenían torneadas column...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>batalla_arapiles/batalla_arapiles_3356.wav</td>\n",
" <td>865098</td>\n",
" <td>el amor que me ha mostrado; la confianza que h...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0376.wav</td>\n",
" <td>647738</td>\n",
" <td>-Usted; retirado del mundo; vive de un modo be...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bailen/bailen_2878.wav</td>\n",
" <td>848378</td>\n",
" <td>albergues de otras tantas familias. Peor que m...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1084.wav</td>\n",
" <td>594058</td>\n",
" <td>La aldeana no entendía el castellano corrompid...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>bailen/bailen_3110.wav</td>\n",
" <td>554458</td>\n",
" <td>que a los invasores les era de todo punto impo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>19demarzo/19demarzo_2047.wav</td>\n",
" <td>597578</td>\n",
" <td>Efectivamente; el Príncipe no parecía por ning...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0483.wav</td>\n",
" <td>346778</td>\n",
" <td>de dormir en campo raso y comer berraza y jara...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>bailen/bailen_1209.wav</td>\n",
" <td>645978</td>\n",
" <td>No olvides que esta tarde tienes que pasar por...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>bailen/bailen_1064.wav</td>\n",
" <td>954858</td>\n",
" <td>hacían visajes de distintas formas; de colosal...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0536.wav</td>\n",
" <td>441818</td>\n",
" <td>Señora Juan de Dios -dije condolido en extremo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>19demarzo/19demarzo_2826.wav</td>\n",
" <td>468218</td>\n",
" <td>¿no le rebosa el corazón de alegría; no quiere...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>bailen/bailen_1661.wav</td>\n",
" <td>833418</td>\n",
" <td>El mismo día 22 encontré a Santorcaz puesto ya...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1238.wav</td>\n",
" <td>726058</td>\n",
" <td>tomando por atajos y vericuetos hasta llegar a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>19demarzo/19demarzo_2684.wav</td>\n",
" <td>363498</td>\n",
" <td>-Es Vd. el señor chantre de la catedral de Ast...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>batalla_arapiles/batalla_arapiles_2880.wav</td>\n",
" <td>620458</td>\n",
" <td>-No la separarás de mí sino matándola; ruin y ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0489.wav</td>\n",
" <td>931098</td>\n",
" <td>«Señor; te aborreceré; te negaré si no me la d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1432.wav</td>\n",
" <td>403098</td>\n",
" <td>-Esto que hoy he traído; es porque como venía ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>batalla_arapiles/batalla_arapiles_3391.wav</td>\n",
" <td>704058</td>\n",
" <td>Entre las dos debía desarrollarse al día sigui...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>bailen/bailen_0160.wav</td>\n",
" <td>745418</td>\n",
" <td>pero su merced no se impaciente; porque aunque...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0857.wav</td>\n",
" <td>764778</td>\n",
" <td>casi diré de artista y de viajera. Las costumb...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>batalla_arapiles/batalla_arapiles_3275.wav</td>\n",
" <td>564138</td>\n",
" <td>Vos también dais en creer; como los demás; que...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1972.wav</td>\n",
" <td>656538</td>\n",
" <td>y recriminándose el uno al otro. -Os juro que ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>bailen/bailen_2448.wav</td>\n",
" <td>791178</td>\n",
" <td>individuo cuya personalidad tenía acabado comp...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>19demarzo/19demarzo_0467.wav</td>\n",
" <td>771818</td>\n",
" <td>El ardid salió bien; porque la turba destrozó ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1786.wav</td>\n",
" <td>880058</td>\n",
" <td>Arrojose a mis plantas y tres veces; tres vece...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0401.wav</td>\n",
" <td>735738</td>\n",
" <td>Aquella pobre joven tan buena; tan bonita; tan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>batalla_arapiles/batalla_arapiles_3208.wav</td>\n",
" <td>492858</td>\n",
" <td>-A quien amas; padre -añadió la muchacha rodea...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>bailen/bailen_2945.wav</td>\n",
" <td>1035818</td>\n",
" <td>Según me dijo; el mismo día 3 de Mayo se prese...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>19demarzo/19demarzo_2777.wav</td>\n",
" <td>374058</td>\n",
" <td>-¿Pero dónde está Inés? -exclamé con exaltació...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1081</th>\n",
" <td>batalla_arapiles/batalla_arapiles_4287.wav</td>\n",
" <td>733978</td>\n",
" <td>Llevadme; llevadme con vos; señora; a los espa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1082</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1391.wav</td>\n",
" <td>779738</td>\n",
" <td>¡Cuánto trabajo para encontrar al Señora Molic...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1083</th>\n",
" <td>bailen/bailen_2461.wav</td>\n",
" <td>1082458</td>\n",
" <td>pero la señora anciana; más despabilada y locu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1084</th>\n",
" <td>batalla_arapiles/batalla_arapiles_2666.wav</td>\n",
" <td>759498</td>\n",
" <td>La tenacidad; que a veces es en la guerra una ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1085</th>\n",
" <td>bailen/bailen_1657.wav</td>\n",
" <td>991818</td>\n",
" <td>va a concluir; y los notables y jamás vistos s...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1086</th>\n",
" <td>batalla_arapiles/batalla_arapiles_2809.wav</td>\n",
" <td>827258</td>\n",
" <td>Mi general -dije al conde; abriéndome paso ent...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1087</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0224.wav</td>\n",
" <td>829018</td>\n",
" <td>y pude saber que el antiguo comisario de polic...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1088</th>\n",
" <td>19demarzo/19demarzo_1088.wav</td>\n",
" <td>748938</td>\n",
" <td>Todo un arsenal de herramientas no habría bast...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1089</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1389.wav</td>\n",
" <td>653018</td>\n",
" <td>-El vino de la tía Fabiana no puede ser mejor ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1090</th>\n",
" <td>bailen/bailen_1195.wav</td>\n",
" <td>870378</td>\n",
" <td>¡Pero que veo; santos cielos! Este sí que es n...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1091</th>\n",
" <td>19demarzo/19demarzo_2603.wav</td>\n",
" <td>827258</td>\n",
" <td>los cuales son gente muy alborotada. -¿Y qué d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1092</th>\n",
" <td>19demarzo/19demarzo_1764.wav</td>\n",
" <td>681178</td>\n",
" <td>-Cuando yo le doy a Vd. mi palabra de que esas...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1093</th>\n",
" <td>batalla_arapiles/batalla_arapiles_3992.wav</td>\n",
" <td>691738</td>\n",
" <td>a esta alma caritativa que me recogió sin duda...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1094</th>\n",
" <td>batalla_arapiles/batalla_arapiles_2468.wav</td>\n",
" <td>544778</td>\n",
" <td>Por bien que salga el Emperador de esa campaña...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1095</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1922.wav</td>\n",
" <td>875658</td>\n",
" <td>que producen las grandezas de la Naturaleza cu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1096</th>\n",
" <td>bailen/bailen_2437.wav</td>\n",
" <td>183098</td>\n",
" <td>Fue en la batalla de Austerlitz:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1097</th>\n",
" <td>bailen/bailen_1935.wav</td>\n",
" <td>827258</td>\n",
" <td>¿La batalla se perdería? Los franceses; destro...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1098</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1568.wav</td>\n",
" <td>580858</td>\n",
" <td>-En una palabra; señor mío -dijo con impacienc...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1099</th>\n",
" <td>batalla_arapiles/batalla_arapiles_2407.wav</td>\n",
" <td>558858</td>\n",
" <td>-Porque los ingleses entrarán en Salamanca -di...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1100</th>\n",
" <td>19demarzo/19demarzo_0956.wav</td>\n",
" <td>699658</td>\n",
" <td>era preciso hacer un cálculo matemático para s...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1101</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0810.wav</td>\n",
" <td>711978</td>\n",
" <td>añadió la Forfolleda que no era conveniente; p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1102</th>\n",
" <td>bailen/bailen_2154.wav</td>\n",
" <td>778858</td>\n",
" <td>Amaranta estaba abrumadoramente hermosa; y sus...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1103</th>\n",
" <td>19demarzo/19demarzo_2032.wav</td>\n",
" <td>763018</td>\n",
" <td>se acercó a mí y me dijo: -Gabriel; ¿no te ent...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1104</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1156.wav</td>\n",
" <td>557978</td>\n",
" <td>-¡Y yo salí sin recoger esa carta! -exclamé co...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1105</th>\n",
" <td>batalla_arapiles/batalla_arapiles_0438.wav</td>\n",
" <td>726938</td>\n",
" <td>asistí como fámulo a los pobres de la enfermer...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1106</th>\n",
" <td>batalla_arapiles/batalla_arapiles_4593.wav</td>\n",
" <td>721658</td>\n",
" <td>tuve y tengo un ejército brillante de descendi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1107</th>\n",
" <td>19demarzo/19demarzo_1868.wav</td>\n",
" <td>714618</td>\n",
" <td>Cuando él lea esta carta... pero esto es un se...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1108</th>\n",
" <td>19demarzo/19demarzo_1442.wav</td>\n",
" <td>650378</td>\n",
" <td>Nunca voy a bailes ni a tertulias; y con tan u...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1109</th>\n",
" <td>batalla_arapiles/batalla_arapiles_1260.wav</td>\n",
" <td>673258</td>\n",
" <td>¡Pobre Molichard; tan borracho y tan bueno! Ci...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1110</th>\n",
" <td>bailen/bailen_2361.wav</td>\n",
" <td>641578</td>\n",
" <td>donde entrará D. Gregorio de la Cuesta; el cua...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1111 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" wav_filename wav_filesize \\\n",
"0 batalla_arapiles/batalla_arapiles_1819.wav 702298 \n",
"1 batalla_arapiles/batalla_arapiles_3356.wav 865098 \n",
"2 batalla_arapiles/batalla_arapiles_0376.wav 647738 \n",
"3 bailen/bailen_2878.wav 848378 \n",
"4 batalla_arapiles/batalla_arapiles_1084.wav 594058 \n",
"5 bailen/bailen_3110.wav 554458 \n",
"6 19demarzo/19demarzo_2047.wav 597578 \n",
"7 batalla_arapiles/batalla_arapiles_0483.wav 346778 \n",
"8 bailen/bailen_1209.wav 645978 \n",
"9 bailen/bailen_1064.wav 954858 \n",
"10 batalla_arapiles/batalla_arapiles_0536.wav 441818 \n",
"11 19demarzo/19demarzo_2826.wav 468218 \n",
"12 bailen/bailen_1661.wav 833418 \n",
"13 batalla_arapiles/batalla_arapiles_1238.wav 726058 \n",
"14 19demarzo/19demarzo_2684.wav 363498 \n",
"15 batalla_arapiles/batalla_arapiles_2880.wav 620458 \n",
"16 batalla_arapiles/batalla_arapiles_0489.wav 931098 \n",
"17 batalla_arapiles/batalla_arapiles_1432.wav 403098 \n",
"18 batalla_arapiles/batalla_arapiles_3391.wav 704058 \n",
"19 bailen/bailen_0160.wav 745418 \n",
"20 batalla_arapiles/batalla_arapiles_0857.wav 764778 \n",
"21 batalla_arapiles/batalla_arapiles_3275.wav 564138 \n",
"22 batalla_arapiles/batalla_arapiles_1972.wav 656538 \n",
"23 bailen/bailen_2448.wav 791178 \n",
"24 19demarzo/19demarzo_0467.wav 771818 \n",
"25 batalla_arapiles/batalla_arapiles_1786.wav 880058 \n",
"26 batalla_arapiles/batalla_arapiles_0401.wav 735738 \n",
"27 batalla_arapiles/batalla_arapiles_3208.wav 492858 \n",
"28 bailen/bailen_2945.wav 1035818 \n",
"29 19demarzo/19demarzo_2777.wav 374058 \n",
"... ... ... \n",
"1081 batalla_arapiles/batalla_arapiles_4287.wav 733978 \n",
"1082 batalla_arapiles/batalla_arapiles_1391.wav 779738 \n",
"1083 bailen/bailen_2461.wav 1082458 \n",
"1084 batalla_arapiles/batalla_arapiles_2666.wav 759498 \n",
"1085 bailen/bailen_1657.wav 991818 \n",
"1086 batalla_arapiles/batalla_arapiles_2809.wav 827258 \n",
"1087 batalla_arapiles/batalla_arapiles_0224.wav 829018 \n",
"1088 19demarzo/19demarzo_1088.wav 748938 \n",
"1089 batalla_arapiles/batalla_arapiles_1389.wav 653018 \n",
"1090 bailen/bailen_1195.wav 870378 \n",
"1091 19demarzo/19demarzo_2603.wav 827258 \n",
"1092 19demarzo/19demarzo_1764.wav 681178 \n",
"1093 batalla_arapiles/batalla_arapiles_3992.wav 691738 \n",
"1094 batalla_arapiles/batalla_arapiles_2468.wav 544778 \n",
"1095 batalla_arapiles/batalla_arapiles_1922.wav 875658 \n",
"1096 bailen/bailen_2437.wav 183098 \n",
"1097 bailen/bailen_1935.wav 827258 \n",
"1098 batalla_arapiles/batalla_arapiles_1568.wav 580858 \n",
"1099 batalla_arapiles/batalla_arapiles_2407.wav 558858 \n",
"1100 19demarzo/19demarzo_0956.wav 699658 \n",
"1101 batalla_arapiles/batalla_arapiles_0810.wav 711978 \n",
"1102 bailen/bailen_2154.wav 778858 \n",
"1103 19demarzo/19demarzo_2032.wav 763018 \n",
"1104 batalla_arapiles/batalla_arapiles_1156.wav 557978 \n",
"1105 batalla_arapiles/batalla_arapiles_0438.wav 726938 \n",
"1106 batalla_arapiles/batalla_arapiles_4593.wav 721658 \n",
"1107 19demarzo/19demarzo_1868.wav 714618 \n",
"1108 19demarzo/19demarzo_1442.wav 650378 \n",
"1109 batalla_arapiles/batalla_arapiles_1260.wav 673258 \n",
"1110 bailen/bailen_2361.wav 641578 \n",
"\n",
" transcript \n",
"0 un lecho cuyo dosel sostenían torneadas column... \n",
"1 el amor que me ha mostrado; la confianza que h... \n",
"2 -Usted; retirado del mundo; vive de un modo be... \n",
"3 albergues de otras tantas familias. Peor que m... \n",
"4 La aldeana no entendía el castellano corrompid... \n",
"5 que a los invasores les era de todo punto impo... \n",
"6 Efectivamente; el Príncipe no parecía por ning... \n",
"7 de dormir en campo raso y comer berraza y jara... \n",
"8 No olvides que esta tarde tienes que pasar por... \n",
"9 hacían visajes de distintas formas; de colosal... \n",
"10 Señora Juan de Dios -dije condolido en extremo... \n",
"11 ¿no le rebosa el corazón de alegría; no quiere... \n",
"12 El mismo día 22 encontré a Santorcaz puesto ya... \n",
"13 tomando por atajos y vericuetos hasta llegar a... \n",
"14 -Es Vd. el señor chantre de la catedral de Ast... \n",
"15 -No la separarás de mí sino matándola; ruin y ... \n",
"16 «Señor; te aborreceré; te negaré si no me la d... \n",
"17 -Esto que hoy he traído; es porque como venía ... \n",
"18 Entre las dos debía desarrollarse al día sigui... \n",
"19 pero su merced no se impaciente; porque aunque... \n",
"20 casi diré de artista y de viajera. Las costumb... \n",
"21 Vos también dais en creer; como los demás; que... \n",
"22 y recriminándose el uno al otro. -Os juro que ... \n",
"23 individuo cuya personalidad tenía acabado comp... \n",
"24 El ardid salió bien; porque la turba destrozó ... \n",
"25 Arrojose a mis plantas y tres veces; tres vece... \n",
"26 Aquella pobre joven tan buena; tan bonita; tan... \n",
"27 -A quien amas; padre -añadió la muchacha rodea... \n",
"28 Según me dijo; el mismo día 3 de Mayo se prese... \n",
"29 -¿Pero dónde está Inés? -exclamé con exaltació... \n",
"... ... \n",
"1081 Llevadme; llevadme con vos; señora; a los espa... \n",
"1082 ¡Cuánto trabajo para encontrar al Señora Molic... \n",
"1083 pero la señora anciana; más despabilada y locu... \n",
"1084 La tenacidad; que a veces es en la guerra una ... \n",
"1085 va a concluir; y los notables y jamás vistos s... \n",
"1086 Mi general -dije al conde; abriéndome paso ent... \n",
"1087 y pude saber que el antiguo comisario de polic... \n",
"1088 Todo un arsenal de herramientas no habría bast... \n",
"1089 -El vino de la tía Fabiana no puede ser mejor ... \n",
"1090 ¡Pero que veo; santos cielos! Este sí que es n... \n",
"1091 los cuales son gente muy alborotada. -¿Y qué d... \n",
"1092 -Cuando yo le doy a Vd. mi palabra de que esas... \n",
"1093 a esta alma caritativa que me recogió sin duda... \n",
"1094 Por bien que salga el Emperador de esa campaña... \n",
"1095 que producen las grandezas de la Naturaleza cu... \n",
"1096 Fue en la batalla de Austerlitz: \n",
"1097 ¿La batalla se perdería? Los franceses; destro... \n",
"1098 -En una palabra; señor mío -dijo con impacienc... \n",
"1099 -Porque los ingleses entrarán en Salamanca -di... \n",
"1100 era preciso hacer un cálculo matemático para s... \n",
"1101 añadió la Forfolleda que no era conveniente; p... \n",
"1102 Amaranta estaba abrumadoramente hermosa; y sus... \n",
"1103 se acercó a mí y me dijo: -Gabriel; ¿no te ent... \n",
"1104 -¡Y yo salí sin recoger esa carta! -exclamé co... \n",
"1105 asistí como fámulo a los pobres de la enfermer... \n",
"1106 tuve y tengo un ejército brillante de descendi... \n",
"1107 Cuando él lea esta carta... pero esto es un se... \n",
"1108 Nunca voy a bailes ni a tertulias; y con tan u... \n",
"1109 ¡Pobre Molichard; tan borracho y tan bueno! Ci... \n",
"1110 donde entrará D. Gregorio de la Cuesta; el cua... \n",
"\n",
"[1111 rows x 3 columns]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pandas.read_csv('test_es.csv',encoding='ISO-8859-1')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
......@@ -2,7 +2,7 @@
git : [DeepSpeech](https://github.com/mozilla/DeepSpeech)
## Requisitos:
* Python 3.6
* Python 3.5 o 3.6
* Linux o Mac
Algunos comandos:
......
File added
......@@ -12,6 +12,8 @@ f
g
h
i
j
k
l
m
n
......@@ -23,5 +25,9 @@ s
t
u
v
w
x
y
z
'
# The last (non-comment) line needs to end with a newline.
File added
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -15,7 +15,8 @@
"metadata": {},
"outputs": [],
"source": [
"import csv"
"import csv\n",
"import re"
]
},
{
......@@ -43,7 +44,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -52,7 +53,8 @@
"file = open('vocabulary_es.txt','w') \n",
"with open('D:/SpeechServices/data/spanish-single-speaker-speech-dataset/transcript.txt',encoding='utf-8') as txtfile:\n",
" for line in txtfile:\n",
" file.write(line.split('|')[1])\n",
" text = re.sub('\\W+', '', line.split('|')[1])\n",
" file.write(text)\n",
"file.close()"
]
},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment