Runtime error
Runtime error
Browse files
@@ -0,0 +1,154 @@
1 |
import gradio as gr
2 |
import nltk
3 |
from nltk.corpus import stopwords, cess_esp, conll2002
4 |
from nltk.tokenize import word_tokenize
5 |
import stylecloud
6 |
import matplotlib.pyplot as plt
7 |
from fpdf import FPDF
8 |
import re
9 |
from collections import Counter
10 |
import spacy
11 |
import os
12 |
import random
13 |
import string
14 |
import csv
15 |
16 |
# Download the NLTK resources this module reads at runtime: the Spanish
# stopword list and the two Spanish corpora searched for example sentences.
# NOTE(review): the original download calls are missing from the extract this
# file was restored from; the resource names are inferred from the imports
# (add 'punkt' if word_tokenize is used elsewhere) - confirm.
for _resource in ('stopwords', 'cess_esp', 'conll2002'):
    nltk.download(_resource, quiet=True)

# Load the spaCy pipeline for Spanish; preprocess_text uses it for
# tokenisation and part-of-speech tagging.
nlp = spacy.load('es_core_news_md')

# Extra stopwords merged with NLTK's Spanish list in preprocess_text.
additional_stopwords = [
    # Put your additional stopwords here...
]
28 |
29 |
30 |
# Función de preprocesamiento
31 |
def preprocess_text(text):
    """Return the content words of a Spanish text, in document order.

    The text is lower-cased, every non-word character is replaced by a
    space, and the result is run through the module-level spaCy pipeline
    ``nlp``. A token is dropped when it is whitespace, when it is a stopword
    (NLTK's Spanish list plus ``additional_stopwords``), or when its
    part-of-speech tag is a number or one of the function-word categories
    listed below. Everything else (verbs, adjectives, nouns, ...) is kept.

    NOTE(review): the statements under each branch were missing from the
    extract this function was restored from. Keeping ``token.text`` is an
    assumption - the original may have kept ``token.lemma_`` - confirm.

    Args:
        text: Raw input text.

    Returns:
        list[str]: The kept tokens, duplicates included.
    """
    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    doc = nlp(text)
    stop_words = set(stopwords.words('spanish')).union(additional_stopwords)

    # Tags that never reach the output. The original spelled this as an
    # if / elif / else cascade (keep VERB/ADJ/NOUN, skip NUM, otherwise keep
    # unless in the ignore list); since VERB/ADJ/NOUN are not in the ignore
    # list, a single exclusion set expresses the same rule.
    ignored_pos = {
        'NUM',
        'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'PRON', 'SCONJ', 'SYM', 'X',
    }

    filtered_words = []
    for token in doc:
        # Replacing punctuation with spaces leaves runs of blanks, which
        # spaCy emits as whitespace tokens; they must not count as words.
        if token.is_space:
            continue
        if token.text in stop_words or token.pos_ in ignored_pos:
            continue
        filtered_words.append(token.text)
    return filtered_words
48 |
49 |
# Obtener oraciones de ejemplo de múltiples corpus de nltk
50 |
def get_example_sentences(word, num_sentences=1, corpora=None):
    """Collect sentences that contain *word* as a token.

    Each corpus is scanned in order and the search stops as soon as
    ``num_sentences`` matches have been gathered, so the (large) corpora are
    not read to the end once enough examples exist.

    Args:
        word: Token to look for. Matching is exact, so the caller decides
            the casing. Words of one character or less never match.
        num_sentences: Maximum number of sentences to return.
        corpora: Iterable of objects exposing ``sents()``, which yields
            sentences as sequences of tokens. Defaults to the NLTK
            ``cess_esp`` and ``conll2002`` corpora.

    Returns:
        list[str]: Up to ``num_sentences`` sentences, tokens joined by a
        single space. Empty when nothing matches.
    """
    # Both conditions are loop-invariant: decide once instead of per sentence.
    if len(word) <= 1 or num_sentences <= 0:
        return []
    if corpora is None:
        corpora = (cess_esp, conll2002)

    sentences = []
    for corpus in corpora:
        for sent in corpus.sents():
            if word in sent:
                sentences.append(' '.join(sent))
                if len(sentences) >= num_sentences:
                    # Enough examples: leave both loops at once.
                    return sentences
    return sentences
61 |
62 |
# Función para generar la nube de palabras con estilo aleatorio
63 |
def generate_random_style_cloud(words, filename):
    """Render *words* as an icon-shaped word cloud and save it to *filename*.

    The icon is picked at random from a fixed set of Font Awesome shapes.
    ``stylecloud`` writes the image first; it is then re-drawn with
    matplotlib without axes and saved over the same file with a tight
    bounding box.

    NOTE(review): the two drawing calls between ``imread`` and ``savefig``
    were missing from the extract this function was restored from;
    ``imshow`` + ``axis('off')`` is the assumed original - confirm.

    Args:
        words: Iterable of words; repeated words weigh more in the cloud.
        filename: Path of the PNG file to create.

    Returns:
        str: *filename*, unchanged.

    Raises:
        ValueError: If *words* is empty (there is nothing to draw).
    """
    text = ' '.join(words)
    if not text.strip():
        raise ValueError('At least one word is required to build a word cloud')

    icons = ['fas fa-cloud', 'fas fa-star', 'fas fa-heart', 'fas fa-tree', 'fas fa-sun', 'fas fa-moon']
    random_icon = random.choice(icons)
    stylecloud.gen_stylecloud(text=text, icon_name=random_icon, output_name=filename)

    # Use a dedicated figure and always close it: drawing on pyplot's implicit
    # global figure would stack images across calls and leak figures.
    fig, ax = plt.subplots()
    try:
        ax.imshow(plt.imread(filename))
        ax.axis('off')
        fig.savefig(filename, bbox_inches='tight')
    finally:
        plt.close(fig)
    return filename
73 |
74 |
# Crear el documento PDF
75 |
class PDF(FPDF):
    """FPDF document with coloured side bands and a page-number footer."""

    # Width of the decorative band painted along each vertical page edge,
    # in document units.
    _BAND_WIDTH = 10

    def header(self):
        """Paint a light-blue band down the left and right edges of the page."""
        self.set_fill_color(200, 220, 255)
        # Use the page's own size instead of hard-coded 200/297 so the bands
        # still hug the edges on other formats; values are unchanged for a
        # 210 x 297 page.
        self.rect(0, 0, self._BAND_WIDTH, self.h, 'F')
        self.rect(self.w - self._BAND_WIDTH, 0, self._BAND_WIDTH, self.h, 'F')

    def footer(self):
        """Print the centred page number near the bottom of the page."""
        # NOTE(review): the positioning line was missing from the extract
        # this class was restored from; 15 units from the bottom edge is the
        # value used by the FPDF footer tutorial - confirm.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
85 |
86 |
def add_text_to_pdf(pdf, text, title):
    """Add a word-cloud page and an example-sentence glossary page to *pdf*.

    Side effects: writes ``word_freq_<title>.csv`` (word frequencies) and
    ``wordcloud_<title>.png`` (the cloud image) in the working directory,
    where ``<title>`` is *title* reduced to file-name-safe characters.

    NOTE(review): the ``add_page()`` and ``ln()`` calls were missing from the
    extract this function was restored from and are reconstructed from the
    layout (a page must exist before the first cell; the glossary heading
    would otherwise overlap the image) - confirm against the original.

    Args:
        pdf: FPDF-compatible document to draw on.
        text: Spanish source text.
        title: Heading printed on the first page; also used to name the
            generated files.
    """
    filtered_words = preprocess_text(text)
    word_freq = Counter(filtered_words)
    # The title comes from user input: never let it inject path separators.
    safe_title = _safe_filename_part(title)

    word_freq_file = f"word_freq_{safe_title}.csv"
    # newline='' is required by the csv module (otherwise blank rows appear
    # on Windows); utf-8 keeps accented words intact on any platform.
    with open(word_freq_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['word', 'frequency'])
        writer.writerows(word_freq.items())

    cloud_filename = f'wordcloud_{safe_title}.png'
    generate_random_style_cloud(filtered_words, cloud_filename)

    # Page 1: title, rule and word cloud.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.set_draw_color(0, 0, 0)
    pdf.line(10, 25, 200, 25)
    pdf.image(cloud_filename, x=15, y=30, w=180)

    # Page 2: glossary of the most frequent words.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')

    high_freq_words = sorted(word.upper() for word, _ in word_freq.most_common(20))

    # Table header row.
    pdf.set_font('Arial', 'B', 12)
    pdf.set_fill_color(200, 200, 200)
    pdf.cell(90, 10, 'PALABRA', 1, fill=True)
    pdf.cell(0, 10, 'ORACIÓN DE EJEMPLO', 1, fill=True)
    pdf.ln()

    pdf.set_font('Arial', '', 12)
    for word in high_freq_words:
        example_sent = get_example_sentences(word.lower())
        if not example_sent:
            # No corpus sentence contains this word: no row is printed.
            continue
        example_sentence = _highlight(example_sent[0], word)
        pdf.cell(90, 10, word, 1)
        pdf.set_font('Arial', '', 10)
        pdf.multi_cell(0, 10, example_sentence, 1)
        # Source credit, aligned under the sentence column.
        pdf.set_font('Arial', 'I', 8)
        pdf.cell(90, 10, '', 0)
        pdf.cell(0, 10, 'Fuente: NLTK', 0)
        pdf.set_font('Arial', '', 12)
        pdf.ln()


def _safe_filename_part(title):
    """Reduce *title* to characters that are safe inside a file name."""
    cleaned = re.sub(r'[^\w\-]+', '_', title).strip('_')
    return cleaned or 'untitled'


def _highlight(sentence, word):
    """Replace each whole-word, case-insensitive match of *word* by ``**WORD**``.

    A single pass avoids the double wrapping (``****WORD****``) that two
    chained ``str.replace`` calls produce when the second one matches the
    output of the first.
    """
    marked = f'**{word.upper()}**'
    pattern = re.compile(rf'\b{re.escape(word)}\b', re.IGNORECASE)
    # A callable replacement keeps `marked` literal (no backslash escapes).
    return pattern.sub(lambda _match: marked, sentence)
137 |
def generar_pdf(text, title):
    """Build the PDF for *text* and return the path of the written file.

    Args:
        text: Spanish source text to analyse.
        title: Heading of the document.

    Returns:
        str: Path of the generated PDF (always ``'documento.pdf'`` in the
        working directory, overwritten on every call).
    """
    pdf = PDF()
    add_text_to_pdf(pdf, text, title)
    output_filename = 'documento.pdf'
    # NOTE(review): this call was missing from the extract the function was
    # restored from; without it the returned path points to a file that was
    # never written.
    pdf.output(output_filename)
    return output_filename
143 |
144 |
# Crear la interfaz de Gradio
145 |
# Component classes live at the top level of the package in current Gradio;
# the legacy `gr.inputs` / `gr.outputs` namespaces no longer exist there and
# accessing them raises AttributeError at start-up. Fall back to the legacy
# names only when the top-level ones are absent (very old releases).
_Textbox = getattr(gr, 'Textbox', None) or gr.inputs.Textbox
_File = getattr(gr, 'File', None) or gr.outputs.File

# Web UI: two text inputs (body text, title) -> downloadable PDF.
# NOTE(review): `fn=`, the closing parenthesis and the launch call were
# missing from the extract this block was restored from - confirm.
interface = gr.Interface(
    fn=generar_pdf,
    inputs=[
        _Textbox(lines=10, placeholder="Introduce texto en español aquí..."),
        _Textbox(lines=1, placeholder="Introduce un título aquí..."),
    ],
    outputs=_File(label="Documento PDF generado"),
    title="Generador de Nubes de Palabras y Glosario en PDF",
    description="Introduce un texto en español y un título para generar un documento PDF con nubes de palabras y un glosario con oraciones de ejemplo.",
)

if __name__ == "__main__":
    interface.launch()