Spaces:

dexttttrees
/

appy.py

Runtime error

App Files Files Community

dexttttrees committed on Jul 17, 2024

Commit

99559eb

verified ·

1 Parent(s): f6d9a68

app.py

Browse files

Files changed (1) hide show

app.py +154 -0

app.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import gradio as gr
+import nltk
+from nltk.corpus import stopwords, cess_esp, conll2002
+from nltk.tokenize import word_tokenize
+import stylecloud
+import matplotlib.pyplot as plt
+from fpdf import FPDF
+import re
+from collections import Counter
+import spacy
+import os
+import random
+import string
+import csv
+# Descargar recursos necesarios de nltk
+nltk.download('punkt')
+nltk.download('stopwords')
+nltk.download('cess_esp')
+nltk.download('conll2002')
+# Cargar el modelo de spaCy para español
+nlp = spacy.load('es_core_news_md')
+# Lista de stopwords adicional
+additional_stopwords = [
+    # Aquí va tu lista de stopwords adicional...
+]
+# Función de preprocesamiento
+def preprocess_text(text):
+    text = text.lower()
+    text = re.sub(r'\W', ' ', text)
+    doc = nlp(text)
+    stop_words = set(stopwords.words('spanish')).union(set(additional_stopwords))
+    filtered_words = []
+    for token in doc:
+        if token.text not in stop_words:
+            if token.pos_ in ['VERB', 'ADJ', 'NOUN']:  # Considerar verbos, adjetivos y sustantivos
+                filtered_words.append(token.lemma_)
+            elif token.pos_ == 'NUM':  # Ignorar números
+                continue
+            else:
+                # Ignorar adverbios y otras categorías gramaticales
+                if token.pos_ not in ['ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'PRON', 'SCONJ', 'SYM', 'X']:
+                    filtered_words.append(token.text)
+    return filtered_words
+# Obtener oraciones de ejemplo de múltiples corpus de nltk
+def get_example_sentences(word, num_sentences=1):
+    sentences = []
+    for corpus in [cess_esp, conll2002]:
+        for sent in corpus.sents():
+            if word in sent and len(word) > 1:
+                sentences.append(' '.join(sent))
+            if len(sentences) >= num_sentences:
+                break
+        if len(sentences) >= num_sentences:
+            break
+    return sentences
+# Función para generar la nube de palabras con estilo aleatorio
+def generate_random_style_cloud(words, filename):
+    text = ' '.join(words)
+    icons = ['fas fa-cloud', 'fas fa-star', 'fas fa-heart', 'fas fa-tree', 'fas fa-sun', 'fas fa-moon']
+    random_icon = random.choice(icons)
+    stylecloud.gen_stylecloud(text=text, icon_name=random_icon, output_name=filename)
+    img = plt.imread(filename)
+    plt.imshow(img)
+    plt.axis('off')
+    plt.savefig(filename, bbox_inches='tight')
+    return filename
+# Crear el documento PDF
+class PDF(FPDF):
+    def header(self):
+        self.set_fill_color(200, 220, 255)
+        self.rect(0, 0, 10, 297, 'F')
+        self.rect(200, 0, 10, 297, 'F')
+    def footer(self):
+        self.set_y(-15)
+        self.set_font('Arial', 'I', 8)
+        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
+def add_text_to_pdf(pdf, text, title):
+    filtered_words = preprocess_text(text)
+    word_freq = Counter(filtered_words)
+    word_freq_file = f"word_freq_{title}.csv"
+    with open(word_freq_file, 'w') as f:
+        writer = csv.writer(f)
+        writer.writerow(['word', 'frequency'])
+        for word, freq in word_freq.items():
+            writer.writerow([word, freq])
+    cloud_filename = f'wordcloud_{title}.png'
+    generate_random_style_cloud(filtered_words, cloud_filename)
+    pdf.add_page()
+    pdf.set_font('Arial', 'B', 16)
+    pdf.cell(0, 10, title, ln=True, align='C')
+    pdf.set_draw_color(0, 0, 0)
+    pdf.set_line_width(0.5)
+    pdf.line(10, 25, 200, 25)
+    pdf.image(cloud_filename, x=15, y=30, w=180)
+    pdf.add_page()
+    pdf.set_font('Arial', 'B', 16)
+    pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')
+    high_freq_words = sorted([word.upper() for word, freq in word_freq.most_common(20)])
+    pdf.set_font('Arial', 'B', 12)
+    pdf.set_fill_color(200, 200, 200)
+    pdf.cell(90, 10, 'PALABRA', 1, fill=True)
+    pdf.cell(0, 10, 'ORACIÓN DE EJEMPLO', 1, fill=True)
+    pdf.ln()
+    pdf.set_font('Arial', '', 12)
+    pdf.set_line_width(0.1)
+    for word in high_freq_words:
+        example_sent = get_example_sentences(word.lower())
+        if example_sent:
+            example_sentence = example_sent[0].replace(word.lower(), f'**{word}**').replace(word, f'**{word}**')
+            pdf.cell(90, 10, word, 1)
+            pdf.set_font('Arial', '', 10)
+            pdf.multi_cell(0, 10, example_sentence, 1)
+            pdf.set_font('Arial', 'I', 8)
+            pdf.cell(90, 10, '', 0)
+            pdf.cell(0, 10, 'Fuente: NLTK', 0)
+            pdf.set_font('Arial', '', 12)
+        else:
+            continue
+        pdf.ln()
+def generar_pdf(text, title):
+    pdf = PDF()
+    add_text_to_pdf(pdf, text, title)
+    output_filename = 'documento.pdf'
+    pdf.output(output_filename)
+    return output_filename
+# Crear la interfaz de Gradio
+interface = gr.Interface(
+    fn=generar_pdf,
+    inputs=[gr.inputs.Textbox(lines=10, placeholder="Introduce texto en español aquí..."), gr.inputs.Textbox(lines=1, placeholder="Introduce un título aquí...")],
+    outputs=gr.outputs.File(label="Documento PDF generado"),
+    title="Generador de Nubes de Palabras y Glosario en PDF",
+    description="Introduce un texto en español y un título para generar un documento PDF con nubes de palabras y un glosario con oraciones de ejemplo."
+)
+if __name__ == "__main__":
+    interface.launch()