Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import gradio as gr
|
3 |
from docx import Document
|
4 |
import io
|
@@ -14,41 +13,14 @@ def paraphrase_text(text):
|
|
14 |
output_ids = model.generate(input_ids, max_length=256, do_sample=True, top_k=120, top_p=0.95, temperature=1.5)
|
15 |
return tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
def chunk_text(text, max_tokens=350):
    """Split *text* into chunks of roughly *max_tokens* whitespace tokens.

    Sentences are kept whole: each chunk is a run of consecutive sentences
    whose combined whitespace-split word count stays within *max_tokens*.
    A single sentence longer than *max_tokens* becomes its own chunk.

    Args:
        text: Input text to split.
        max_tokens: Approximate maximum word count per chunk.

    Returns:
        list[str]: Non-empty chunk strings in original order.
    """
    # NOTE(review): sent_tokenize comes from nltk elsewhere in the file;
    # it requires the punkt model to be downloaded — confirm at startup.
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = ""
    current_len = 0

    for sentence in sentences:
        token_len = len(sentence.split())
        if current_len + token_len <= max_tokens:
            current_chunk += " " + sentence
            current_len += token_len
        else:
            # Fix: flush only a non-empty chunk. Previously, an oversized
            # FIRST sentence hit this branch with current_chunk == "" and
            # appended an empty string to the result.
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence
            current_len = token_len
    # Flush the trailing partial chunk, if any.
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks
|
39 |
-
|
40 |
|
41 |
def full_article_paraphrase(text):
    """Paraphrase an article chunk by chunk.

    Splits *text* with chunk_text(), paraphrases each chunk with
    paraphrase_text(), and joins the results with blank lines. A failure
    on one chunk is recorded inline rather than aborting the whole run.

    Args:
        text: The article text to paraphrase.

    Returns:
        str: Paraphrased chunks joined by blank lines.
    """
    chunks = chunk_text(text)
    results = []  # Fix: was referenced below without ever being initialized (NameError).
    for chunk in chunks:
        try:
            result = paraphrase_text(chunk.strip())
            results.append(result)
        except Exception as e:
            # Deliberate best-effort: surface the error in the output and keep going.
            results.append(f"[Error paraphrasing chunk: {e}]")
    return "\n\n".join(results)
|
51 |
-
|
52 |
|
53 |
def extract_text_from_docx(file_obj):
|
54 |
file_bytes = file_obj.read() if hasattr(file_obj, "read") else file_obj
|
@@ -95,3 +67,4 @@ demo = gr.Interface(
|
|
95 |
|
96 |
if __name__ == "__main__":
|
97 |
demo.launch()
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from docx import Document
|
3 |
import io
|
|
|
13 |
output_ids = model.generate(input_ids, max_length=256, do_sample=True, top_k=120, top_p=0.95, temperature=1.5)
|
14 |
return tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
15 |
|
16 |
+
def chunk_text(text, max_sentences=4):
    """Break *text* into groups of at most *max_sentences* sentences.

    Sentence boundaries are approximated as '.', '!' or '?' followed by
    one or more spaces. Returns a list of chunk strings; an empty input
    yields a single empty-string chunk.
    """
    import re

    parts = re.split(r'(?<=[.!?]) +', text.strip())
    grouped = []
    start = 0
    while start < len(parts):
        grouped.append(' '.join(parts[start:start + max_sentences]))
        start += max_sentences
    return grouped
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
def full_article_paraphrase(text):
    """Paraphrase *text* one chunk at a time.

    Each non-blank chunk produced by chunk_text() is passed through
    paraphrase_text(); the results are joined with blank lines.
    """
    paraphrased = []
    for piece in chunk_text(text):
        cleaned = piece.strip()
        if cleaned:
            paraphrased.append(paraphrase_text(cleaned))
    return "\n\n".join(paraphrased)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
def extract_text_from_docx(file_obj):
|
26 |
file_bytes = file_obj.read() if hasattr(file_obj, "read") else file_obj
|
|
|
67 |
|
68 |
if __name__ == "__main__":
|
69 |
demo.launch()
|
70 |
+
|