Arpit24052003 committed on
Commit
3998bf9
·
verified ·
1 Parent(s): b93d97e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -33
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import gradio as gr
3
  from docx import Document
4
  import io
@@ -14,41 +13,14 @@ def paraphrase_text(text):
14
  output_ids = model.generate(input_ids, max_length=256, do_sample=True, top_k=120, top_p=0.95, temperature=1.5)
15
  return tokenizer.decode(output_ids[0], skip_special_tokens=True)
16
 
17
- import nltk
18
- nltk.download('punkt')
19
- from nltk.tokenize import sent_tokenize
20
-
21
- def chunk_text(text, max_tokens=350):
22
- sentences = sent_tokenize(text)
23
- chunks = []
24
- current_chunk = ""
25
- current_len = 0
26
-
27
- for sentence in sentences:
28
- token_len = len(sentence.split())
29
- if current_len + token_len <= max_tokens:
30
- current_chunk += " " + sentence
31
- current_len += token_len
32
- else:
33
- chunks.append(current_chunk.strip())
34
- current_chunk = sentence
35
- current_len = token_len
36
- if current_chunk:
37
- chunks.append(current_chunk.strip())
38
- return chunks
39
-
40
 
41
  def full_article_paraphrase(text):
42
  chunks = chunk_text(text)
43
- results = []
44
- for chunk in chunks:
45
- try:
46
- result = paraphrase_text(chunk.strip())
47
- results.append(result)
48
- except Exception as e:
49
- results.append(f"[Error paraphrasing chunk: {e}]")
50
- return "\n\n".join(results)
51
-
52
 
53
  def extract_text_from_docx(file_obj):
54
  file_bytes = file_obj.read() if hasattr(file_obj, "read") else file_obj
@@ -95,3 +67,4 @@ demo = gr.Interface(
95
 
96
  if __name__ == "__main__":
97
  demo.launch()
 
 
 
1
  import gradio as gr
2
  from docx import Document
3
  import io
 
13
  output_ids = model.generate(input_ids, max_length=256, do_sample=True, top_k=120, top_p=0.95, temperature=1.5)
14
  return tokenizer.decode(output_ids[0], skip_special_tokens=True)
15
 
16
def chunk_text(text, max_sentences=4):
    """Split *text* into chunks of at most *max_sentences* sentences.

    Sentence boundaries are found with a lightweight regex: a run of one or
    more spaces that immediately follows ``.``, ``!`` or ``?``. Other
    whitespace (for example a newline) is not treated as a boundary.

    Args:
        text: The text to split. ``None`` is treated like an empty string.
        max_sentences: Maximum number of sentences per chunk; must be >= 1.

    Returns:
        A list of chunk strings, each made of up to *max_sentences*
        sentences joined by single spaces. The list is empty when *text*
        is empty or contains only whitespace.

    Raises:
        ValueError: If *max_sentences* is smaller than 1.
    """
    import re  # kept local so the block does not rely on a file-level import

    if max_sentences < 1:
        # A zero step would make range() fail with an unrelated message and a
        # negative step would silently produce no chunks at all.
        raise ValueError("max_sentences must be at least 1")

    stripped = text.strip() if text else ""
    if not stripped:
        # re.split("") would yield [''], i.e. one bogus empty chunk.
        return []

    sentences = re.split(r'(?<=[.!?]) +', stripped)
    return [
        ' '.join(sentences[start:start + max_sentences])
        for start in range(0, len(sentences), max_sentences)
    ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
def full_article_paraphrase(text):
    """Paraphrase *text* chunk by chunk and join the results.

    The text is split with ``chunk_text``; every chunk that is non-empty
    after stripping is passed to ``paraphrase_text``. The paraphrased
    pieces are returned as one string separated by blank lines.
    """
    paraphrased = []
    for piece in chunk_text(text):
        trimmed = piece.strip()
        if not trimmed:
            # Nothing to paraphrase in a whitespace-only chunk.
            continue
        paraphrased.append(paraphrase_text(trimmed))
    return "\n\n".join(paraphrased)
 
 
 
 
 
 
 
 
24
 
25
  def extract_text_from_docx(file_obj):
26
  file_bytes = file_obj.read() if hasattr(file_obj, "read") else file_obj
 
67
 
68
  if __name__ == "__main__":
69
  demo.launch()
70
+