"""Gradio app that paraphrases pasted text or an uploaded .docx file.

The input is split into small sentence groups, each group is paraphrased with
the ``Vamsi/T5_Paraphrase_Paws`` model, and the result is shown in the UI and
offered as a downloadable .docx file.
"""

import io
import os
import re
import tempfile

import gradio as gr
from docx import Document
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load model
tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")

# A sentence boundary is any whitespace run that follows '.', '!' or '?'.
# Matching \s+ (not just spaces) matters because text extracted from a .docx
# is joined with newlines; splitting on spaces only would glue paragraphs
# together into oversized chunks that the tokenizer then truncates.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


def paraphrase_text(text):
    """Return one sampled paraphrase of *text*.

    The prompt is tokenized with truncation enabled, so input longer than the
    tokenizer's limit is cut off rather than rejected. Generation uses
    sampling (``do_sample=True``), so repeated calls with the same input can
    return different strings.
    """
    input_text = f"paraphrase: {text} "
    input_ids = tokenizer.encode(input_text, return_tensors="pt", truncation=True)
    output_ids = model.generate(
        input_ids,
        max_length=256,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        temperature=1.5,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def chunk_text(text, max_sentences=4):
    """Split *text* into chunks of at most *max_sentences* sentences.

    Sentences are detected by whitespace following ``.``, ``!`` or ``?``.
    Sentences inside a chunk are joined with a single space.

    Returns:
        A list of chunk strings; empty when *text* is empty or whitespace.

    Raises:
        ValueError: if *max_sentences* is smaller than 1.
    """
    if max_sentences < 1:
        raise ValueError("max_sentences must be at least 1")
    sentences = [s for s in _SENTENCE_BOUNDARY.split(text.strip()) if s]
    return [
        " ".join(sentences[start:start + max_sentences])
        for start in range(0, len(sentences), max_sentences)
    ]


def full_article_paraphrase(text):
    """Paraphrase *text* chunk by chunk and join the results with blank lines."""
    chunks = chunk_text(text)
    return "\n\n".join(
        paraphrase_text(chunk.strip()) for chunk in chunks if chunk.strip()
    )


def extract_text_from_docx(file_obj):
    """Return the non-empty paragraphs of a .docx document, newline-joined.

    Args:
        file_obj: a filesystem path (``str`` / ``os.PathLike``), the raw
            document bytes, or a binary file-like object exposing ``read()``.
            As a fallback, an object exposing only a string ``name``
            attribute is treated as a path.

    Raises:
        TypeError: if *file_obj* is none of the supported kinds.
    """
    if isinstance(file_obj, (str, os.PathLike)):
        # An upload may arrive as a plain path; the original code handed such
        # a string to io.BytesIO, which raises TypeError.
        source = os.fspath(file_obj)
    elif isinstance(file_obj, (bytes, bytearray)):
        source = io.BytesIO(file_obj)
    elif hasattr(file_obj, "read"):
        source = io.BytesIO(file_obj.read())
    elif isinstance(getattr(file_obj, "name", None), str):
        # NOTE(review): assumes `.name` is the on-disk path of the upload.
        source = file_obj.name
    else:
        raise TypeError(
            f"Unsupported .docx source: {type(file_obj).__name__}"
        )
    doc = Document(source)
    return "\n".join(para.text for para in doc.paragraphs if para.text.strip())


def save_docx(text):
    """Write *text* to a new .docx file and return its path.

    Each blank-line-separated block of *text* becomes one paragraph. Every
    call writes to a fresh, uniquely named temporary file so simultaneous
    requests cannot overwrite each other's download. The file is not deleted
    by this function.
    """
    doc = Document()
    for para in text.split("\n\n"):
        doc.add_paragraph(para.strip())
    # delete=False: the file must outlive this function so it can be served.
    with tempfile.NamedTemporaryFile(
        prefix="paraphrased_output_", suffix=".docx", delete=False
    ) as handle:
        doc.save(handle)
        filepath = handle.name
    return filepath


def get_ai_score(text):
    """Return a label for *text* based on its length only.

    This is a placeholder: text longer than 100 characters is labelled
    "Likely Human", anything else "Short Text". No detection is performed.
    """
    if len(text) > 100:
        return "AI Detection Score: Likely Human"
    return "AI Detection Score: Short Text"


def full_pipeline(input_text=None, file=None):
    """Run extraction, paraphrasing, export and scoring for the UI.

    Args:
        input_text: text pasted by the user; ignored when *file* is given.
        file: optional uploaded .docx; when present its text replaces
            *input_text*.

    Returns:
        A ``(paraphrased_text, docx_path, score_label)`` tuple. When fewer
        than 10 non-whitespace-trimmed characters are available, a message
        tuple with ``None`` as the file path is returned instead.
    """
    if file is not None:
        input_text = extract_text_from_docx(file)
    if not input_text or len(input_text.strip()) < 10:
        return "Please enter or upload valid text.", None, "No text to analyze."
    result = full_article_paraphrase(input_text)
    docx_file = save_docx(result)
    ai_score = get_ai_score(result)
    return result, docx_file, ai_score


demo = gr.Interface(
    fn=full_pipeline,
    inputs=[
        gr.Textbox(
            label="Paste Text (optional)",
            lines=20,
            placeholder="Or upload a .docx file below...",
        ),
        gr.File(label="Upload .docx File (optional)", file_types=[".docx"]),
    ],
    outputs=[
        gr.Textbox(label="Paraphrased Output"),
        gr.File(label="Download .docx File"),
        gr.Textbox(label="AI Detection Score"),
    ],
    title="Smart Paraphraser",
    description="Paste or upload your article. Get paraphrased output, download as .docx, and see an AI detection score.",
)

if __name__ == "__main__":
    demo.launch()