Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,54 +7,72 @@ from transformers import pipeline
|
|
7 |
subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
|
8 |
nlp = spacy.load("en_core_web_sm")
|
9 |
spell = SpellChecker(language='en')
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def simple_correct(text):
|
13 |
return " ".join(spell.correction(w) or w for w in text.split())
|
14 |
|
15 |
def chunk_text(text, max_words=380):
|
|
|
16 |
words = text.split()
|
17 |
for i in range(0, len(words), max_words):
|
18 |
yield " ".join(words[i:i + max_words])
|
19 |
|
20 |
def humanize(text, tone, strength, freeze):
|
21 |
locked = [(ent.text, ent.label_) for ent in nlp(text).ents] if freeze else []
|
22 |
-
|
|
|
23 |
for chunk in chunk_text(text, 380):
|
24 |
-
paraphrased = para(
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
26 |
paraphrased = simple_correct(paraphrased)
|
|
|
|
|
27 |
for ent, label in locked:
|
28 |
paraphrased = re.sub(re.escape(ent), ent, paraphrased, flags=re.IGNORECASE)
|
29 |
-
chunks_out.append(paraphrased)
|
30 |
-
full_text = "\n\n".join(chunks_out)
|
31 |
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
34 |
with open(file_path, "w", encoding="utf-8") as f:
|
35 |
f.write(full_text)
|
|
|
36 |
return full_text, file_path
|
37 |
|
38 |
-
with gr.Blocks(title="AI Humanizer β
|
39 |
-
gr.Markdown("## AI Humanizer β
|
40 |
with gr.Row():
|
41 |
with gr.Column():
|
42 |
-
text_in = gr.Textbox(label="Paste your
|
43 |
tone_dd = gr.Dropdown(["Casual", "Academic", "Marketing", "Legal", "Creative"], value="Casual", label="Tone")
|
44 |
strength_sl = gr.Slider(1, 10, value=5, label="Strength 1-10")
|
45 |
lock_cb = gr.Checkbox(label="Lock facts / dates / names")
|
46 |
submit_btn = gr.Button("Humanize", variant="primary")
|
47 |
|
48 |
with gr.Column():
|
49 |
-
text_out = gr.Textbox(label="
|
50 |
with gr.Row():
|
51 |
-
copy_btn = gr.Button("π Copy
|
52 |
file_out = gr.File(label="Download .txt")
|
53 |
|
54 |
submit_btn.click(humanize, inputs=[text_in, tone_dd, strength_sl, lock_cb],
|
55 |
outputs=[text_out, file_out])
|
56 |
|
57 |
-
# JavaScript one-line copy
|
58 |
copy_btn.click(None, text_out, None,
|
59 |
js="(txt) => navigator.clipboard.writeText(txt)")
|
60 |
|
|
|
7 |
# Load the spaCy English model used for entity detection.  Try the installed
# copy first and fall back to downloading it only when loading fails, so a
# restart does not depend on the network when the model is already present.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")

# English spell checker used by simple_correct().
spell = SpellChecker(language='en')

# Load the paraphrase model once at import time; every request reuses it.
para = pipeline(
    "text2text-generation",
    model="Vamsi/T5_Paraphrase_Paws",
    device=-1,
    max_new_tokens=1024,  # allow longer outputs per chunk
)
|
18 |
|
19 |
def simple_correct(text):
    """Spell-correct *text* token by token and return it joined by single spaces.

    The text is split on whitespace, so runs of spaces and line breaks are
    collapsed to one space in the result.  For each token, punctuation is
    peeled off both ends and only a purely alphabetic core is handed to
    ``spell.correction``; the punctuation is then re-attached and the
    original capitalisation pattern (ALL CAPS or leading capital) is
    re-applied to the suggestion.  Tokens whose core contains digits,
    apostrophes, hyphens or other non-letters are returned unchanged.

    Args:
        text: The string to correct.

    Returns:
        The corrected text as a single space-separated string.
    """

    def _fix(token):
        # Find the span of the token between leading/trailing punctuation.
        start, end = 0, len(token)
        while start < end and not token[start].isalnum():
            start += 1
        while end > start and not token[end - 1].isalnum():
            end -= 1
        core = token[start:end]
        if not core.isalpha():
            # Numbers, codes, contractions, bare punctuation: leave alone.
            return token

        # correction() may yield nothing for an unknown word; keep the input.
        fixed = spell.correction(core) or core
        if fixed.lower() == core.lower():
            # No real change suggested: keep the author's exact casing.
            return token

        if core.isupper() and len(core) > 1:
            fixed = fixed.upper()
        elif core[0].isupper():
            fixed = fixed[0].upper() + fixed[1:]
        return token[:start] + fixed + token[end:]

    return " ".join(_fix(token) for token in text.split())
|
21 |
|
22 |
def chunk_text(text, max_words=380):
    """Yield consecutive chunks of *text* holding at most *max_words* words.

    Words are whatever ``str.split()`` produces, so all whitespace
    (including line breaks) is normalised to single spaces inside a chunk.
    Every chunk has exactly *max_words* words except possibly the last,
    which holds the remainder.  Empty or ``None`` input yields nothing.

    Args:
        text: The text to split.
        max_words: Maximum number of words per chunk; must be at least 1.

    Yields:
        Space-joined chunks in their original order.

    Raises:
        ValueError: If *max_words* is smaller than 1.  Because this is a
            generator, the error surfaces when iteration starts.
    """
    if max_words < 1:
        raise ValueError(f"max_words must be >= 1, got {max_words!r}")

    words = (text or "").split()
    for start in range(0, len(words), max_words):
        yield " ".join(words[start:start + max_words])
|
27 |
|
28 |
def humanize(text, tone, strength, freeze):
    """Paraphrase *text* chunk by chunk and write the result to a .txt file.

    The text is split with ``chunk_text(text, 380)``; each chunk is sent to
    the ``para`` pipeline with sampling enabled, spell-corrected with
    ``simple_correct`` and, when *freeze* is set, has the spelling/casing of
    every entity spaCy found in the input re-imposed wherever that entity
    still occurs (case-insensitively) in the paraphrase.  Entities the model
    rewrote into something else are not re-inserted.

    Args:
        text: Input text; ``None`` is treated as empty.
        tone: Selected tone.  Accepted for the UI callback signature but not
            used by this function.
        strength: Numeric strength; sampling temperature is
            ``0.7 * strength / 10``.
        freeze: When truthy, run entity detection and restore entity text.

    Returns:
        A ``(full_text, file_path)`` tuple: the paraphrased chunks joined by
        single spaces, and the path of a UTF-8 file containing that text.
    """
    # Local imports keep this function self-contained.
    import tempfile
    from pathlib import Path

    text = text or ""

    # Unique entity strings, in first-seen order.  Patterns are compiled once
    # here rather than once per chunk.
    entities = list(dict.fromkeys(ent.text for ent in nlp(text).ents)) if freeze else []
    entity_patterns = [
        (re.compile(re.escape(entity), flags=re.IGNORECASE), entity)
        for entity in entities
    ]

    temperature = 0.7 * strength / 10
    out_chunks = []

    for chunk in chunk_text(text, 380):
        paraphrased = para(
            chunk,
            max_new_tokens=1024,
            do_sample=True,
            temperature=temperature,
        )[0]['generated_text']

        paraphrased = simple_correct(paraphrased)

        # Restore locked entities.  A function replacement inserts the entity
        # literally; a plain string would be parsed as a regex template and
        # mis-handle backslashes or group references in the entity text.
        for pattern, entity in entity_patterns:
            paraphrased = pattern.sub(lambda _match, entity=entity: entity, paraphrased)

        out_chunks.append(paraphrased)

    full_text = " ".join(out_chunks)

    # Downloadable file.  Each call gets its own temporary directory so
    # concurrent requests cannot overwrite or read each other's output, while
    # the file itself keeps the same name as before.
    file_path = str(Path(tempfile.mkdtemp(prefix="humanize_")) / "full_humanized.txt")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(full_text)

    return full_text, file_path
|
56 |
|
57 |
+
# Gradio UI: input controls in the left column, result text plus copy and
# download controls in the right column.
with gr.Blocks(title="AI Humanizer – Exact Length") as demo:
    gr.Markdown("## AI Humanizer – Exact Length, No Compression")
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(label="Paste your text (any length)", lines=15, max_lines=None)
            tone_dd = gr.Dropdown(["Casual", "Academic", "Marketing", "Legal", "Creative"], value="Casual", label="Tone")
            strength_sl = gr.Slider(1, 10, value=5, label="Strength 1-10")
            lock_cb = gr.Checkbox(label="Lock facts / dates / names")
            submit_btn = gr.Button("Humanize", variant="primary")

        with gr.Column():
            text_out = gr.Textbox(label="Full humanized text (same word-count)", lines=25, interactive=True)
            with gr.Row():
                copy_btn = gr.Button("📋 Copy")
                file_out = gr.File(label="Download .txt")

    # Run humanize() on click; its two return values fill the output textbox
    # and the download component, in that order.
    submit_btn.click(humanize, inputs=[text_in, tone_dd, strength_sl, lock_cb],
                     outputs=[text_out, file_out])

    # Copy is handled entirely in the browser: no Python callback, the
    # textbox value is passed straight to the clipboard API.
    copy_btn.click(None, text_out, None,
                   js="(txt) => navigator.clipboard.writeText(txt)")
|
78 |
|