# paraphraser.py
from model_loader import paraphraser_model
import time

def paraphrase_comment(comment):
    """
    Paraphrase a given comment using the fine-tuned Granite 3.2-2B-Instruct model to make it non-toxic, empathetic, and professional while retaining the original intent.
    Returns the paraphrased comment.
    """
    try:
        start_time = time.time()
        print("Starting paraphrasing...")

        # Access the model and tokenizer
        model = paraphraser_model.model
        tokenizer = paraphraser_model.tokenizer

        # Define the prompt for paraphrasing
        prompt = (
            "You are a content moderator tasked with paraphrasing a comment to make it "
            "non-toxic, empathetic, and professional while retaining the original intent. "
            f"The original comment is: \"{comment}\". "
            "Guidelines: "
            "- Remove any hate speech, offensive language, or toxic elements. "
            "- Use a neutral or positive tone. "
            "- Ensure the paraphrased comment is concise and clear. "
            "- Maintain the core message or intent of the original comment. "
            "Provide the paraphrased comment only, without additional explanation."
        )

        # Tokenize the prompt
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)

        # Generate the paraphrased comment; max_new_tokens caps the generated text
        # separately from the prompt length (max_length would count the prompt too,
        # which can leave no room for output when the prompt is truncated at 512)
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=256,
            num_beams=5,
            no_repeat_ngram_size=2,
            early_stopping=True
        )

        # Decode only the newly generated tokens so the prompt is not echoed back;
        # slicing by token count is more reliable than string-matching the decoded prompt
        prompt_length = inputs["input_ids"].shape[1]
        paraphrased_comment = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

        print(f"Paraphrasing completed in {time.time() - start_time:.2f} seconds")
        return paraphrased_comment if paraphrased_comment else "Error: Unable to generate paraphrase."

    except Exception as e:
        print(f"Error during paraphrasing: {str(e)}")
        return "Error: Unable to generate paraphrase."