File size: 2,819 Bytes
			
# paraphraser.py
from model_loader import paraphraser_model
def paraphrase_comment(comment):
    """
    Paraphrase a toxic comment using the Granite 3.2-2B-Instruct model.

    Builds a few-shot moderation prompt around ``comment``, runs beam-search
    generation on ``paraphraser_model.model`` and returns only the generated
    continuation, with a leading "Neutral: " label stripped if present.

    Args:
        comment: The comment text to rewrite.

    Returns:
        ``None`` when ``comment`` is empty or ``None``; otherwise the
        paraphrased comment. If anything raises during prompting, generation
        or decoding, a string starting with "Error paraphrasing comment: " is
        returned instead of propagating the exception.
    """
    if not comment:
        return None
    try:
        model = paraphraser_model.model
        tokenizer = paraphraser_model.tokenizer
        # Create a detailed prompt with guidelines and examples
        prompt = (
            "You are a content moderator tasked with rewriting toxic comments into neutral and constructive ones while maintaining the original meaning. "
            "Follow these guidelines:\n"
            "- Remove explicit hate speech, personal attacks, or offensive language.\n"
            "- Keep the response neutral and conversational, suitable for a casual online platform.\n"
            "- Ensure the rewritten comment retains the original intent but in a constructive tone, addressing the specific context of the comment (e.g., disagreement, frustration).\n\n"
            "Examples:\n"
            "Toxic: \"You're so dumb! You never understand anything!\"\n"
            "Neutral: \"I think there might be a misunderstanding here. Can we go over this again to clear things up?\"\n"
            "Toxic: \"This is the worst idea ever. Only an idiot would suggest this.\"\n"
            "Neutral: \"I’m not sure this idea works for me. Could we look at some other options instead?\"\n"
            "Toxic: \"You are an idiot and should leave this platform.\"\n"
            "Neutral: \"It seems like you might not be enjoying this platform. Maybe we can talk about what’s not working for you?\"\n\n"
            f"Now, rewrite this comment: \"{comment}\""
        )
        # NOTE(review): the prompt is truncated to 512 tokens; with the usual
        # right-side truncation a very long comment loses its tail - confirm
        # this limit is acceptable for the expected inputs.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)
        prompt_len = len(inputs["input_ids"][0])
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,  # Cap on generated tokens (excludes the prompt)
            num_beams=4,  # Beam search for more consistent, deterministic output
            early_stopping=True,  # Stop once enough finished beams are found
            do_sample=False  # No sampling; pure beam search
        )
        generated = outputs[0]
        config = getattr(model, "config", None)
        if getattr(config, "is_encoder_decoder", False):
            # Encoder-decoder output holds only the new sequence, so there is
            # no prompt prefix to cut off at the token level.
            paraphrased_comment = tokenizer.decode(generated, skip_special_tokens=True)
            paraphrased_comment = paraphrased_comment.replace(prompt, "").strip()
        else:
            # Decoder-only models echo the prompt tokens before the new ones.
            # Drop them by position: matching the decoded text against the
            # prompt string is unreliable because decoding does not always
            # reproduce the prompt byte-for-byte (normalisation, truncation).
            paraphrased_comment = tokenizer.decode(
                generated[prompt_len:], skip_special_tokens=True
            ).strip()
        # Remove unwanted prefixes like "Neutral: "
        if paraphrased_comment.startswith("Neutral: "):
            paraphrased_comment = paraphrased_comment[len("Neutral: "):].strip()
        return paraphrased_comment
    except Exception as e:
        # Deliberate best-effort boundary: callers receive the failure as text.
        return f"Error paraphrasing comment: {str(e)}"
