import asyncio
import os

import torch
from dotenv import load_dotenv
from transformers import pipeline

load_dotenv()


class LLMPipeline:
    def __init__(self):
        # Model ID comes from the environment so deployments can swap models
        # without code changes. Note: the default points at a GGUF-quantized
        # repo, which transformers' pipeline() may not load directly; set
        # HF_MODEL_ID to a standard checkpoint if loading fails.
        model_id = os.getenv(
            "HF_MODEL_ID",
            "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF",
        )
        self.pipeline = pipeline(
            "text-generation",
            model=model_id,
            torch_dtype=torch.float16,
            device_map="auto",  # place weights across available devices
        )

    async def generate(self, prompt: str, max_new_tokens: int = 100) -> str:
        """Generate text using the local Gemma model."""
        try:
            # The transformers pipeline call is blocking; run it in a worker
            # thread so it does not stall the event loop.
            result = await asyncio.to_thread(
                self.pipeline,
                prompt,
                max_new_tokens=max_new_tokens,
                num_return_sequences=1,
                do_sample=True,  # required for temperature/top_p to take effect
                temperature=0.7,
                top_p=0.9,
                return_full_text=False,  # return only the completion, not the prompt
            )
            return result[0]["generated_text"]
        except Exception as e:
            print(f"Error in LLM generation: {e}")
            return ""
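

# --- Usage sketch (assumptions: run as a script, and the model referenced by
# HF_MODEL_ID is loadable by transformers' pipeline; the prompt below is
# illustrative only) ---
if __name__ == "__main__":

    async def _demo() -> None:
        llm = LLMPipeline()
        completion = await llm.generate(
            "Explain what a context manager does in Python.",
            max_new_tokens=64,
        )
        print(completion)

    asyncio.run(_demo())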