import asyncio
import os

import torch
from dotenv import load_dotenv
from transformers import pipeline

load_dotenv()


class LLMPipeline:
    def __init__(self):
        # Model ID comes from the environment so deployments can swap models
        # without code changes. Note: the default points at a GGUF-quantized
        # repo, which transformers' pipeline() may not load directly; set
        # HF_MODEL_ID to a standard checkpoint if loading fails.
        model_id = os.getenv(
            "HF_MODEL_ID",
            "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF",
        )
        self.pipeline = pipeline(
            "text-generation",
            model=model_id,
            torch_dtype=torch.float16,
            device_map="auto",  # place weights across available devices
        )

    async def generate(self, prompt: str, max_new_tokens: int = 100) -> str:
        """Generate text using the local Gemma model."""
        try:
            # The transformers pipeline call is blocking; run it in a worker
            # thread so it does not stall the event loop.
            result = await asyncio.to_thread(
                self.pipeline,
                prompt,
                max_new_tokens=max_new_tokens,
                num_return_sequences=1,
                do_sample=True,  # required for temperature/top_p to take effect
                temperature=0.7,
                top_p=0.9,
                return_full_text=False,  # return only the completion, not the prompt
            )
            return result[0]["generated_text"]
        except Exception as e:
            print(f"Error in LLM generation: {e}")
            return ""
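

# --- Usage sketch (assumptions: run as a script, and the model referenced by
# HF_MODEL_ID is loadable by transformers' pipeline; the prompt below is
# illustrative only) ---
if __name__ == "__main__":

    async def _demo() -> None:
        llm = LLMPipeline()
        completion = await llm.generate(
            "Explain what a context manager does in Python.",
            max_new_tokens=64,
        )
        print(completion)

    asyncio.run(_demo())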