# query_enhancer.py
"""Refine free-text search queries with a local LLaMA model via llama.cpp."""
import os

try:
    from llama_cpp import Llama
except ImportError:
    # Keep the module importable when the native binding is absent; the
    # constructor reports the missing dependency through its RuntimeError.
    Llama = None


class QueryEnhancer:
    """Rewrite a user's search query into a better research-paper query.

    The instance holds one loaded llama.cpp model in ``self.model`` and
    reuses it for every call to :meth:`enhance_query`.
    """

    def __init__(
        self,
        model_path="TheBloke/Llama-2-7B-Chat-GGUF",
        model_file="llama-2-7b-chat.Q4_0.gguf",
        *,
        n_ctx=512,
        n_threads=4,
    ):
        """Load LLaMA model with llama.cpp for query enhancement.

        Args:
            model_path: Directory that contains the model file. A leading
                ``~`` is expanded. NOTE(review): the default looks like a
                Hugging Face repo id rather than a local directory; it is
                kept for backward compatibility, so download the model
                manually and pass the real directory.
            model_file: File name of the GGUF model inside ``model_path``.
            n_ctx: Context length passed to llama.cpp. The default of 512
                is deliberately small to fit an 8 GB machine.
            n_threads: Number of CPU threads passed to llama.cpp.

        Raises:
            RuntimeError: If llama-cpp-python is not installed or the model
                cannot be loaded. The original error is chained as the cause.
        """
        try:
            if Llama is None:
                raise ImportError("llama-cpp-python is not installed")
            self.model = Llama(
                model_path=self._resolve_model_file(model_path, model_file),
                n_ctx=n_ctx,
                n_threads=n_threads,
            )
            print("LLaMA-2-7B loaded successfully with llama.cpp.")
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise RuntimeError(f"Failed to load LLaMA-2-7B: {str(e)}") from e

    @staticmethod
    def _resolve_model_file(model_path, model_file):
        """Return the model file path with ``~`` expanded in the directory."""
        return os.path.join(os.path.expanduser(model_path), model_file)

    def enhance_query(self, user_query, *, max_tokens=50, temperature=0.7):
        """Refine user queries for arXiv search.

        Args:
            user_query: The query typed by the user. ``None`` is treated as
                an empty query.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature for the completion.

        Returns:
            The refined query with surrounding whitespace removed. Returns
            ``""`` for a blank query (the model is not called), and the
            whitespace-normalized original query when the model produces
            no text.
        """
        # The prompt is line-oriented and generation stops at the first
        # newline, so collapse any newlines/tabs in the query to spaces.
        cleaned_query = " ".join((user_query or "").split())
        if not cleaned_query:
            return ""

        prompt = (
            "You are a research assistant. Improve this search query for better research paper results:\n"
            f"Original: {cleaned_query}\n"
            "Refined: "
        )
        result = self.model(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=["\n"],  # Stop at newline for clean output
        )
        refined_query = result["choices"][0]["text"].strip()
        # An immediate stop token yields an empty completion; the user's own
        # query is a more useful search string than an empty one.
        return refined_query or cleaned_query


if __name__ == "__main__":
    # The model must already be downloaded into this local directory.
    enhancer = QueryEnhancer(
        model_path="~/models",
        model_file="llama-2-7b-chat.Q4_0.gguf",
    )
    print("Enhanced Query:", enhancer.enhance_query("AI in healthcare"))