from typing import Optional

# Legacy import path: from llama_index.llms import HuggingFaceLLM
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
from transformers import BitsAndBytesConfig


def setup_llm(
    model_name: str = "microsoft/Phi-3-mini-4k-instruct",
    device: Optional[str] = None,
    context_window: int = 4096,
    max_new_tokens: int = 512,
) -> HuggingFaceLLM:
    """
    Set up the language model for the CSV chatbot.

    Args:
        model_name: Name of the Hugging Face model to use
        device: Device to run the model on ('cuda', 'cpu', etc.)
        context_window: Maximum context window size
        max_new_tokens: Maximum number of new tokens to generate

    Returns:
        Configured LLM instance
    """
    # Determine device automatically if none was given
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # Configure 4-bit quantization for memory efficiency (GPU only;
    # bitsandbytes does not support CPU inference)
    if device == "cuda":
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
    else:
        quantization_config = None

    # Model-loading kwargs forwarded to transformers' from_pretrained,
    # kept conservative for constrained HF Spaces hardware; use float16
    # only on GPU, since fp16 is poorly supported on CPU
    model_kwargs = {
        "trust_remote_code": True,
        "torch_dtype": torch.float16 if device == "cuda" else torch.float32,
        # Cache downloaded weights locally to avoid re-downloading
        "cache_dir": "./model_cache",
    }
    if quantization_config:
        model_kwargs["quantization_config"] = quantization_config

    # Initialize the LLM. HuggingFaceLLM loads both the model and the
    # tokenizer itself from tokenizer_name/model_name, so no separate
    # AutoTokenizer call is needed. do_sample=True is required for
    # temperature/top_p to actually take effect during generation.
    llm = HuggingFaceLLM(
        model_name=model_name,
        tokenizer_name=model_name,
        context_window=context_window,
        max_new_tokens=max_new_tokens,
        generate_kwargs={"do_sample": True, "temperature": 0.7, "top_p": 0.95},
        device_map=device,
        tokenizer_kwargs={"trust_remote_code": True, "cache_dir": "./model_cache"},
        model_kwargs=model_kwargs,
    )

    return llm
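

if __name__ == "__main__":
    # Minimal usage sketch, assuming the Phi-3 weights can be downloaded
    # (or are already cached in ./model_cache): build the LLM and run a
    # one-off completion to sanity-check the setup. The prompt text here
    # is illustrative, not part of the chatbot itself.
    llm = setup_llm()
    response = llm.complete("In one sentence, what is a CSV file?")
    print(response.text)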