Spaces:

Chamin09
/

ChatCSV

Sleeping

App Files Files Community

Chamin09 committed on Apr 22

Commit

2202e15

verified ·

1 Parent(s): 5ae6711

Update models/llm_setup.py

Browse files

Files changed (1) hide show

models/llm_setup.py +64 -54

models/llm_setup.py CHANGED Viewed

@@ -1,66 +1,76 @@
-from typing import Optional
-#from llama_index.llms import HuggingFaceLLM
-#from llama_index.llms.huggingface import HuggingFaceLLM
-from llama_index.llms.huggingface import HuggingFaceInferenceAPI
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 def setup_llm(model_name: str = "microsoft/phi-3-mini-4k-instruct",
               device: str = None,
               context_window: int = 4096,
-              max_new_tokens: int = 512) -> HuggingFaceLLM:
-    """
-    Set up the language model for the CSV chatbot.
-    Args:
-        model_name: Name of the Hugging Face model to use
-        device: Device to run the model on ('cuda', 'cpu', etc.).
-            When None, 'cuda' is used if torch.cuda.is_available(),
-            otherwise 'cpu'.
-        context_window: Maximum context window size
-        max_new_tokens: Maximum number of new tokens to generate
-    Returns:
-        Configured LLM instance
-    """
     # Determine device
     if device is None:
         device = "cuda" if torch.cuda.is_available() else "cpu"
-    # Configure quantization for memory efficiency
-    # (4-bit quantization is only configured when running on CUDA)
-    if device == "cuda":
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16
         )
-    else:
-        quantization_config = None
-    # Configure tokenizer
-    # NOTE(review): this tokenizer object is not passed to HuggingFaceLLM
-    # below (only tokenizer_name is) -- confirm whether it is still needed.
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_name,
-        trust_remote_code=True
-    )
-    # Configure model with appropriate parameters for HF Spaces
-    model_kwargs = {
-        "trust_remote_code": True,
-        "torch_dtype": torch.float16,
-    }
-    if quantization_config:
-        model_kwargs["quantization_config"] = quantization_config
-    # Initialize LLM
-    # NOTE(review): both HuggingFaceLLM imports above are commented out in
-    # this revision, so this name (and the return annotation) looks
-    # unresolved -- confirm against the full file.
-    llm = HuggingFaceLLM(
-        model_name=model_name,
-        tokenizer_name=model_name,
-        context_window=context_window,
-        max_new_tokens=max_new_tokens,
-        generate_kwargs={"temperature": 0.7, "top_p": 0.95},
-        device_map=device,
-        tokenizer_kwargs={"trust_remote_code": True},
-        model_kwargs=model_kwargs,
-        # Cache the model to avoid reloading
-        cache_folder="./model_cache"
-    )
     return llm

+# Updated import path
+#from llama_index.llms import HuggingFaceInferenceAPI
+from llama_index.llms.huggingface import HuggingFaceLLM
+from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 import torch
+# If that doesn't work, try:
+# from llama_index.llms.huggingface import HuggingFaceLLM
 def setup_llm(model_name: str = "microsoft/phi-3-mini-4k-instruct",
               device: str = None,
               context_window: int = 4096,
+              max_new_tokens: int = 512):
+    """Set up the language model for the CSV chatbot.
+    A locally loaded ``HuggingFaceLLM`` is tried first. If building it
+    raises ``ImportError`` or ``AttributeError``, the reason is logged and
+    the function falls back to the ``HuggingFaceInferenceAPI`` client.
+    Args:
+        model_name: Hugging Face model id; also used as the tokenizer name.
+        device: Value passed as ``device_map`` to the local model. When
+            None, "cuda" is used if ``torch.cuda.is_available()``,
+            otherwise "cpu".
+        context_window: Context window size passed to the LLM wrapper.
+        max_new_tokens: Maximum number of new tokens passed to the LLM wrapper.
+    Returns:
+        The configured LLM instance.
+    Raises:
+        ImportError: If the local path failed and ``HuggingFaceInferenceAPI``
+            cannot be imported from either known module path.
+        Any other error raised while constructing the fallback client is
+        propagated unchanged instead of being swallowed.
+    """
+    # Function-scope import keeps the file's top-level import block untouched.
+    import logging
+    logger = logging.getLogger(__name__)
+    # Shared by both backends so the sampling settings cannot drift apart.
+    generate_kwargs = {"temperature": 0.7, "top_p": 0.95}
     # Determine device
     if device is None:
         device = "cuda" if torch.cuda.is_available() else "cpu"
+    # Preferred path: load the model locally.
+    try:
+        from llama_index.llms.huggingface import HuggingFaceLLM
+        # Configure model with appropriate parameters for HF Spaces
+        model_kwargs = {
+            "trust_remote_code": True,
+            "torch_dtype": torch.float16,
+        }
+        if device == "cuda":
+            # 4-bit quantization is only configured when running on CUDA.
+            from transformers import BitsAndBytesConfig
+            model_kwargs["quantization_config"] = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.float16,
+            )
+        # Initialize LLM
+        llm = HuggingFaceLLM(
+            model_name=model_name,
+            tokenizer_name=model_name,
+            context_window=context_window,
+            max_new_tokens=max_new_tokens,
+            generate_kwargs=generate_kwargs,
+            device_map=device,
+            tokenizer_kwargs={"trust_remote_code": True},
+            model_kwargs=model_kwargs,
+            # Cache the model to avoid reloading
+            # NOTE(review): confirm HuggingFaceLLM accepts ``cache_folder``.
+            cache_folder="./model_cache",
         )
+    except (ImportError, AttributeError) as exc:
+        # Same exception set as before, but the reason is no longer silent.
+        logger.warning(
+            "Local HuggingFaceLLM setup failed (%s: %s); "
+            "falling back to HuggingFaceInferenceAPI",
+            type(exc).__name__,
+            exc,
+        )
+    else:
+        return llm
+    # Fallback: hosted inference client. Try the import path the file header
+    # uses first, then the legacy location. If both fail, the ImportError
+    # propagates with its real cause attached.
+    try:
+        from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+    except ImportError:
+        from llama_index.llms import HuggingFaceInferenceAPI
+    # Constructed outside any try block so configuration or network errors
+    # surface as themselves rather than as a misleading ImportError.
+    llm = HuggingFaceInferenceAPI(
+        model_name=model_name,
+        tokenizer_name=model_name,
+        context_window=context_window,
+        max_new_tokens=max_new_tokens,
+        generate_kwargs=generate_kwargs,
+    )
     return llm