Chamin09 committed on
Commit 2202e15 · verified · 1 Parent(s): 5ae6711

Update models/llm_setup.py

Files changed (1)
  1. models/llm_setup.py +64 -54
models/llm_setup.py CHANGED
@@ -1,66 +1,76 @@
- from typing import Optional
- #from llama_index.llms import HuggingFaceLLM
- #from llama_index.llms.huggingface import HuggingFaceLLM
- from llama_index.llms.huggingface import HuggingFaceInferenceAPI
  import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

  def setup_llm(model_name: str = "microsoft/phi-3-mini-4k-instruct",
                device: str = None,
                context_window: int = 4096,
-               max_new_tokens: int = 512) -> HuggingFaceLLM:
-     """
-     Set up the language model for the CSV chatbot.
-
-     Args:
-         model_name: Name of the Hugging Face model to use
-         device: Device to run the model on ('cuda', 'cpu', etc.)
-         context_window: Maximum context window size
-         max_new_tokens: Maximum number of new tokens to generate
-
-     Returns:
-         Configured LLM instance
-     """
      # Determine device
      if device is None:
          device = "cuda" if torch.cuda.is_available() else "cpu"

-     # Configure quantization for memory efficiency
-     if device == "cuda":
-         quantization_config = BitsAndBytesConfig(
-             load_in_4bit=True,
-             bnb_4bit_compute_dtype=torch.float16
          )
-     else:
-         quantization_config = None
-
-     # Configure tokenizer
-     tokenizer = AutoTokenizer.from_pretrained(
-         model_name,
-         trust_remote_code=True
-     )
-
-     # Configure model with appropriate parameters for HF Spaces
-     model_kwargs = {
-         "trust_remote_code": True,
-         "torch_dtype": torch.float16,
-     }
-
-     if quantization_config:
-         model_kwargs["quantization_config"] = quantization_config
-
-     # Initialize LLM
-     llm = HuggingFaceLLM(
-         model_name=model_name,
-         tokenizer_name=model_name,
-         context_window=context_window,
-         max_new_tokens=max_new_tokens,
-         generate_kwargs={"temperature": 0.7, "top_p": 0.95},
-         device_map=device,
-         tokenizer_kwargs={"trust_remote_code": True},
-         model_kwargs=model_kwargs,
-         # Cache the model to avoid reloading
-         cache_folder="./model_cache"
-     )

      return llm
 
+ # Updated import path
+ #from llama_index.llms import HuggingFaceInferenceAPI
+ from llama_index.llms.huggingface import HuggingFaceLLM
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
  import torch
+ # If that doesn't work, try:
+ # from llama_index.llms.huggingface import HuggingFaceLLM

  def setup_llm(model_name: str = "microsoft/phi-3-mini-4k-instruct",
                device: str = None,
                context_window: int = 4096,
+               max_new_tokens: int = 512):
+     """Set up the language model for the CSV chatbot."""
      # Determine device
      if device is None:
          device = "cuda" if torch.cuda.is_available() else "cpu"

+     # Try the updated class
+     try:
+         # First attempt with new API
+         from llama_index.llms.huggingface import HuggingFaceLLM
+
+         # Configure model with appropriate parameters for HF Spaces
+         model_kwargs = {
+             "trust_remote_code": True,
+             "torch_dtype": torch.float16,
+         }
+
+         if device == "cuda":
+             from transformers import BitsAndBytesConfig
+             quantization_config = BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_compute_dtype=torch.float16
+             )
+             model_kwargs["quantization_config"] = quantization_config
+
+         # Initialize LLM
+         llm = HuggingFaceLLM(
+             model_name=model_name,
+             tokenizer_name=model_name,
+             context_window=context_window,
+             max_new_tokens=max_new_tokens,
+             generate_kwargs={"temperature": 0.7, "top_p": 0.95},
+             device_map=device,
+             tokenizer_kwargs={"trust_remote_code": True},
+             model_kwargs=model_kwargs,
+             # Cache the model to avoid reloading
+             cache_folder="./model_cache"
          )
+
+     except (ImportError, AttributeError):
+         # Fallback to other API options
+         try:
+             from llama_index.llms import HuggingFaceInferenceAPI
+
+             llm = HuggingFaceInferenceAPI(
+                 model_name=model_name,
+                 tokenizer_name=model_name,
+                 context_window=context_window,
+                 max_new_tokens=max_new_tokens,
+                 generate_kwargs={"temperature": 0.7, "top_p": 0.95}
+             )
+         except:
+             # Last resort - try the base LLM class
+             from llama_index.llms.base import LLM
+             from llama_index.llms.huggingface import HuggingFaceInference
+
+             llm = HuggingFaceInference(
+                 model_name=model_name,
+                 tokenizer_name=model_name,
+                 context_window=context_window,
+                 max_new_tokens=max_new_tokens,
+                 generate_kwargs={"temperature": 0.7, "top_p": 0.95}
+             )

      return llm
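
For reference, a minimal sketch of how the updated helper might be called (the import path assumes the file lives at models/llm_setup.py in the Space, and the prompt string is illustrative; neither is part of the commit):

    # Hypothetical usage sketch -- not part of this commit.
    from models.llm_setup import setup_llm

    # Defaults: microsoft/phi-3-mini-4k-instruct, 4096-token window, 512 new tokens
    llm = setup_llm()

    # llama_index LLMs expose complete(); the response object carries the text
    response = llm.complete("Describe the columns of this CSV in one sentence.")
    print(response.text)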