larry1129 committed (verified)
Commit acf75de · Parent(s): 8d3b1ec

Update app.py

Files changed (1):
  1. app.py (+55 −19)
app.py CHANGED
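For context: the hunk below uses several module-level names that are defined earlier in app.py and do not appear in this diff (`base_model_name`, `adapter_model_name`, `hf_token`, and the `model`/`tokenizer` globals). A minimal sketch of what that preamble presumably looks like; the repo IDs and the environment-variable name are placeholders, not values from the commit:

import os
import spaces  # Hugging Face Spaces helper that provides the @spaces.GPU decorator

# Placeholder identifiers -- the real values live earlier in app.py and are not shown in this diff
base_model_name = "your-org/your-base-model"
adapter_model_name = "your-org/your-lora-adapter"
hf_token = os.environ.get("HF_TOKEN")

# Loaded lazily inside generate_response, so the GPU is only claimed when a request arrives
model = None
tokenizer = None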
@@ -32,38 +32,74 @@ def generate_prompt(instruction, input_text=""):
     return prompt
 
 # Define the response-generation function and decorate it with @spaces.GPU
-@spaces.GPU(duration=30)
+@spaces.GPU(duration=40)  # suggestion: increase duration to 120
 def generate_response(instruction, input_text):
     global model, tokenizer
 
     if model is None:
+        print("Starting to load the model...")
         # Check whether bitsandbytes is installed
         import importlib.util
         if importlib.util.find_spec("bitsandbytes") is None:
             import subprocess
             subprocess.call(["pip", "install", "--upgrade", "bitsandbytes"])
 
-        # Import the GPU-dependent libraries inside the function
-        import torch
-        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-        from peft import PeftModel
-
-        # Create the quantization config
-        bnb_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.float16
-        )
-
-        # Load the tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
-
-        # Load the base model
-
+        try:
+            # Import the GPU-dependent libraries inside the function
+            import torch
+            from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+            from peft import PeftModel
+
+            # Create the quantization config
+            bnb_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch.float16
+            )
+
+            # Load the tokenizer
+            tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
+            print("Tokenizer loaded successfully.")
+
+            # Load the base model
+            base_model = AutoModelForCausalLM.from_pretrained(
+                base_model_name,
+                quantization_config=bnb_config,
+                device_map="auto",
+                use_auth_token=hf_token,
+                trust_remote_code=True
+            )
+            print("Base model loaded successfully.")
+
+            # Load the adapter model
+            model = PeftModel.from_pretrained(
+                base_model,
+                adapter_model_name,
+                torch_dtype=torch.float16,
+                use_auth_token=hf_token
+            )
+            print("Adapter model loaded successfully.")
+
+            # Set the pad_token
+            tokenizer.pad_token = tokenizer.eos_token
+            model.config.pad_token_id = tokenizer.pad_token_id
+
+            # Switch to evaluation mode
+            model.eval()
+            print("Model switched to evaluation mode.")
+        except Exception as e:
+            print("Error while loading the model:", e)
+            raise e
+    else:
         # Import the required libraries inside the function
         import torch
 
+    # Check that the model and tokenizer loaded correctly
+    if model is None or tokenizer is None:
+        print("Model or tokenizer was not loaded correctly.")
+        raise ValueError("Model or tokenizer was not loaded correctly.")
+
     # Build the prompt
     prompt = generate_prompt(instruction, input_text)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
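The hunk ends right after the prompt is tokenized; the rest of generate_response lies outside the diff context. A minimal sketch of how the function presumably continues, assuming a standard transformers generate-and-decode step (the sampling parameters are illustrative, not taken from the commit):

    # Generate a completion (illustrative parameters, not from this commit)
    import torch
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

Keeping the heavy imports and the model load itself inside the @spaces.GPU-decorated function follows the usual ZeroGPU pattern: the Space starts without a GPU, and hardware is attached only for the duration of each call.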