tclh123 committed
Commit 1f85c85
1 Parent(s): 6be637d
Files changed (2)
  1. err.log +27 -0
  2. main.py +75 -5
err.log ADDED
@@ -0,0 +1,27 @@
+
+https://huggingface.co/spaces/tclh123/fastapi-minimind-v1-small
+
+Runtime error
+Exit code: 1. Reason: 1007, in _find_and_load
+  File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
+  File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
+  File "<frozen importlib._bootstrap_external>", line 850, in exec_module
+  File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
+  File "/home/user/app/./main.py", line 12, in <module>
+    pipe_flan = pipeline("text2text-generation", model=MODEL, trust_remote_code=True)
+  File "/usr/local/lib/python3.9/site-packages/transformers/pipelines/__init__.py", line 940, in pipeline
+    framework, model = infer_framework_load_model(
+  File "/usr/local/lib/python3.9/site-packages/transformers/pipelines/base.py", line 302, in infer_framework_load_model
+    raise ValueError(
+ValueError: Could not load model jingyaogong/minimind-v1-small with any of the following classes: (<class 'transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM'>,). See the original errors:
+
+while loading with AutoModelForSeq2SeqLM, an error is thrown:
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.9/site-packages/transformers/pipelines/base.py", line 289, in infer_framework_load_model
+    model = model_class.from_pretrained(model, **kwargs)
+  File "/usr/local/lib/python3.9/site-packages/transformers/models/auto/auto_factory.py", line 567, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers_modules.jingyaogong.minimind-v1-small.fad3b10dd5e251cb6f8050e5a3c8080efbccfdd2.LMConfig.LMConfig'> for this kind of AutoModel: AutoModelForSeq2SeqLM.
+Model type should be one of BartConfig, BigBirdPegasusConfig, BlenderbotConfig, BlenderbotSmallConfig, EncoderDecoderConfig, FSMTConfig, GPTSanJapaneseConfig, LEDConfig, LongT5Config, M2M100Config, MarianConfig, MBartConfig, MT5Config, MvpConfig, NllbMoeConfig, PegasusConfig, PegasusXConfig, PLBartConfig, ProphetNetConfig, Qwen2AudioConfig, SeamlessM4TConfig, SeamlessM4Tv2Config, SwitchTransformersConfig, T5Config, UMT5Config, XLMProphetNetConfig.
+
+Actually: `from transformers import PretrainedConfig`
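
The traceback above comes down to a config-mapping problem: the repo's custom `LMConfig` is a `PretrainedConfig` subclass, but it is not registered for `AutoModelForSeq2SeqLM`, which is the class a `text2text-generation` pipeline tries to instantiate. A minimal sketch of the workaround direction, loading through the causal-LM auto classes with `trust_remote_code=True`; the model id is the one used in the updated `main.py` below, and the snippet assumes the repo maps its classes to `AutoModelForCausalLM`:

```python
# Sketch only: bypass the text2text-generation pipeline that raised the
# ValueError and load the model through the causal-LM auto classes.
from transformers import AutoTokenizer, AutoModelForCausalLM, PretrainedConfig

MODEL = "tclh123/minimind-v1-small"  # id taken from the updated main.py below

tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL, trust_remote_code=True)

# The remote LMConfig is still a PretrainedConfig subclass; it is simply not
# in the AutoModelForSeq2SeqLM mapping, hence the original error.
assert isinstance(model.config, PretrainedConfig)
```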
main.py CHANGED
@@ -2,20 +2,90 @@ from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse

-from transformers import pipeline
+import torch
+# from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM

 app = FastAPI()

 # MODEL = "google/flan-t5-small"
-MODEL = "jingyaogong/minimind-v1-small"
+# MODEL = "jingyaogong/minimind-v1-small"
+MODEL = "tclh123/minimind-v1-small"

-pipe_flan = pipeline("text2text-generation", model=MODEL, trust_remote_code=True)
+# pipe_flan = pipeline("text2text-generation", model=MODEL, trust_remote_code=True)
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(MODEL, trust_remote_code=True)
+model = model.to(device)
+model = model.eval()
+
+
+def query(message, max_seq_len=512, temperature=0.7, top_k=16):
+    prompt = '请问,' + message
+    messages = []
+    messages.append({"role": "user", "content": prompt})
+
+    stream = True
+
+    # print(messages)
+    new_prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )[-(max_seq_len - 1):]
+
+    x = tokenizer(new_prompt).data['input_ids']
+    x = (torch.tensor(x, dtype=torch.long, device=device)[None, ...])
+
+    res_y = model.generate(x, tokenizer.eos_token_id, max_new_tokens=max_seq_len, temperature=temperature, top_k=top_k, stream=stream)
+
+    try:
+        y = next(res_y)
+    except StopIteration:
+        # print("No answer")
+        return ""
+
+    ret = []
+    history_idx = 0
+    while y != None:
+        answer = tokenizer.decode(y[0].tolist())
+        if answer and answer[-1] == '�':
+            try:
+                y = next(res_y)
+            except:
+                break
+            continue
+        # print(answer)
+        if not len(answer):
+            try:
+                y = next(res_y)
+            except:
+                break
+            continue
+
+        # print(answer[history_idx:], end='', flush=True)
+        ret.append(answer[history_idx:])
+        try:
+            y = next(res_y)
+        except:
+            break
+        history_idx = len(answer)
+        if not stream:
+            break
+
+    # print('\n')
+    ret.append('\n')
+
+    return ''.join(ret)


 @app.get("/infer_t5")
 def t5(input):
-    output = pipe_flan(input)
-    return {"output": output[0]["generated_text"]}
+    # output = pipe_flan(input)
+    # return {"output": output[0]["generated_text"]}
+    output = query(input)
+    return {"output": output}


 app.mount("/", StaticFiles(directory="static", html=True), name="static")
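
After this change, the endpoint can be exercised locally with FastAPI's test client; a rough sketch, assuming the updated file is importable as `main` and a `static/` directory exists for the mount:

```python
# Sketch: call the rewritten /infer_t5 endpoint in-process.
from fastapi.testclient import TestClient

from main import app  # assumes the file above is saved as main.py

client = TestClient(app)
resp = client.get("/infer_t5", params={"input": "你好"})
print(resp.json())  # expected shape: {"output": "..."}
```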