tclh123 committed
Commit 1f85c85
1 Parent(s): 6be637d
Files changed (2)
  1. err.log +27 -0
  2. main.py +75 -5
err.log ADDED
@@ -0,0 +1,27 @@
+
+https://huggingface.co/spaces/tclh123/fastapi-minimind-v1-small
+
+Runtime error
+Exit code: 1. Reason: 1007, in _find_and_load
+  File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
+  File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
+  File "<frozen importlib._bootstrap_external>", line 850, in exec_module
+  File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
+  File "/home/user/app/./main.py", line 12, in <module>
+    pipe_flan = pipeline("text2text-generation", model=MODEL, trust_remote_code=True)
+  File "/usr/local/lib/python3.9/site-packages/transformers/pipelines/__init__.py", line 940, in pipeline
+    framework, model = infer_framework_load_model(
+  File "/usr/local/lib/python3.9/site-packages/transformers/pipelines/base.py", line 302, in infer_framework_load_model
+    raise ValueError(
+ValueError: Could not load model jingyaogong/minimind-v1-small with any of the following classes: (<class 'transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM'>,). See the original errors:
+
+while loading with AutoModelForSeq2SeqLM, an error is thrown:
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.9/site-packages/transformers/pipelines/base.py", line 289, in infer_framework_load_model
+    model = model_class.from_pretrained(model, **kwargs)
+  File "/usr/local/lib/python3.9/site-packages/transformers/models/auto/auto_factory.py", line 567, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers_modules.jingyaogong.minimind-v1-small.fad3b10dd5e251cb6f8050e5a3c8080efbccfdd2.LMConfig.LMConfig'> for this kind of AutoModel: AutoModelForSeq2SeqLM.
+Model type should be one of BartConfig, BigBirdPegasusConfig, BlenderbotConfig, BlenderbotSmallConfig, EncoderDecoderConfig, FSMTConfig, GPTSanJapaneseConfig, LEDConfig, LongT5Config, M2M100Config, MarianConfig, MBartConfig, MT5Config, MvpConfig, NllbMoeConfig, PegasusConfig, PegasusXConfig, PLBartConfig, ProphetNetConfig, Qwen2AudioConfig, SeamlessM4TConfig, SeamlessM4Tv2Config, SwitchTransformersConfig, T5Config, UMT5Config, XLMProphetNetConfig.
+
+Actually: `from transformers import PretrainedConfig`
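
The traceback above comes down to a config-mapping problem: the repo's custom `LMConfig` is a `PretrainedConfig` subclass, but it is not registered for `AutoModelForSeq2SeqLM`, which is the class a `text2text-generation` pipeline tries to instantiate. A minimal sketch of the workaround direction, loading through the causal-LM auto classes with `trust_remote_code=True`; the model id is the one used in the updated `main.py` below, and the snippet assumes the repo maps its classes to `AutoModelForCausalLM`:

```python
# Sketch only: bypass the text2text-generation pipeline that raised the
# ValueError and load the model through the causal-LM auto classes.
from transformers import AutoTokenizer, AutoModelForCausalLM, PretrainedConfig

MODEL = "tclh123/minimind-v1-small"  # id taken from the updated main.py below

tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL, trust_remote_code=True)

# The remote LMConfig is still a PretrainedConfig subclass; it is simply not
# in the AutoModelForSeq2SeqLM mapping, hence the original error.
assert isinstance(model.config, PretrainedConfig)
```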
main.py CHANGED
@@ -2,20 +2,90 @@ from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse

-from transformers import pipeline
+import torch
+# from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM

 app = FastAPI()

 # MODEL = "google/flan-t5-small"
-MODEL = "jingyaogong/minimind-v1-small"
+# MODEL = "jingyaogong/minimind-v1-small"
+MODEL = "tclh123/minimind-v1-small"

-pipe_flan = pipeline("text2text-generation", model=MODEL, trust_remote_code=True)
+# pipe_flan = pipeline("text2text-generation", model=MODEL, trust_remote_code=True)
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(MODEL, trust_remote_code=True)
+model = model.to(device)
+model = model.eval()
+
+
+def query(message, max_seq_len=512, temperature=0.7, top_k=16):
+    prompt = '请问,' + message
+    messages = []
+    messages.append({"role": "user", "content": prompt})
+
+    stream = True
+
+    # print(messages)
+    new_prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )[-(max_seq_len - 1):]
+
+    x = tokenizer(new_prompt).data['input_ids']
+    x = (torch.tensor(x, dtype=torch.long, device=device)[None, ...])
+
+    res_y = model.generate(x, tokenizer.eos_token_id, max_new_tokens=max_seq_len, temperature=temperature, top_k=top_k, stream=stream)
+
+    try:
+        y = next(res_y)
+    except StopIteration:
+        # print("No answer")
+        return ""
+
+    ret = []
+    history_idx = 0
+    while y != None:
+        answer = tokenizer.decode(y[0].tolist())
+        if answer and answer[-1] == '�':
+            try:
+                y = next(res_y)
+            except:
+                break
+            continue
+        # print(answer)
+        if not len(answer):
+            try:
+                y = next(res_y)
+            except:
+                break
+            continue
+
+        # print(answer[history_idx:], end='', flush=True)
+        ret.append(answer[history_idx:])
+        try:
+            y = next(res_y)
+        except:
+            break
+        history_idx = len(answer)
+        if not stream:
+            break
+
+    # print('\n')
+    ret.append('\n')
+
+    return ''.join(ret)


 @app.get("/infer_t5")
 def t5(input):
-    output = pipe_flan(input)
-    return {"output": output[0]["generated_text"]}
+    # output = pipe_flan(input)
+    # return {"output": output[0]["generated_text"]}
+    output = query(input)
+    return {"output": output}


 app.mount("/", StaticFiles(directory="static", html=True), name="static")
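
After this change, the endpoint can be exercised locally with FastAPI's test client; a rough sketch, assuming the updated file is importable as `main` and a `static/` directory exists for the mount:

```python
# Sketch: call the rewritten /infer_t5 endpoint in-process.
from fastapi.testclient import TestClient

from main import app  # assumes the file above is saved as main.py

client = TestClient(app)
resp = client.get("/infer_t5", params={"input": "你好"})
print(resp.json())  # expected shape: {"output": "..."}
```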