Upload app.py
app.py
CHANGED
@@ -9,7 +9,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 #model_name = "RickyDeSkywalker/TheoremLlama"
 #model_name = "unsloth/Llama-3.2-1B-Instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model_name = "internlm/internlm2-math-plus-7b"
+#model_name = "internlm/internlm2-math-plus-7b"
+model_name = "deepseek-ai/DeepSeek-Prover-V1.5-RL"
 HF_TOKEN = os.environ.get("HF_TOKEN")
 #login(HF_TOKEN)

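This hunk swaps the model from internlm/internlm2-math-plus-7b to deepseek-ai/DeepSeek-Prover-V1.5-RL while keeping the float16 load path. A minimal sketch of the resulting load sequence, with an assumed fallback to float32 when no GPU is available (the fallback is not part of this commit; float16 inference on CPU is often slow or unsupported):

# Sketch only, not the Space's code: load DeepSeek-Prover-V1.5-RL, assuming an
# fp32 fallback on CPU-only hardware (the commit itself always uses float16).
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "deepseek-ai/DeepSeek-Prover-V1.5-RL"
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name, trust_remote_code=True, torch_dtype=dtype
).to(device).eval()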
@@ -17,6 +18,10 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 # Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
 model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).eval().to(device)
 model = model.eval()
+terminators = [tokenizer.eos_token_id,
+               tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+               tokenizer.convert_tokens_to_ids("<|reserved_special_token_26|>")]
+

 #generator = pipeline('text-generation', model=model_name, trust_remote_code=True, token=HF_TOKEN)

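The added terminator list keeps Llama-3-style special tokens (`<|eot_id|>`, `<|reserved_special_token_26|>`) that appear to come from the Llama-based models commented out above. Whether the DeepSeek-Prover tokenizer actually defines them is not verified in this change; a defensive variant that only keeps ids the tokenizer resolves might look like this (assumption: unknown tokens come back as None or the unk id):

# Sketch, not part of the commit: build the terminator list defensively, assuming
# convert_tokens_to_ids returns None or the unk id for tokens the DeepSeek-Prover
# tokenizer does not define.
terminators = [tokenizer.eos_token_id]
for tok in ("<|eot_id|>", "<|reserved_special_token_26|>"):
    tok_id = tokenizer.convert_tokens_to_ids(tok)
    if tok_id is not None and tok_id != tokenizer.unk_token_id:
        terminators.append(tok_id)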
@@ -25,9 +30,13 @@ model = model.eval()
 def generate_lean4_code(prompt):
     #result = generator(prompt, max_length=1000, num_return_sequences=1)
     #return result[0]['generated_text']
-    response, history = model.chat(tokenizer, prompt, history=[], meta_instruction="")
-    print(response, history)
-    return response
+    #response, history = model.chat(tokenizer, prompt, history=[], meta_instruction="")
+    #print(response, history)
+    #return response
+    input_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(device)
+    results = model.generate(input_ids, max_new_tokens=1024, eos_token_id=terminators, do_sample=True, temperature=0.85, top_p=0.9)
+    result_str = tokenizer.decode(results[0], skip_special_tokens=True)
+    return result_str

 # Gradio Interface
 title = "Lean 4 Code Generation with TheoremLlama"