Update app.py
app.py CHANGED
@@ -19,14 +19,26 @@ model = AutoModelForCausalLM.from_pretrained(model_path)
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
+
+def generate_response(model, tokenizer, instruction):
+    """Generate a response from the model based on an instruction."""
+    messages = [{"role": "user", "content": instruction}]
+    input_text = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    inputs = tokenizer.encode(input_text, return_tensors="pt")
+    outputs = model.generate(
+        inputs, max_new_tokens=128, temperature=0.2, top_p=0.9, do_sample=True
+    )
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+
+
+
 @app.post("/generate")
 def generate_text(input: ModelInput):
     try:
-        result = generator(
-            input.prompt,
-            max_new_tokens=input.max_new_tokens,
-            return_full_text=False,
-        )
-        return {"generated_text": result[0]["generated_text"]}
+        response = generate_response(model, tokenizer, input.prompt)
+        return {"generated_text": response}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
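The hunk references names defined earlier in app.py (app, ModelInput, model_path, HTTPException) that the diff does not show. For context, here is a minimal sketch of how the top of the file plausibly looks, assuming a FastAPI app with a pydantic ModelInput model; the field names are inferred from input.prompt and input.max_new_tokens in the removed code, and the model path is a placeholder, not taken from the commit:

# Hypothetical reconstruction of the part of app.py above this hunk;
# only the identifiers, not the exact code, are confirmed by the diff.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = FastAPI()


class ModelInput(BaseModel):
    # Field names inferred from input.prompt / input.max_new_tokens in the diff
    prompt: str
    max_new_tokens: int = 128


model_path = "your-model-id"  # placeholder; the real path is set earlier in the file
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

With the app running (for example via uvicorn app:app), the endpoint can be exercised like this:

# Hypothetical client call; the URL and port depend on how the app is served
import requests

r = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "Write a haiku about FastAPI."},
)
print(r.json()["generated_text"])

Note that after this change generate_response hardcodes max_new_tokens=128, so the max_new_tokens field of ModelInput is no longer read, and the generator pipeline created on line 19 is left unused.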