updated
app.py CHANGED
@@ -45,25 +45,25 @@ def load_model(
     #     n_ctx=1024
     # )
 
-    model = LlamaCpp(
-        model_path=final_model_path,
-        temperature=0.3,
-        max_tokens=2000,
-        top_p=1,
-        n_ctx=1024,
-        callback_manager=callback_manager,
-        verbose=True,
-    )
-
-    # model = Llama(
+    # model = LlamaCpp(
     #     model_path=final_model_path,
     #     temperature=0.3,
     #     max_tokens=2000,
+    #     top_p=1,
     #     n_ctx=1024,
-    #
-    #
+    #     callback_manager=callback_manager,
+    #     verbose=True,
     # )
 
+    model = Llama(
+        model_path=final_model_path,
+        temperature=0.3,
+        max_tokens=2000,
+        n_ctx=1024,
+        # n_threads=8,
+        echo=False
+    )
+
 
     print("Model loaded!")
     return model
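Note on this hunk: the commit swaps LangChain's LlamaCpp wrapper for llama-cpp-python's Llama class, but keeps constructor-style sampling arguments. In llama-cpp-python, temperature and max_tokens are per-call options on create_completion / create_chat_completion, not load-time options, and echo belongs to create_completion; depending on the installed version, the extra keyword arguments passed to Llama(...) are silently ignored or rejected. A minimal sketch of the intended split, using a placeholder model path rather than the Space's final_model_path:

from llama_cpp import Llama

# Load-time options only: where the weights are and how large the context is.
llm = Llama(
    model_path="model.gguf",  # placeholder; the app uses final_model_path
    n_ctx=1024,
    # n_threads=8,            # optional CPU thread count, commented as in the commit
    verbose=False,
)

# Sampling options move to the call site.
out = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You write short stories for kids."},
        {"role": "user", "content": "A calm bedtime story about a fox."},
    ],
    temperature=0.3,
    max_tokens=2000,
)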
@@ -72,6 +72,8 @@ def load_model(
 llm = load_model()
 
 
+
+
 # Create a prompt template
 # system = """You are a helpful and creative assistant that specializes in generating engaging and imaginative stories for kids.
 # Based on the user's provided mood, preferred story type, theme, age, and desired story length of 500-600 words, create a unique and captivating story.
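The generate_story endpoint in the next hunk consumes a StoryRequest body whose definition sits outside this diff. Only theme and txt are confirmed by the code below; going by the commented-out system prompt above (mood, preferred story type, theme, age), it is presumably a Pydantic model along these lines; a hypothetical reconstruction, not the Space's actual code:

from typing import Optional
from pydantic import BaseModel

class StoryRequest(BaseModel):
    theme: str                        # referenced as story_request.theme
    txt: str                          # free-form details, referenced as story_request.txt
    mood: Optional[str] = None        # guessed from the commented system prompt
    story_type: Optional[str] = None  # guessed from the commented system prompt
    age: Optional[int] = None         # guessed from the commented system prompt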
@@ -97,19 +99,28 @@ async def generate_story(story_request: StoryRequest):
     - **Theme:** {story_request.theme}
     - **Details Provided:** {story_request.txt}
     """
 
-    final_prompt = prompt_template.format(text=story)
-
-    # Create the LLMChain
-    # chain = LLMChain(llm=llm, prompt=prompt_template)
-    chain = llm | prompt_template
-
-    # try:
-    #     response = chain.invoke(final_prompt)
-    #     return {"story": response}
-    # except Exception as e:
-    #     raise HTTPException(status_code=500, detail=str(e))
-    response = chain.invoke(final_prompt)
+    response = llm.create_chat_completion(
+        messages = [
+            {"role": "system", "content": system},
+            {"role": "user","content": story}
+        ]
+    )
+
+    # final_prompt = prompt_template.format(text=story)
+
+    # # Create the LLMChain
+    # # chain = LLMChain(llm=llm, prompt=prompt_template)
+    # chain = llm | prompt_template
+
+    # # try:
+    # #     response = chain.invoke(final_prompt)
+    # #     return {"story": response}
+    # # except Exception as e:
+    # #     raise HTTPException(status_code=500, detail=str(e))
+    # response = chain.invoke(final_prompt)
+
+
 
     if not response:
         raise HTTPException(status_code=500, detail="Failed to generate the story")
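One more caveat: create_chat_completion returns an OpenAI-style chat-completion dict, so the `if not response:` guard only catches a missing dict, never an empty story. The generated text lives at response["choices"][0]["message"]["content"]; a hedged sketch of pulling it out inside the handler, assuming the endpoint returns JSON as the commented-out chain code did:

from fastapi import HTTPException

def extract_story(response: dict) -> dict:
    # llama-cpp-python mirrors the OpenAI chat-completion schema:
    # the generated text sits under choices[0].message.content.
    story_text = response["choices"][0]["message"]["content"]
    if not story_text:
        raise HTTPException(status_code=500, detail="Failed to generate the story")
    return {"story": story_text}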