Spaces:

zeeshan391
/

fast_api_deploy

Sleeping

zeeshan391 committed on Sep 13, 2024

Commit

0ab9f12

verified ·

1 Parent(s): c363d1a

updated

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-# from langchain_community.llms import LlamaCpp
 from huggingface_hub.file_download import http_get
 from llama_cpp import Llama
 from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
@@ -45,25 +45,25 @@ def load_model(
     #     n_ctx=1024
     # )
-    # model = LlamaCpp(
-    #     model_path=final_model_path,
-    #     temperature=0.3,
-    #     max_tokens=2000,
-    #     top_p=1,
-    #     n_ctx=1024,
-    #     callback_manager=callback_manager,
-    #     verbose=True,
-    # )
-    model = Llama(
         model_path=final_model_path,
         temperature=0.3,
         max_tokens=2000,
         n_ctx=1024,
-        # n_threads=8,
-        echo=False
     )
     print("Model loaded!")
     return model

 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+from langchain_community.llms import LlamaCpp
 from huggingface_hub.file_download import http_get
 from llama_cpp import Llama
 from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
     #     n_ctx=1024
     # )
+    model = LlamaCpp(
         model_path=final_model_path,
         temperature=0.3,
         max_tokens=2000,
+        top_p=1,
         n_ctx=1024,
+        callback_manager=callback_manager,
+        verbose=True,
     )
+    # model = Llama(
+    #     model_path=final_model_path,
+    #     temperature=0.3,
+    #     max_tokens=2000,
+    #     n_ctx=1024,
+    #     # n_threads=8,
+    #     echo=False
+    # )
     print("Model loaded!")
     return model