web-server

Sleeping

App Files Community

pvanand committed on Jul 15, 2024

Commit

adb504f

verified ·

1 Parent(s): 2fc91ef

Update main.py

Browse files

Files changed (1) hide show

main.py +17 -11

main.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from fastapi import FastAPI, HTTPException
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
 from typing import Literal
@@ -8,6 +9,10 @@ from openai import OpenAI
 app = FastAPI()
 ModelID = Literal[
     "meta-llama/llama-3-70b-instruct",
     "anthropic/claude-3.5-sonnet",
@@ -42,8 +47,7 @@ def get_api_keys():
 api_keys = get_api_keys()
 or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
-def chat_with_llama_stream(messages, model, max_output_tokens=4000):
     try:
         response = or_client.chat.completions.create(
             model=model,
@@ -51,14 +55,20 @@ def chat_with_llama_stream(messages, model, max_output_tokens=4000):
             max_tokens=max_output_tokens,
             stream=True
         )
         for chunk in response:
             if chunk.choices[0].delta.content is not None:
                 yield chunk.choices[0].delta.content
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
 @app.post("/coding-assistant")
-async def coding_assistant(query: QueryModel):
     """
     Coding assistant endpoint that provides programming help based on user queries.
@@ -70,6 +80,8 @@ async def coding_assistant(query: QueryModel):
     - openai/gpt-3.5-turbo-instruct
     - qwen/qwen-72b-chat
     - google/gemma-2-27b-it
     """
     system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."
     messages = [
@@ -81,13 +93,7 @@ async def coding_assistant(query: QueryModel):
         chat_with_llama_stream(messages, model=query.model_id),
         media_type="text/event-stream"
     )
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],)
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+from fastapi import FastAPI, HTTPException, Depends, Security
+from fastapi.security import APIKeyHeader
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
 from typing import Literal
 app = FastAPI()
+API_KEY_NAME = "X-API-Key"
+API_KEY = os.environ.get("API_KEY", "default_secret_key")  # Set this in your environment variables
+api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
 ModelID = Literal[
     "meta-llama/llama-3-70b-instruct",
     "anthropic/claude-3.5-sonnet",
 api_keys = get_api_keys()
 or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
+def chat_with_llama_stream(messages, model, max_output_tokens=2500):
     try:
         response = or_client.chat.completions.create(
             model=model,
             max_tokens=max_output_tokens,
             stream=True
         )
         for chunk in response:
             if chunk.choices[0].delta.content is not None:
                 yield chunk.choices[0].delta.content
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
+async def verify_api_key(api_key: str = Security(api_key_header)):
+    if api_key != API_KEY:
+        raise HTTPException(status_code=403, detail="Could not validate credentials")
+    return api_key
 @app.post("/coding-assistant")
+async def coding_assistant(query: QueryModel, api_key: str = Depends(verify_api_key)):
     """
     Coding assistant endpoint that provides programming help based on user queries.
     - openai/gpt-3.5-turbo-instruct
     - qwen/qwen-72b-chat
     - google/gemma-2-27b-it
+    Requires API Key authentication via X-API-Key header.
     """
     system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."
     messages = [
         chat_with_llama_stream(messages, model=query.model_id),
         media_type="text/event-stream"
     )
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)