Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -104,6 +104,9 @@ app.add_middleware(RateLimitMiddleware, requests_per_second=2)
|
|
| 104 |
secret_api_endpoint = os.getenv('SECRET_API_ENDPOINT')
|
| 105 |
secret_api_endpoint_2 = os.getenv('SECRET_API_ENDPOINT_2')
|
| 106 |
secret_api_endpoint_3 = os.getenv('SECRET_API_ENDPOINT_3') # New endpoint for searchgpt
|
|
|
|
|
|
|
|
|
|
| 107 |
image_endpoint = os.getenv("IMAGE_ENDPOINT")
|
| 108 |
ENDPOINT_ORIGIN = os.getenv('ENDPOINT_ORIGIN')
|
| 109 |
|
|
@@ -244,60 +247,62 @@ async def return_models():
|
|
| 244 |
return await get_models()
|
| 245 |
server_status = True
|
| 246 |
@app.post("/chat/completions")
|
| 247 |
-
@app.post("api/v1/chat/completions")
|
| 248 |
-
async def get_completion(payload: Payload, request: Request,authenticated: bool = Depends(verify_api_key)):
|
| 249 |
# Check server status
|
| 250 |
-
|
| 251 |
-
|
| 252 |
model_to_use = payload.model if payload.model else "gpt-4o-mini"
|
| 253 |
-
|
| 254 |
# Validate model availability
|
| 255 |
if model_to_use not in available_model_ids:
|
| 256 |
raise HTTPException(
|
| 257 |
status_code=400,
|
| 258 |
detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
|
| 259 |
)
|
| 260 |
-
|
| 261 |
usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")
|
| 262 |
-
|
| 263 |
# Prepare payload
|
| 264 |
payload_dict = payload.dict()
|
| 265 |
payload_dict["model"] = model_to_use
|
| 266 |
-
|
| 267 |
# Select the appropriate endpoint
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
# Current time and IP logging
|
| 271 |
current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
|
| 272 |
aaip = request.client.host
|
| 273 |
-
print(f"Time: {current_time}, {aaip}
|
| 274 |
-
print(payload_dict)
|
|
|
|
| 275 |
if not server_status:
|
| 276 |
return JSONResponse(
|
| 277 |
status_code=503,
|
| 278 |
content={"message": "Server is under maintenance. Please try again later."}
|
| 279 |
)
|
| 280 |
-
|
|
|
|
|
|
|
| 281 |
async def stream_generator(payload_dict):
|
| 282 |
-
|
| 283 |
-
# Prepare custom headers
|
| 284 |
-
custom_headers = {
|
| 285 |
-
'DNT': '1',
|
| 286 |
-
# 'Origin': ENDPOINT_ORIGIN,
|
| 287 |
-
'Priority': 'u=1, i',
|
| 288 |
-
# 'Referer': ENDPOINT_ORIGIN
|
| 289 |
-
}
|
| 290 |
-
|
| 291 |
try:
|
| 292 |
-
# Send POST request
|
| 293 |
response = scraper.post(
|
| 294 |
-
f"{endpoint}/v1/chat/completions",
|
| 295 |
-
json=payload_dict,
|
| 296 |
headers=custom_headers,
|
| 297 |
stream=True
|
| 298 |
)
|
| 299 |
-
|
| 300 |
-
#
|
| 301 |
if response.status_code == 422:
|
| 302 |
raise HTTPException(status_code=422, detail="Unprocessable entity. Check your payload.")
|
| 303 |
elif response.status_code == 400:
|
|
@@ -308,20 +313,19 @@ async def get_completion(payload: Payload, request: Request,authenticated: bool
|
|
| 308 |
raise HTTPException(status_code=404, detail="The requested resource was not found.")
|
| 309 |
elif response.status_code >= 500:
|
| 310 |
raise HTTPException(status_code=500, detail="Server error. Try again later.")
|
| 311 |
-
|
| 312 |
# Stream response lines to the client
|
| 313 |
for line in response.iter_lines():
|
| 314 |
if line:
|
| 315 |
yield line.decode('utf-8') + "\n"
|
|
|
|
| 316 |
except requests.exceptions.RequestException as req_err:
|
| 317 |
-
# Handle request-specific errors
|
| 318 |
print(response.text)
|
| 319 |
raise HTTPException(status_code=500, detail=f"Request failed: {req_err}")
|
| 320 |
except Exception as e:
|
| 321 |
-
# Handle unexpected errors
|
| 322 |
print(response.text)
|
| 323 |
raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
|
| 324 |
-
|
| 325 |
return StreamingResponse(stream_generator(payload_dict), media_type="application/json")
|
| 326 |
# Remove the duplicated endpoint and combine the functionality
|
| 327 |
@app.api_route("/images/generations", methods=["GET", "POST"]) # Support both GET and POST
|
|
|
|
| 104 |
secret_api_endpoint = os.getenv('SECRET_API_ENDPOINT')
|
| 105 |
secret_api_endpoint_2 = os.getenv('SECRET_API_ENDPOINT_2')
|
| 106 |
secret_api_endpoint_3 = os.getenv('SECRET_API_ENDPOINT_3') # New endpoint for searchgpt
|
| 107 |
+
# Base URL only, without the API version prefix: get_completion appends
# "/v1/chat/completions" to whichever endpoint it selects, so including "/v1"
# here would produce ".../v1/v1/chat/completions".
mistral_api = "https://api.mistral.ai"
|
| 108 |
+
mistral_key = os.getenv('MISTRAL_KEY')
|
| 109 |
+
mistral_models = ['mistral-saba-latest','mistral-small-latest','pixtral-large-latest']
|
| 110 |
image_endpoint = os.getenv("IMAGE_ENDPOINT")
|
| 111 |
ENDPOINT_ORIGIN = os.getenv('ENDPOINT_ORIGIN')
|
| 112 |
|
|
|
|
| 247 |
return await get_models()
|
| 248 |
server_status = True
|
| 249 |
@app.post("/chat/completions")
|
| 250 |
+
@app.post("/api/v1/chat/completions")
|
| 251 |
+
async def get_completion(payload: Payload, request: Request, authenticated: bool = Depends(verify_api_key)):
|
| 252 |
# Check server status
|
|
|
|
|
|
|
| 253 |
model_to_use = payload.model if payload.model else "gpt-4o-mini"
|
| 254 |
+
|
| 255 |
# Validate model availability
|
| 256 |
if model_to_use not in available_model_ids:
|
| 257 |
raise HTTPException(
|
| 258 |
status_code=400,
|
| 259 |
detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
|
| 260 |
)
|
| 261 |
+
|
| 262 |
usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")
|
| 263 |
+
|
| 264 |
# Prepare payload
|
| 265 |
payload_dict = payload.dict()
|
| 266 |
payload_dict["model"] = model_to_use
|
| 267 |
+
|
| 268 |
# Select the appropriate endpoint
|
| 269 |
+
if model_to_use in mistral_models:
|
| 270 |
+
endpoint = mistral_api
|
| 271 |
+
custom_headers = {
|
| 272 |
+
"Authorization": f"Bearer {mistral_key}"
|
| 273 |
+
}
|
| 274 |
+
elif model_to_use in alternate_models:
|
| 275 |
+
endpoint = secret_api_endpoint_2
|
| 276 |
+
custom_headers = {}
|
| 277 |
+
else:
|
| 278 |
+
endpoint = secret_api_endpoint
|
| 279 |
+
custom_headers = {}
|
| 280 |
+
|
| 281 |
# Current time and IP logging
|
| 282 |
current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
|
| 283 |
aaip = request.client.host
|
| 284 |
+
print(f"Time: {current_time}, {aaip}, {model_to_use}, server status: {server_status}")
|
| 285 |
+
print(payload_dict)
|
| 286 |
+
|
| 287 |
if not server_status:
|
| 288 |
return JSONResponse(
|
| 289 |
status_code=503,
|
| 290 |
content={"message": "Server is under maintenance. Please try again later."}
|
| 291 |
)
|
| 292 |
+
|
| 293 |
+
scraper = cloudscraper.create_scraper()
|
| 294 |
+
|
| 295 |
async def stream_generator(payload_dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
try:
|
| 297 |
+
# Send POST request with the correct headers
|
| 298 |
response = scraper.post(
|
| 299 |
+
f"{endpoint}/v1/chat/completions",
|
| 300 |
+
json=payload_dict,
|
| 301 |
headers=custom_headers,
|
| 302 |
stream=True
|
| 303 |
)
|
| 304 |
+
|
| 305 |
+
# Handle response errors
|
| 306 |
if response.status_code == 422:
|
| 307 |
raise HTTPException(status_code=422, detail="Unprocessable entity. Check your payload.")
|
| 308 |
elif response.status_code == 400:
|
|
|
|
| 313 |
raise HTTPException(status_code=404, detail="The requested resource was not found.")
|
| 314 |
elif response.status_code >= 500:
|
| 315 |
raise HTTPException(status_code=500, detail="Server error. Try again later.")
|
| 316 |
+
|
| 317 |
# Stream response lines to the client
|
| 318 |
for line in response.iter_lines():
|
| 319 |
if line:
|
| 320 |
yield line.decode('utf-8') + "\n"
|
| 321 |
+
|
| 322 |
except requests.exceptions.RequestException as req_err:
|
|
|
|
| 323 |
print(response.text)
|
| 324 |
raise HTTPException(status_code=500, detail=f"Request failed: {req_err}")
|
| 325 |
except Exception as e:
|
|
|
|
| 326 |
print(response.text)
|
| 327 |
raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
|
| 328 |
+
|
| 329 |
return StreamingResponse(stream_generator(payload_dict), media_type="application/json")
|
| 330 |
# Remove the duplicated endpoint and combine the functionality
|
| 331 |
@app.api_route("/images/generations", methods=["GET", "POST"]) # Support both GET and POST
|