|
import json
import logging
from typing import List, Dict, Union

import requests
from flask import Flask, request, jsonify
|
|
|
# Application object; the route decorators and the __main__ guard further
# down in this file all operate on this instance.
app = Flask(import_name=__name__)
|
# Model identifiers returned verbatim by the /models endpoint.
models = [
    'cognitivecomputations/dolphin-2.6-mixtral-8x7b',
    'databricks/dbrx-instruct',
    'google/gemma-1.1-7b-it',
    'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1',
    'lizpreciatior/lzlv_70b_fp16_hf',
    'meta-llama/Meta-Llama-3-70B-Instruct',
    'meta-llama/Meta-Llama-3-8B-Instruct',
    'microsoft/WizardLM-2-7B',
    'microsoft/WizardLM-2-8x22B',
    'mistralai/Mixtral-8x7B-Instruct-v0.1',
    'mistralai/Mixtral-8x22B-Instruct-v0.1',
    'mistralai/Mistral-7B-Instruct-v0.2',
    'openchat/openchat-3.6-8b',
]
|
class LLM:
    """Small client for the chat-completions endpoint at api.deepinfra.com.

    Attributes:
        model: Model identifier placed in the ``model`` field of each request.
        conversation_history: Messages sent ahead of the caller's messages on
            every request. Starts with one default system message.
    """

    # Endpoint every chat request is POSTed to.
    _API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"

    # Static headers sent with every request: a browser-style User-Agent,
    # deepinfra.com as Origin/Referer, and the web-embed source marker.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Pragma': 'no-cache',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'X-Deepinfra-Source': 'web-embed',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
    }

    def __init__(self, model: str):
        """Store the model identifier and seed the history with a system message."""
        self.model = model
        self.conversation_history = [{"role": "system", "content": "You are a Helpful AI."}]

    def _build_payload(
        self,
        messages: List[Dict[str, str]],
        system_message: Union[str, None] = None,
    ) -> str:
        """Return the compact JSON request body for one chat call.

        NOTE: when ``system_message`` is given it is inserted at the front of
        ``self.conversation_history``, so it stays in effect for every later
        call on this instance (in addition to the default system message).
        """
        if system_message is not None:
            self.conversation_history.insert(0, {"role": "system", "content": system_message})
        all_messages = self.conversation_history + messages
        return json.dumps(
            {
                'model': self.model,
                'messages': all_messages,
                'temperature': 0.7,
                'max_tokens': 8028,
                'stop': [],
                'stream': False,
            },
            separators=(',', ':'),
        )

    def chat(
        self,
        messages: List[Dict[str, str]],
        system_message: Union[str, None] = None,
        *,
        timeout: Union[float, tuple, None] = (10, 300),
    ) -> Union[str, None]:
        """Send ``messages`` to the model and return the reply text.

        Args:
            messages: Chat messages (``role``/``content`` dicts) appended after
                ``self.conversation_history`` in the request.
            system_message: Optional extra system message; it is inserted at
                the front of ``self.conversation_history`` and persists there.
            timeout: Passed straight to ``requests.post``. The default is a
                (connect, read) pair in seconds; pass ``None`` to wait
                indefinitely.

        Returns:
            The ``content`` of the first choice in the response, or ``None``
            when the request fails or the response does not have the expected
            shape.
        """
        payload = self._build_payload(messages, system_message)
        try:
            result = requests.post(
                url=self._API_URL,
                data=payload,
                headers=self._HEADERS,
                timeout=timeout,
            )
            return result.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
            # Network/timeout errors, an undecodable body, or a response
            # without choices[0].message.content. Callers rely on None here,
            # so log the cause instead of propagating it.
            logging.getLogger(__name__).exception(
                "Chat completion request for model %s failed", self.model
            )
            return None
|
|
|
def GenerativeIO(text, Model, System_Prompt):
    """Send one user message to ``Model`` and return the model's reply.

    Args:
        text: The user's message; sent with the "user" role.
        Model: Model identifier used to construct the ``LLM`` client.
        System_Prompt: Instructions sent with the "system" role. Skipped when
            ``None`` so no message with empty content is sent.

    Returns:
        Whatever ``LLM.chat`` returns: the reply text, or ``None`` when the
        request fails.
    """
    messages = []
    if System_Prompt is not None:
        messages.append({"role": "system", "content": System_Prompt})
    # The caller's text is the user turn; the system prompt must not be sent
    # under the "user" role.
    messages.append({"role": "user", "content": text})
    return LLM(model=Model).chat(messages)
|
|
|
@app.route('/generate', methods=['POST'])
def generate():
    """Handle POST /generate: forward a prompt to a model and return its reply.

    Expects a JSON object with ``text`` and ``Model`` keys and an optional
    ``System_Prompt`` key.

    Returns:
        ``{"response": <value returned by GenerativeIO>}`` on a well-formed
        request (the value may be ``null``), or a 400 response with an
        ``error`` message when the body is not a JSON object or a required
        field is missing.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    text = data.get('text')
    model_name = data.get('Model')
    if text is None or model_name is None:
        return jsonify({'error': "Fields 'text' and 'Model' are required."}), 400

    response = GenerativeIO(text, model_name, data.get('System_Prompt'))
    return jsonify({'response': response})
|
|
|
@app.route('/models', methods=['GET'])
def get_models():
    """Handle GET /models: return the module-level ``models`` list as JSON."""
    available = models
    return jsonify(available)
|
|
|
# Start the Flask app with its default run() settings, but only when this
# file is executed as a script rather than imported.
if __name__ == '__main__':
    app.run()
|
|