|
from flask import Flask, request, jsonify |
|
from vllm import LLM, SamplingParams |
|
import re |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import numpy as np |
|
from dotenv import load_dotenv |
|
from huggingface_hub import snapshot_download, notebook_login, upload_folder, create_repo, login |
|
import os |
|
|
|
# Load HUGGINGFACE_TOKEN (and any other settings) from a local .env file.
load_dotenv()

app = Flask(__name__)



# Authenticate against the Hugging Face Hub so gated models
# (e.g. meta-llama, gemma) can be downloaded by vLLM below.
login(

    token=os.getenv("HUGGINGFACE_TOKEN"),

    add_to_git_credential=True

)



# Registry of vLLM engines queried in parallel by /chat.
# NOTE: loading five 2B-9B models at import time requires a very large
# amount of GPU memory -- presumably this runs on a multi-GPU host;
# TODO(review): confirm.
models = {

    "meta-llama": LLM(model="meta-llama/Meta-Llama-3.1-8B"),

    "gemma": LLM(model="google/gemma-2-9b"),

    "qwen": LLM(model="Qwen/Qwen2-beta-7B"),

    # NOTE(review): key says "falcon" but the checkpoint is
    # bigcode/starcoder (a code model) -- confirm which was intended.
    "falcon": LLM(model="bigcode/starcoder")

}
|
|
|
def format_response(responses):
    """Select the most representative response from a list of candidates.

    When several distinct responses are given, picks the one with the
    highest mean TF-IDF cosine similarity to all candidates (a cheap
    "centroid" consensus heuristic), then normalizes its whitespace.

    Args:
        responses: list of candidate response strings (may be empty).

    Returns:
        The selected response with whitespace collapsed, capitalized, and
        terminated with a period; "" if *responses* is empty.
    """
    # Guard: the original crashed with IndexError on an empty list.
    if not responses:
        return ""
    # Deduplicate while preserving first-seen order. list(set(...)) has
    # nondeterministic ordering under hash randomization, which made
    # tie-breaking between equally-similar responses unstable across runs.
    unique_responses = list(dict.fromkeys(responses))
    if len(unique_responses) > 1:
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(unique_responses)
        sim_matrix = cosine_similarity(tfidf_matrix)
        # Mean similarity of each response to every candidate (incl. itself).
        avg_sim = np.mean(sim_matrix, axis=1)
        most_similar_index = int(np.argmax(avg_sim))
        selected_response = unique_responses[most_similar_index]
    else:
        selected_response = unique_responses[0]
    # Collapse runs of whitespace to single spaces.
    # NOTE(review): .capitalize() lowercases everything after the first
    # character, which will mangle acronyms/proper nouns -- confirm intended.
    formatted = re.sub(r'\s+', ' ', selected_response).strip()
    return formatted.capitalize() + "."
|
|
|
@app.route('/chat', methods=['POST'])
def chat():
    """Handle POST /chat: fan the user's message out to every loaded model
    and return a single consensus reply.

    Expects a JSON body ``{"message": "..."}``.

    Returns:
        JSON ``{"response": "..."}`` on success, or
        ``{"error": "..."}`` with HTTP 400 when the message is missing.
    """
    # NOTE(review): the original decorated this with @spaces.GPU, but
    # `spaces` is never imported, so the module crashed at import time.
    # If deploying on Hugging Face Spaces, `import spaces` and restore it.
    data = request.get_json(silent=True) or {}
    user_message = data.get('message', '')
    if not user_message:
        return jsonify({'error': 'message is required'}), 400
    # vLLM's SamplingParams takes `max_tokens` and `n`; the original passed
    # HF-style `max_length`/`num_return_sequences`, which raise TypeError.
    params = SamplingParams(
        temperature=0.7,
        max_tokens=50,
        top_p=0.9,
        n=1
    )
    responses = []
    for model_name, model in models.items():
        outputs = model.generate(user_message, params)
        # generate() returns a list of RequestOutput objects; the generated
        # text lives at .outputs[0].text, not on the RequestOutput itself.
        responses.append(outputs[0].outputs[0].text)
    unified_response = format_response(responses)
    return jsonify({'response': unified_response})
|
|
|
# Development entry point.
if __name__ == '__main__':

    # NOTE(review): debug=True enables the Werkzeug interactive debugger,
    # which allows arbitrary code execution -- confirm this never runs in
    # production (use a WSGI server such as gunicorn there instead).
    app.run(debug=True)
|
|