# chat-test / app.py
# Yhhxhfh's picture — Update app.py — b3714b6 verified
# (Hugging Face Spaces file-page header captured in the paste; commented out
# so the module parses as valid Python.)
from flask import Flask, request, jsonify
from vllm import LLM, SamplingParams
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from dotenv import load_dotenv
from huggingface_hub import snapshot_download, notebook_login, upload_folder, create_repo, login
import os
load_dotenv()
app = Flask(__name__)
login(
token=os.getenv("HUGGINGFACE_TOKEN"),
add_to_git_credential=True
)
models = {
"meta-llama": LLM(model="meta-llama/Meta-Llama-3.1-8B"),
"gemma": LLM(model="google/gemma-2-9b"),
"qwen": LLM(model="Qwen/Qwen2-beta-7B"),
"phi": LLM(model="microsoft/phi-2"),
"falcon": LLM(model="bigcode/starcoder")
}
def format_response(responses):
unique_responses = list(set(responses))
if len(unique_responses) > 1:
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(unique_responses)
sim_matrix = cosine_similarity(tfidf_matrix)
avg_sim = np.mean(sim_matrix, axis=1)
most_similar_index = np.argmax(avg_sim)
selected_response = unique_responses[most_similar_index]
else:
selected_response = unique_responses[0]
formatted = re.sub(r'\s+', ' ', selected_response).strip()
return formatted.capitalize() + "."
@app.route('/chat', methods=['POST'])
@spaces.GPU
def chat():
data = request.json
user_message = data.get('message', '')
params = SamplingParams(
temperature=0.7,
max_length=50,
top_p=0.9,
num_return_sequences=1
)
responses = []
for model_name in models:
response = models[model_name].generate(user_message, params)
responses.append(response[0])
unified_response = format_response(responses)
return jsonify({'response': unified_response})
if __name__ == '__main__':
app.run(debug=True)