# Page residue from the hosting site -- Spaces status: Sleeping
import html
import logging
import os
import random

import gradio as gr
import openai  # Added in order to use the OpenAI API
import pandas as pd
from huggingface_hub import InferenceClient
from transformers import pipeline
# Logging setup: records at DEBUG level and above are written to a log file
# whose path is relative to the working directory.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(
    level=logging.DEBUG,
    format=_LOG_FORMAT,
    filename='language_model_playground.log',
)
# Model list: display label -> model identifier.
# Insertion order matters: the model selector is built from MODELS.keys().
MODELS = {
    "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
    "DeepSeek Coder V2": "deepseek-ai/DeepSeek-Coder-V2-Instruct",
    "Meta Llama 3.1 8B": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Meta-Llama 3.1 70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Microsoft": "microsoft/Phi-3-mini-4k-instruct",
    # This entry used to be labeled "Mixtral 8x7B" while pointing at
    # Mistral 7B Instruct v0.3; the label now names the model that is
    # actually called.
    # NOTE(review): if Mixtral 8x7B was the intent, change the id instead.
    "Mistral 7B Instruct v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
    "Mixtral Nous-Hermes": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
    "Cohere Command R+": "CohereForAI/c4ai-command-r-plus",
    "Aya-23-35B": "CohereForAI/aya-23-35B",
    # GPT-4o Mini: call_hf_api compares the model id against this exact
    # value and hands the request to the OpenAI API instead.
    "GPT-4o Mini": "gpt-4o-mini",
}
# Hugging Face token: read from the environment; a missing or empty value
# aborts start-up with a ValueError.
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None or hf_token == "":
    raise ValueError("HF_TOKEN ํ๊ฒฝ ๋ณ์๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")

# OpenAI API client setup: the key is not validated here and is simply
# None when OPENAI_API_KEY is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def call_hf_api(prompt, reference_text, max_tokens, temperature, top_p, model):
    """Generate a completion for ``prompt`` with the given model.

    The reference text, when present, is appended to the prompt under a
    reference-text heading. The model id ``"gpt-4o-mini"`` is forwarded to
    ``call_openai_api``; every other id is sent through
    ``InferenceClient.text_generation``.

    Args:
        prompt: Instruction text.
        reference_text: Optional supporting text. ``None`` or a blank
            string means no reference section is added to the prompt.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        model: Model identifier (a value of ``MODELS``).

    Returns:
        The generated text, or an error-message string when the Hugging
        Face call raises.
    """
    # Build the full prompt before routing, so the OpenAI path receives the
    # reference text as well (it used to get the bare prompt only).
    if reference_text and reference_text.strip():
        combined_prompt = f"{prompt}\n\n์ฐธ๊ณ ํ ์คํธ:\n{reference_text}"
    else:
        # Avoid sending a dangling, empty reference section to the model.
        combined_prompt = prompt

    if model == "gpt-4o-mini":
        return call_openai_api(combined_prompt, max_tokens, temperature, top_p)

    client = InferenceClient(model=model, token=hf_token)
    # A new seed is drawn on every call.
    random_seed = random.randint(0, 1000000)
    try:
        return client.text_generation(
            combined_prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            seed=random_seed,
        )
    except Exception as e:
        # Boundary with a remote service: log the failure and hand the UI a
        # readable message instead of raising.
        logging.error(f"HuggingFace API ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
        return f"์๋ต ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}. ๋์ค์ ๋ค์ ์๋ํด ์ฃผ์ธ์."
def call_openai_api(prompt, max_tokens, temperature, top_p):
    """Send ``prompt`` as a single user message to GPT-4o Mini.

    Works with both generations of the ``openai`` package: releases from
    1.0 on expose ``openai.chat.completions.create`` (and no longer support
    ``openai.ChatCompletion``), while older releases only have
    ``openai.ChatCompletion.create``.

    Args:
        prompt: Text sent as the user message.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The content of the first choice, or an error-message string when
        the call raises.
    """
    request = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        # `OpenAI` only exists in openai >= 1.0. Do not probe for
        # `ChatCompletion`: 1.x still defines that name as a stub that
        # raises on use.
        if hasattr(openai, "OpenAI"):
            response = openai.chat.completions.create(**request)
        else:
            response = openai.ChatCompletion.create(**request)
        # Attribute access is valid for the response objects of both API
        # generations; item access (message['content']) is not.
        return response.choices[0].message.content
    except Exception as e:
        # Boundary with a remote service: log the failure and hand the UI a
        # readable message instead of raising.
        logging.error(f"OpenAI API ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
        return f"OpenAI ์๋ต ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}. ๋์ค์ ๋ค์ ์๋ํด ์ฃผ์ธ์."
def generate_response(prompt, reference_text, max_tokens, temperature, top_p, model):
    """Run the selected model and wrap its answer in an HTML fragment.

    Args:
        prompt: Instruction text.
        reference_text: Supporting text passed along with the prompt.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        model: Display label of the model; must be a key of ``MODELS``.

    Returns:
        An HTML string: a heading followed by a scrollable ``<div>`` that
        holds the (escaped) model output.

    Raises:
        KeyError: If ``model`` is not a key of ``MODELS``.
    """
    response = call_hf_api(prompt, reference_text, max_tokens, temperature, top_p, MODELS[model])
    # The model output is untrusted text: escape it so that '<', '>' and '&'
    # are displayed literally instead of being interpreted as markup.
    safe_response = html.escape(str(response))
    return (
        "\n"
        "<h3>์์ฑ๋ ์๋ต:</h3>\n"
        "<div style='max-height: 500px; overflow-y: auto; white-space: pre-wrap; word-wrap: break-word;'>\n"
        f"{safe_response}\n"
        "</div>\n"
    )
# Review file processing
def process_reviews(file):
    """Classify the reviews of an uploaded Excel file by sentiment.

    Args:
        file: The upload, either an object whose ``name`` attribute is the
            file path or the path itself. ``None`` (no file selected)
            yields three empty strings.

    Returns:
        A 3-tuple ``(positive_text, negative_text, summary)``:
        up to ten positive reviews joined by newlines, up to ten negative
        reviews joined by newlines, and a summary line with the total,
        positive and negative counts. When the sheet has no ``review``
        column the first two items are empty and the third carries the
        error message, so the result always has three items, one for each
        output component the change handler is wired to.
    """
    # The change event also fires when the upload is cleared.
    if file is None:
        return "", "", ""

    path = getattr(file, "name", file)
    df = pd.read_excel(path)
    if 'review' not in df.columns:
        return "", "", "๋ฆฌ๋ทฐ ํ์ผ์ 'review' ์ด์ด ์์ต๋๋ค. ์ฌ๋ฐ๋ฅธ ํ์ผ์ ์ ๋ก๋ํ์ธ์."

    # Empty cells are read as NaN; drop them and coerce the rest to text so
    # the classifier only ever receives strings.
    reviews = df['review'].dropna().astype(str).tolist()

    # Run sentiment analysis (skipped when there is nothing to classify).
    if reviews:
        sentiment_analyzer = pipeline("sentiment-analysis")
        sentiments = sentiment_analyzer(reviews)
    else:
        sentiments = []

    # Split the reviews by predicted label. `reviews` holds plain strings,
    # so each review is used directly rather than indexed.
    positive_reviews = []
    negative_reviews = []
    for review, sentiment in zip(reviews, sentiments):
        if sentiment['label'] == 'POSITIVE':
            positive_reviews.append(review)
        elif sentiment['label'] == 'NEGATIVE':
            negative_reviews.append(review)

    # Summarize the analysis; the counts cover all reviews, while only the
    # first ten of each kind are displayed.
    total_reviews = len(reviews)
    positive_count = len(positive_reviews)
    negative_count = len(negative_reviews)
    analysis_summary = f"์ด ๋ฆฌ๋ทฐ ์: {total_reviews}, ๊ธ์ ๋ฆฌ๋ทฐ ์: {positive_count}, ๋ถ์ ๋ฆฌ๋ทฐ ์: {negative_count}"
    return "\n".join(positive_reviews[:10]), "\n".join(negative_reviews[:10]), analysis_summary
# Gradio interface setup.
# NOTE(review): the nesting below is reconstructed from the statement order;
# confirm the Column/Row layout against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("## ์ธ์ด ๋ชจ๋ธ ํ๋กฌํํธ ํ๋ ์ด๊ทธ๋ผ์ด๋")

    with gr.Column():
        model_radio = gr.Radio(
            choices=list(MODELS),
            value="Zephyr 7B Beta",
            label="์ธ์ด ๋ชจ๋ธ ์ ํ",
        )
        prompt_input = gr.Textbox(label="ํ๋กฌํํธ ์ ๋ ฅ", lines=5)
        reference_text_input = gr.Textbox(label="์ฐธ๊ณ ํ ์คํธ ์ ๋ ฅ", lines=5)

        # Generation parameters, side by side.
        with gr.Row():
            max_tokens_slider = gr.Slider(
                minimum=0, maximum=5000, value=2000, step=100,
                label="์ต๋ ํ ํฐ ์",
            )
            temperature_slider = gr.Slider(
                minimum=0, maximum=1, value=0.75, step=0.05,
                label="์จ๋",
            )
            top_p_slider = gr.Slider(
                minimum=0, maximum=1, value=0.95, step=0.05,
                label="Top P",
            )

        generate_button = gr.Button("์๋ต ์์ฑ")
        response_output = gr.HTML(label="์์ฑ๋ ์๋ต")

        # Review file upload section.
        file_input = gr.File(label="๋ฆฌ๋ทฐ ์์ ํ์ผ ์ ๋ก๋")
        positive_reviews_output = gr.Textbox(
            label="๋ํ ๊ธ์ ๋ฆฌ๋ทฐ 10๊ฐ", lines=10, interactive=False,
        )
        negative_reviews_output = gr.Textbox(
            label="๋ํ ๋ถ์ ๋ฆฌ๋ทฐ 10๊ฐ", lines=10, interactive=False,
        )
        analysis_output = gr.Textbox(label="๋ถ์ ๊ฒฐ๊ณผ", interactive=False)

    # Process the review file whenever the upload changes.
    review_outputs = [positive_reviews_output, negative_reviews_output, analysis_output]
    file_input.change(fn=process_reviews, inputs=file_input, outputs=review_outputs)

    # Generate a response when the button is clicked; the input order
    # follows the parameter order of generate_response.
    generation_inputs = [
        prompt_input,
        reference_text_input,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        model_radio,
    ]
    generate_button.click(
        fn=generate_response,
        inputs=generation_inputs,
        outputs=response_output,
    )

# Launch the interface.
demo.launch(share=True)