File size: 3,770 Bytes
2aacaa3 685453f 8c51edf 2aacaa3 8c51edf 2aacaa3 8c51edf 2aacaa3 685453f 2aacaa3 685453f 2aacaa3 064d13d 685453f 74a99d1 064d13d 486b72b 064d13d 2aacaa3 8c51edf 2aacaa3 064d13d 685453f 74a99d1 064d13d 486b72b 064d13d 2aacaa3 6a76dc6 2aacaa3 abf5c11 2aacaa3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# Import necessary libraries
import gradio as gr
import numpy as np
import pandas as pd
from rapidfuzz.distance import Levenshtein, JaroWinkler
from sentence_transformers import SentenceTransformer, util
from typing import List
import zipfile
import os
import io
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from matheel.similarity import get_sim_list, calculate_similarity
def calculate_similarity_gradio(code1, code2, Ws, Wl, Wj, model_name):
    """Score one pair of code snippets and format the result for display.

    All arguments are forwarded positionally, unchanged, to
    ``matheel.similarity.calculate_similarity``.

    Args:
        code1: First code snippet (text from the "Code 1" box).
        code2: Second code snippet (text from the "Code 2" box).
        Ws: Weight taken from the "Semantic Search Weight" slider.
        Wl: Weight taken from the Levenshtein distance slider.
        Wj: Weight taken from the "Jaro Winkler Weight" slider.
        model_name: Model identifier chosen in the Hugging Face hub search.

    Returns:
        A sentence containing the score rendered with two decimal places.
    """
    message_template = "The similarity score between the two codes is: %.2f"
    # NOTE(review): the score is presumably a float in [0, 1] -- confirm
    # against matheel's documentation.
    score = calculate_similarity(code1, code2, Ws, Wl, Wj, model_name)
    return message_template % score
def get_sim_list_gradio(zipped_file, Ws, Wl, Wj, model_name, threshold, number_results):
    """Run the collection-wide similarity search for an uploaded archive.

    Thin adapter that forwards every argument positionally, in the same
    order, to ``matheel.similarity.get_sim_list`` and hands its result back
    untouched.

    Args:
        zipped_file: The uploaded zip archive as delivered by the file
            uploader component.
        Ws: Semantic-search weight.
        Wl: Levenshtein distance weight.
        Wj: Jaro-Winkler weight.
        model_name: Model identifier chosen in the Hugging Face hub search.
        threshold: Value of the "Threshold" slider.
        number_results: Value of the "Number of Returned pairs" slider.

    Returns:
        Whatever ``get_sim_list`` returns.
        NOTE(review): presumably a table of pairs suitable for a
        ``gr.Dataframe`` -- confirm against matheel's documentation.
    """
    return get_sim_list(
        zipped_file, Ws, Wl, Wj, model_name, threshold, number_results
    )
# Define the Gradio app
with gr.Blocks() as demo:

    def update_weights(Ws, Wl, Wj):
        """Rebalance the three feature weights so that they sum to 1.

        The semantic and Levenshtein weights are treated as the values the
        user controls; the Jaro-Winkler weight always receives the remainder.

        Args:
            Ws: Semantic-search weight from its slider.
            Wl: Levenshtein distance weight from its slider.
            Wj: Jaro-Winkler weight from its slider.

        Returns:
            A ``(Ws, Wl, Wj)`` tuple. The inputs are returned unchanged when
            they already sum to 1 (within a small tolerance); otherwise each
            value is kept inside ``[0, 1]`` and the three sum to 1.
        """
        # Slider values are multiples of 0.1, which binary floats cannot
        # represent exactly, so compare with a tolerance rather than `!= 1`.
        if abs((Ws + Wl + Wj) - 1.0) <= 1e-9:
            return Ws, Wl, Wj

        semantic = min(max(Ws, 0.0), 1.0)
        # Cap the Levenshtein weight so the remainder can never go negative
        # (a negative value would fall outside the slider's 0..1 range).
        levenshtein = min(max(Wl, 0.0), 1.0 - semantic)
        jaro = max(1.0 - semantic - levenshtein, 0.0)

        # Round away float noise such as 0.19999999999999996 so the sliders
        # receive clean values and the next change event sees a sum of 1.
        return tuple(round(w, 10) for w in (semantic, levenshtein, jaro))

    # Tab for similarity calculation
    with gr.Tab("Code Pair Similarity"):
        # Input components
        code1 = gr.Textbox(label="Code 1")
        code2 = gr.Textbox(label="Code 2")
        model_dropdown = HuggingfaceHubSearch(
            label="Pre-Trained Model to use for Embeddings",
            placeholder="Search for Pre-Trained models on Hugging Face",
            search_type="model",
        )

        # Accordion for weights and models
        with gr.Accordion("Feature Weights", open=False):
            Ws = gr.Slider(0, 1, value=0.7, label="Semantic Search Weight", step=0.1)
            Wl = gr.Slider(0, 1, value=0.3, label="Levenshtein Distance Weight", step=0.1)
            Wj = gr.Slider(0, 1, value=0.0, label="Jaro Winkler Weight", step=0.1)

        # Output component
        output = gr.Textbox(label="Similarity Score")

        # Update weights when any slider changes
        pair_weights = [Ws, Wl, Wj]
        for pair_slider in pair_weights:
            pair_slider.change(update_weights, pair_weights, pair_weights)

        # Button to trigger the similarity calculation
        calculate_btn = gr.Button("Calculate Similarity")
        calculate_btn.click(
            calculate_similarity_gradio,
            inputs=[code1, code2, Ws, Wl, Wj, model_dropdown],
            outputs=output,
        )

    # Tab for file upload and DataFrame output
    with gr.Tab("Code Collection Pair Similarity"):
        # File uploader component
        file_uploader = gr.File(label="Upload a Zip file", file_types=[".zip"])
        # Distinct names keep this tab's components from shadowing the
        # components of the pair-similarity tab.
        collection_model_dropdown = HuggingfaceHubSearch(
            label="Pre-Trained Model to use for Embeddings",
            placeholder="Search for Pre-Trained models on Hugging Face",
            search_type="model",
        )

        with gr.Accordion("Feature Weights and Parameters", open=False):
            collection_Ws = gr.Slider(0, 1, value=0.7, label="Semantic Search Weight", step=0.1)
            collection_Wl = gr.Slider(0, 1, value=0.3, label="Levenshtein Distance Weight", step=0.1)
            collection_Wj = gr.Slider(0, 1, value=0.0, label="Jaro Winkler Weight", step=0.1)
            threshold = gr.Slider(0, 1, value=0, label="Threshold", step=0.01)
            number_results = gr.Slider(1, 1000, value=10, label="Number of Returned pairs", step=1)

        # Output component for the DataFrame
        df_output = gr.Dataframe(label="Results")

        # Keep this tab's weights normalised the same way as the first tab's
        collection_weights = [collection_Ws, collection_Wl, collection_Wj]
        for collection_slider in collection_weights:
            collection_slider.change(update_weights, collection_weights, collection_weights)

        # Button to trigger the file processing
        process_btn = gr.Button("Process File")
        process_btn.click(
            get_sim_list_gradio,
            inputs=[
                file_uploader,
                collection_Ws,
                collection_Wl,
                collection_Wj,
                collection_model_dropdown,
                threshold,
                number_results,
            ],
            outputs=df_output,
        )

# Launch the Gradio app with show_error and debug enabled
demo.launch(show_error=True, debug=True)