Spaces:

knowledgator
/

GLiClassReranker

Sleeping

App Files Files Community

alexandrlukashov commited on May 9

Commit

321fb77

verified ·

1 Parent(s): 789fd18

added: compare page

Browse files

Files changed (2) hide show

interfaces/__init__.py +2 -1
interfaces/compare.py +144 -0

interfaces/__init__.py CHANGED Viewed

@@ -1,2 +1,3 @@
 from .landing import landing_interface
-from .main_pipeline import main_pipeline

 from .landing import landing_interface
+from .main_pipeline import main_pipeline
+from .compare import compare_st

interfaces/compare.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import os
+import torch
+import gradio as gr
+from typing import List
+import pandas as pd
+from transformers import AutoTokenizer
+from gliclass import GLiClassModel, ZeroShotClassificationPipeline
+from sentence_transformers import CrossEncoder
+device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+model = GLiClassModel.from_pretrained(os.getenv("GLICLASS_MODEL_PATH")).eval().to(device)
+tokenizer = AutoTokenizer.from_pretrained(os.getenv("GLICLASS_MODEL_PATH"))
+multi_label_pipeline = ZeroShotClassificationPipeline(model, tokenizer, classification_type='multi-label',
+                                                      device=device)
+st = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")
+example_1 = [
+    "I want to live in New York.",
+    'York is a cathedral city in North Yorkshire, England, with Roman origins',
+    'San Francisco,[23] officially the City and County of San Francisco, is a commercial, financial, and cultural center within Northern California, United States.',
+    'New York, often called New York City (NYC),[b] is the most populous city in the United States',
+    "New York City is the third album by electronica group Brazilian Girls, released in 2008.",
+    "New York City was an American R&B vocal group.",
+    "New York City is an album by the Peter Malick Group featuring Norah Jones.",
+    "New York City: The Album is the debut studio album by American rapper Troy Ave. ",
+    '"New York City" is a song by British new wave band The Armoury Show',
+]
+example_2 = [
+    "Looking for waterproof hiking boots that can handle freezing temperatures and rugged terrain.",
+    "TrailMaster X200 – waterproof boots with Vibram Arctic Grip soles, rated for -20°C and rocky paths.",
+    "UrbanStep Sneakers – stylish and breathable, not designed for rugged use or cold weather.",
+    "AlpineShield GTX – Gore-Tex lining, insulated to -15°C, ideal for mountain hiking.",
+    "Desert Trek Sandals – open-toe design, breathable and lightweight, not waterproof.",
+    "SummitPro Winter Boots – fleece-lined, waterproof up to ankle depth, tested to -5°C.",
+    "Marathon Lite – road-running shoes with shock-absorbing soles, non-waterproof.",
+    "TrailMaster X100 – waterproof boots with basic insulation, effective down to 0°C.",
+    "Climber Pro GTX – reinforced toe cap, Gore-Tex membrane, insulated to -20°C, certified for alpine routes."
+]
+example_3 = [
+    "Our users are reporting 504 Gateway Timeout errors when accessing the app during peak hours.",
+    "A 504 Gateway Timeout indicates that a server did not receive a timely response from another server upstream.",
+    "A 502 Bad Gateway occurs when the server, acting as a gateway, receives an invalid response from the upstream server.",
+    "Common causes of 504 errors include high server load, network congestion, or misconfigured backend timeouts.",
+    "A 403 Forbidden error suggests that the server is refusing to authorize the request, often due to permissions.",
+    "To resolve 504 errors, check server logs, backend service availability, and increase timeout settings if necessary.",
+    "A 408 Request Timeout is returned when the client fails to send a complete request in time.",
+    "A 500 Internal Server Error is a generic error indicating that the server encountered an unexpected condition.",
+    "Network latency monitoring tools can help identify bottlenecks that may cause 504 errors during high traffic periods."
+]
+example_4 = [
+    "A 45-year-old male presents with persistent cough, night sweats, low-grade fever, and weight loss over 3 months.",
+    "Lung cancer can cause cough and weight loss; however, it often includes hemoptysis and may show a solitary mass on imaging.",
+    "Bronchiectasis is characterized by chronic productive cough and recurrent infections but usually lacks significant weight loss.",
+    "Pneumonia presents acutely with high fever, productive cough, and may show lobar consolidation on imaging.",
+    "Sarcoidosis may cause cough and weight loss, with bilateral hilar lymphadenopathy seen on chest X-ray.",
+    "Tuberculosis typically presents with chronic cough, night sweats, weight loss, and may show upper lobe infiltrates on chest X-ray.",
+    "Chronic obstructive pulmonary disease (COPD) often involves chronic cough and dyspnea but is less associated with night sweats.",
+    "Fungal lung infections like histoplasmosis can mimic TB symptoms but are more common in specific endemic regions.",
+    "Gastroesophageal reflux disease (GERD) can cause chronic cough, but without systemic symptoms like weight loss or fever."
+]
+example_5 = [
+    "How can I set up a recurring payment for my monthly rent via online banking?",
+    "A standing order allows you to set up automatic fixed-amount payments on a regular schedule (e.g., monthly rent) through your bank.",
+    "A direct debit authorizes a third party to withdraw variable amounts from your account, typically used for utility bills.",
+    "Wire transfers are typically one-off payments that do not recur automatically.",
+    "You can schedule a one-time payment for a future date using the online banking portal, but it won’t repeat monthly.",
+    "Bank-issued cashier’s checks are used for large payments but require manual setup each time.",
+    "To set up recurring credit card payments, navigate to your card provider’s auto-pay settings (note: for card bills only).",
+    "Standing orders can be modified or canceled at any time via your online banking dashboard.",
+    "International transfers may incur additional fees and are not ideal for domestic rent payments."
+]
+def compute_scores(*args):
+    labels = [arg for arg in args[1:]]
+    labels = list(filter(None, labels))
+    query = args[0]
+    ranks_st = st.rank(query, labels)
+    ranks_gliclass = sorted(multi_label_pipeline(query, labels, threshold=0.0)[0], key=lambda x: x["score"], reverse=True)
+    docs_gliclass = []
+    scores_gliclass = []
+    docs_st = []
+    scores_st = []
+    label_to_text = {str(i): label for i, label in enumerate(labels)}
+    for predict in ranks_gliclass:
+        docs_gliclass.append(predict["label"])
+        scores_gliclass.append(round(predict["score"], 2))
+    for predict in ranks_st:
+        doc_id = predict["corpus_id"]
+        docs_st.append(label_to_text.get(str(doc_id), ""))
+        scores_st.append(round(predict["score"], 2))
+    for _ in range(int(os.getenv("MAX_DOCS")) - len(docs_st)):
+        docs_st.append("")
+        scores_st.append("")
+    for _ in range(int(os.getenv("MAX_DOCS")) - len(docs_gliclass)):
+        docs_gliclass.append("")
+        scores_gliclass.append("")
+    return docs_gliclass + scores_gliclass, docs_st + scores_st
+def compute_table(*args):
+    gliclass_results, st_results = compute_scores(*args)
+    max_docs = int(os.getenv("MAX_DOCS"))
+    gliclass_labels = gliclass_results[:max_docs]
+    st_labels = st_results[:max_docs]
+    df = pd.DataFrame({
+        "Rank": list(range(1, max_docs + 1)),
+        "GLiClass Label": gliclass_labels,
+        "CrossEncoder Label": st_labels,
+    })
+    return df
+examples = [
+    example + [""] * (int(os.getenv("MAX_DOCS")) - len(example) - 1) for example in
+    [example_1, example_2, example_3, example_4, example_5]
+]
+with gr.Blocks(title="GLiClass-Reranker") as compare_st:
+    inputs = []
+    query = gr.Textbox(
+        value=examples[0][0], label="Text query", placeholder="Enter your query here", lines=4
+    )
+    labels = [gr.Textbox(value=label, label=f"Label {i+1}") for i, label in enumerate(examples[0][1:])]
+    submit_btn = gr.Button("Compare")
+    result_table = gr.Dataframe(headers=["Rank", "GLiClass Label", "CrossEncoder Label"],
+                                label="Comparison Table",
+                                interactive=False)
+    inputs = [query] + labels
+    submit_btn.click(fn=compute_table, inputs=inputs, outputs=result_table)