Minseok Bae committed · commit b46b972 · parent dbcffd4

Added citations

Files changed:
- main_backend.py (+5 -5)
- src/backend/model_operations.py (+3 -0)
- src/display/about.py (+30 -0)
main_backend.py

The paired removed/added lines in this file render identically on the page, so these edits appear to be whitespace-only.

```diff
@@ -75,12 +75,12 @@ def run_auto_eval(args):
     else:
         eval_request = manage_requests.EvalRequest(
             model=args.model,
-            status=PENDING_STATUS,
+            status=PENDING_STATUS,
             precision=args.precision
         )
         pp.pprint(eval_request)
         logging.info("Running reproducibility eval")
-
+
         run_eval_suite.run_evaluation(
             eval_request=eval_request,
             local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
@@ -93,14 +93,14 @@ def run_auto_eval(args):
 
 def main():
     parser = argparse.ArgumentParser(description="Run auto evaluation with optional reproducibility feature")
-
+
     # Optional arguments
     parser.add_argument("--reproduce", type=bool, default=False, help="Reproduce the evaluation results")
     parser.add_argument("--model", type=str, default=None, help="Your Model ID")
    parser.add_argument("--precision", type=str, default="float16", help="Precision of your model")
-
+
     args = parser.parse_args()
-
+
     run_auto_eval(args)
 
 
```
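A side note on the unchanged code in this hunk: `argparse` with `type=bool` is a well-known pitfall, because argparse applies `bool()` to the raw string and `bool()` of any non-empty string (including `"False"`) is `True`. The commit does not touch this; the sketch below just demonstrates the behavior, and the `store_true` variant is a hypothetical workaround, not part of this commit.

```python
import argparse

# Pitfall: argparse applies bool() to the raw string value, and
# bool() of any non-empty string -- including "False" -- is True.
parser = argparse.ArgumentParser()
parser.add_argument("--reproduce", type=bool, default=False)
print(parser.parse_args(["--reproduce", "False"]).reproduce)  # True, not False

# Common workaround (hypothetical, not part of this commit):
# a presence flag that defaults to False.
parser2 = argparse.ArgumentParser()
parser2.add_argument("--reproduce", action="store_true",
                     help="Reproduce the evaluation results")
print(parser2.parse_args(["--reproduce"]).reproduce)  # True
print(parser2.parse_args([]).reproduce)               # False
```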
src/backend/model_operations.py

```diff
@@ -8,6 +8,7 @@ import pandas as pd
 import spacy
 # from transformers import AutoModelForCausalLM, AutoTokenizer
 from sentence_transformers import CrossEncoder
+import litellm
 from litellm import completion
 
 import src.backend.util as util
@@ -22,6 +23,8 @@ nlp = spacy.load("en_core_web_sm")
 
 os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
 
+litellm.set_verbose=True
+
 
 def load_evaluation_model(model_path):
     """Load the evaluation model from the given path
```
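For context on this change: `litellm.set_verbose = True` switches LiteLLM into debug mode, printing each outgoing request and the raw provider response to stdout, which is useful when an evaluation call fails silently. A minimal sketch of the effect, assuming a provider API key is set; the model name and prompt are placeholders, not values from this repo.

```python
import os

import litellm
from litellm import completion

# Debug mode, as enabled in this commit: LiteLLM prints every request
# it builds and the raw response it receives from the provider.
litellm.set_verbose = True

# Assumption: a real key is available; "sk-..." is a placeholder.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")

# Placeholder call for illustration only; the leaderboard backend
# supplies its own model names and summarization prompts.
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello in one word."}],
)
print(response.choices[0].message.content)
```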
src/display/about.py

Email addresses below appear as "[email protected]" because the page rendering redacts them.

```diff
@@ -147,4 +147,34 @@ Make sure you have followed the above steps first.
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: Vectara Hallucination Leaderboard
+message: >-
+  If you use this dataset, please cite it using the metadata
+  from this file.
+type: dataset
+authors:
+  - email: [email protected]
+    given-names: Simon
+    family-names: Hughes
+  - given-names: Minseok
+    family-names: Bae
+    email: [email protected]
+repository-code: 'https://github.com/vectara/hallucination-leaderboard'
+url: >-
+  https://github.com/vectara/hallucination-leaderboard/blob/main/README.md
+abstract: >-
+  A leaderboard comparing LLM performance at maintaining
+  factual consistency when summarizing a set of facts.
+keywords:
+  - nlp
+  - llm
+  - hallucination
+  - nli
+  - machine learning
+license: Apache-2.0
+date-released: '2023-11-01'
 """
```
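These two strings exist so the leaderboard UI can offer a copyable citation snippet. The commit only fills in the text; the wiring below is a hypothetical sketch modeled on other Hugging Face leaderboard Spaces, and the accordion title and layout are assumptions, not code from this repo.

```python
import gradio as gr

from src.display.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT

with gr.Blocks() as demo:
    # Collapsible citation box; users copy the CITATION.cff snippet from here.
    # Assumption: this mirrors the accordion pattern used by other
    # leaderboard Spaces, not this repo's actual app code.
    with gr.Accordion("Citing this leaderboard", open=False):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=20,
            show_copy_button=True,
        )

if __name__ == "__main__":
    demo.launch()
```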