Spaces:
Runtime error
Runtime error
File size: 3,886 Bytes
6c14077 e502d68 0ba78e9 e502d68 0ba78e9 8dec3b6 e502d68 8dec3b6 0ba78e9 6c14077 0ba78e9 0779c9b 6c14077 1e40fe5 6c14077 0779c9b 0ba78e9 1e40fe5 0ba78e9 6c14077 31b9ddb 6c14077 0ba78e9 54f6b18 0779c9b 54f6b18 0ba78e9 675f890 5b19fc7 675f890 e502d68 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
from typing import Dict, Union
import jsonlines
import requests
from huggingface_hub import HfApi, ModelFilter, Repository, dataset_info
# Maps each supported AutoTrain task name to the Hub task tag it corresponds to.
AUTOTRAIN_TASK_TO_HUB_TASK = {
    "binary_classification": "text-classification",
    "multi_class_classification": "text-classification",
    # Disabled: not fully supported in AutoTrain.
    # "multi_label_classification": "text-classification",
    "entity_extraction": "token-classification",
    "extractive_question_answering": "question-answering",
    "translation": "translation",
    "summarization": "summarization",
    # Disabled:
    # "single_column_regression": 10,
}

# Reverse lookup (Hub task tag -> AutoTrain task name). Several AutoTrain tasks
# share the "text-classification" tag; the last one listed above wins.
HUB_TASK_TO_AUTOTRAIN_TASK = {
    hub_task: autotrain_task
    for autotrain_task, hub_task in AUTOTRAIN_TASK_TO_HUB_TASK.items()
}

# Name of the dataset repository (and local clone directory) holding job logs.
LOGS_REPO = "evaluation-job-logs"
# Module-level Hub API client, created with default settings and shared by the
# helpers below (used by `get_compatible_models` to list models).
api = HfApi()
def get_auth_headers(token: str, prefix: str = "autonlp"):
    """Build the ``Authorization`` header for an API request.

    Args:
        token: Access token placed after the scheme prefix.
        prefix: Scheme word placed before the token (``"autonlp"`` by default).

    Returns:
        A one-entry dict ``{"Authorization": "<prefix> <token>"}``.
    """
    credentials = "{} {}".format(prefix, token)
    return dict(Authorization=credentials)
def http_post(path: str, token: str, payload=None, domain: str = None, params=None) -> requests.Response:
    """Send an HTTP POST request to the AutoNLP API.

    Args:
        path: URL path appended to ``domain`` to form the request URL.
        token: Access token sent in the ``Authorization`` header.
        payload: Object sent as the JSON request body (``None`` for no body).
        domain: Base URL of the API. Required, despite the ``None`` default
            kept for signature compatibility.
        params: Optional query-string parameters.

    Returns:
        The ``requests.Response`` of a successful request.

    Raises:
        ValueError: If ``domain`` is not provided.
        requests.exceptions.ConnectionError: If the API cannot be reached.
        requests.exceptions.HTTPError: If the response has an error status.
    """
    if domain is None:
        # Fail with a clear message instead of a TypeError from `None + path`.
        raise ValueError("`domain` must be provided to build the request URL")
    try:
        response = requests.post(
            url=domain + path,
            json=payload,
            headers=get_auth_headers(token=token),
            allow_redirects=True,
            params=params,
        )
    except requests.exceptions.ConnectionError:
        print("❌ Failed to reach AutoNLP API, check your internet connection")
        # Propagate the failure: there is no response object to inspect, and
        # falling through would hit an unbound `response` variable.
        raise
    response.raise_for_status()
    return response
def http_get(path: str, domain: str, token: str = None, params: dict = None) -> requests.Response:
    """Send an HTTP GET request to ``domain + path``.

    Args:
        path: URL path appended to ``domain`` to form the request URL.
        domain: Base URL of the service.
        token: Optional access token. When omitted, no ``Authorization``
            header is sent (rather than a header containing the text "None").
        params: Optional query-string parameters.

    Returns:
        The ``requests.Response`` of a successful request.

    Raises:
        requests.exceptions.ConnectionError: If the service cannot be reached.
        requests.exceptions.HTTPError: If the response has an error status.
    """
    headers = get_auth_headers(token=token) if token is not None else None
    try:
        response = requests.get(
            url=domain + path,
            headers=headers,
            allow_redirects=True,
            params=params,
        )
    except requests.exceptions.ConnectionError:
        print(f"❌ Failed to reach {path}, check your internet connection")
        # Propagate the failure: there is no response object to inspect, and
        # falling through would hit an unbound `response` variable.
        raise
    response.raise_for_status()
    return response
def get_metadata(dataset_name: str) -> Union[Dict, None]:
    """Fetch the ``train-eval-index`` entry from a dataset's card metadata.

    Args:
        dataset_name: Identifier passed to ``dataset_info``.

    Returns:
        The value stored under ``"train-eval-index"`` in the dataset's card
        data, or ``None`` when the card data is missing or lacks that key.
    """
    info = dataset_info(dataset_name)
    if info.cardData is None or "train-eval-index" not in info.cardData.keys():
        return None
    return info.cardData["train-eval-index"]
def get_compatible_models(task, dataset_name):
    """List the IDs of Hub models matching a task and training dataset.

    Args:
        task: AutoTrain task name; must be a key of
            ``AUTOTRAIN_TASK_TO_HUB_TASK`` (a ``KeyError`` is raised otherwise).
        dataset_name: Dataset the models must have been trained on.

    Returns:
        A sorted list of model IDs returned by the Hub for the filter.
    """
    # TODO: relax filter on PyTorch models once supported in AutoTrain
    model_filter = ModelFilter(
        task=AUTOTRAIN_TASK_TO_HUB_TASK[task],
        trained_dataset=dataset_name,
        library=["transformers", "pytorch"],
    )
    model_ids = [entry.modelId for entry in api.list_models(filter=model_filter)]
    model_ids.sort()
    return model_ids
def get_key(col_mapping, val):
    """Reverse-lookup the first key in ``col_mapping`` whose value equals ``val``.

    Args:
        col_mapping: Mapping to search, in iteration order.
        val: Value to look for.

    Returns:
        The first matching key, or the string ``"key doesn't exist"`` when no
        value matches.
    """
    matching_keys = (key for key, value in col_mapping.items() if val == value)
    return next(matching_keys, "key doesn't exist")
def format_col_mapping(col_mapping: dict) -> dict:
    """Flatten the nested ``"answers"`` entry of a column mapping in place.

    Each ``k: v`` pair inside ``col_mapping["answers"]`` becomes a top-level
    ``"answers.k": "answers.v"`` entry, after which the nested ``"answers"``
    entry is removed.

    Args:
        col_mapping: Mapping containing an ``"answers"`` dict. It is modified
            in place; a ``KeyError`` is raised if ``"answers"`` is missing.

    Returns:
        The same ``col_mapping`` object, now flattened.
    """
    nested_answers = col_mapping["answers"]
    flattened = {
        f"answers.{source}": f"answers.{target}"
        for source, target in nested_answers.items()
    }
    col_mapping.update(flattened)
    del col_mapping["answers"]
    return col_mapping
def commit_evaluation_log(evaluation_log, hf_access_token=None):
    """Append an evaluation record to the logs dataset repo and push it.

    The ``autoevaluate/<LOGS_REPO>`` dataset repository is cloned into the
    local ``LOGS_REPO`` directory and pulled. Its ``logs.jsonl`` file is then
    rewritten with ``evaluation_log`` added as the last record, and the change
    is pushed to the Hub.

    Args:
        evaluation_log: Record to append. ``evaluation_log["payload"]["proj_name"]``
            is read to build the commit message.
        hf_access_token: Token forwarded to ``Repository`` as ``use_auth_token``.
    """
    log_file = f"{LOGS_REPO}/logs.jsonl"
    remote_url = f"https://huggingface.co/datasets/autoevaluate/{LOGS_REPO}"

    repo = Repository(
        local_dir=LOGS_REPO,
        clone_from=remote_url,
        repo_type="dataset",
        private=True,
        use_auth_token=hf_access_token,
    )
    repo.git_pull()

    # Load every existing record, then add the new one at the end.
    with jsonlines.open(log_file) as reader:
        records = list(reader)
    records.append(evaluation_log)

    # Rewrite the whole file with the extended list of records.
    with jsonlines.open(log_file, mode="w") as writer:
        writer.write_all(records)

    project_name = evaluation_log["payload"]["proj_name"]
    repo.push_to_hub(
        commit_message=f"Evaluation submitted with project name {project_name}"
    )
    print("INFO -- Pushed evaluation logs to the Hub")
|