# dalat5 / src/evaluate_t5.py
# Author: crossroderick — "Model training update with 10 epochs" (commit d17d151)
import json
import evaluate
from tqdm import tqdm
from transformers import pipeline
# Seq2seq inference pipeline for the DalaT5 transliteration model.
# The same checkpoint supplies both model weights and tokenizer.
_MODEL_ID = "crossroderick/dalat5"
pipe = pipeline("text2text-generation", model=_MODEL_ID, tokenizer=_MODEL_ID)

# Corpus-level metrics: BLEU (n-gram overlap) and chrF (character F-score)
bleu = evaluate.load("bleu")
chrf = evaluate.load("chrf")
# Load JSONL dataset
dataset_path = "src/data/clean_corpus.jsonl"
examples = []
with open(dataset_path, "r", encoding="utf-8") as f:
for line in f:
obj = json.loads(line)
if "transliteration" in obj and "src" in obj["transliteration"] and "tgt" in obj["transliteration"]:
examples.append((obj["transliteration"]["src"], obj["transliteration"]["tgt"]))
# Generate a transliteration for every (source, target) pair.
predictions, references = [], []
print(f"Evaluating on {len(examples)} examples...\n")
for src, tgt in tqdm(examples):
    # Greedy (non-sampled) decoding of the Cyrillic -> Latin task prompt.
    result = pipe(f"Cyrillic2Latin: {src}", max_length=128, do_sample=False)
    predictions.append(result[0]["generated_text"].strip())
    # BLEU expects each reference as a list of alternative references.
    references.append([tgt.strip()])
# Compute corpus-level scores and report them.
scores = {
    "BLEU Score": bleu.compute(predictions=predictions, references=references)["bleu"],
    "chrF Score": chrf.compute(predictions=predictions, references=references)["score"],
}

print("\nEvaluation results:")
for label, value in scores.items():
    print(f"{label}: {value:.2f}")