Spaces:

adyen
/

DABstep

Running

App Files Files Community

DABstep / dabstep_benchmark /tests /test_scorer.py

martinigoyanes

initial commit

034ac91 5 months ago

raw

history blame contribute delete

3.3 kB

	import pytest
	from ..evaluation.scorer import question_scorer

	@pytest.mark.parametrize(
	    "input1, input2, expected",
	    [
	        ("42", "42", True),
	        ("$42.00", "42", True),
	        ("43", "42", False),
	        ("10,765", "10765", True),
	        ("22520", "22520", True),
	    ],
	)
	def test_numeric_match(input1, input2, expected):
	    """Check ``question_scorer`` on pairs of plain numeric answers.

	    The table includes a currency-prefixed value ("$42.00") and a value
	    with a thousands separator ("10,765"), both expected to score as a
	    match against their bare numeric counterpart.
	    """
	    assert question_scorer(input1, input2) == expected

	@pytest.mark.parametrize(
	    "input1, input2, expected",
	    [
	        ("hello world", "Hello World", True),
	        (" sea gull ", "seagull", True),
	        ("hello", "world", False),
	        ("Other", "Other", True),
	        # NOTE: the case below is intentionally disabled ("ignoring as outlier").
	        # ("A. Netherlands", "B. Netherlands", False),
	        ("", "Inditex", False),
	        (" ", "Inditex", False),
	        ("The average transaction amount is 91.85 EUR.", "91.852", True),
	        ("The top 2 merchants account for approx 59.96% of all transactions.", "0.5996", False),
	        ("Netherlands", "NL", False),
	        ("", "", True),  # @martini
	    ],
	)
	def test_string_match(input1, input2, expected):
	    """Check ``question_scorer`` on free-text answers.

	    Covers case and whitespace differences, empty or blank inputs, and
	    full sentences that embed a number compared against a bare number.
	    """
	    assert question_scorer(input1, input2) == expected

	@pytest.mark.parametrize(
	    "input1, input2, expected",
	    [
	        ("1, 2, 3", "1,2,3", True),
	        ("apple; banana; cherry", "apple;banana;cherry", True),
	        ("1, 2", "1, 2, 3", False),
	        ("apple; banana", "apple; banana; cherry", False),
	        ("uber, spotify, nike, netflix, inditex", "Nike, Netflix, Uber, Inditex, Spotify", True),
	        ("a, b, c", "['a', 'b', 'c']", True),
	        (
	            "C: 69.36, F: 77.90, B: 86.22, A: 87.79, D: 94.34, G: 115.23",
	            "[C: 69.36, F: 77.9, B: 86.22, A: 87.79, D: 94.34, G: 115.23]",
	            True,
	        ),
	        (
	            "[BE: 85.3, IT: 93.82, FR: 98.57, NL: 99.87, LU: 111.42, SE: 114.36, ES: 134.41, GR: 169.92, ]",
	            "BE: 85.3, IT: 93.82, FR: 98.57, NL: 99.87, LU: 111.42, SE: 114.36, ES: 134.41, GR: 169.92",
	            True,
	        ),
	    ],
	)
	def test_list_match(input1, input2, expected):
	    """Check ``question_scorer`` on comma- and semicolon-separated lists.

	    The table expects a match despite spacing differences, element order
	    and letter case, surrounding brackets or quotes, and a trailing
	    separator; lists of different length are expected not to match.
	    """
	    assert question_scorer(input1, input2) == expected

	@pytest.mark.parametrize(
	    "input1, input2, expected",
	    [
	        ("42, hello", "42, hello", True),
	        ("42, world", "42, hello", False),
	    ],
	)
	def test_mixed_list_match(input1, input2, expected):
	    """Check ``question_scorer`` on lists mixing a number and a word."""
	    assert question_scorer(input1, input2) == expected

	@pytest.mark.parametrize(
	    "input1, input2, expected",
	    [
	        ("3.14", "3.1483", True),
	        ("3.14", "3.20", False),
	        ("1", "1.0", True),
	        ("1.0", "1", True),
	        ("0.731495413640441", "0.731495", True),
	        ("C", "C) both ip_address and email_address", True),
	        ("0.36706256984345176", "0.3670625698434518", True),
	        ("$0.10", "$0.10 per retry", True),
	        ("D", "D) Apples", True),
	        ("D", "A) Oranges", False),
	        ("25.0", "0.250", False),  # input is not a percentage
	    ],
	)
	def test_approximate_numeric_match(input1, input2, expected):
	    """Check ``question_scorer`` on numbers given at different precision.

	    Besides numeric pairs, the table also holds multiple-choice cases
	    where a bare option letter is compared against "letter) description".
	    """
	    assert question_scorer(input1, input2) == expected

	@pytest.mark.parametrize(
	    "input1, input2, expected",
	    [
	        ("73.15%", "73.1495", True),
	        ("42%", "42", True),
	        ("30%", "30.1", True),
	        ("25", "25%", True),
	        ("100%", "100", True),
	        ("0.1%", "0.1", True),
	        ("73%", "74", False),  # This should fail as the difference is too large
	        ("90%", "89.99971063977545", True),
	        ("7.79%", "7.787407043027865", True),
	        # Previously commented out with "TODO FIX". Kept as a non-strict
	        # xfail so the open issue is reported by pytest instead of hidden.
	        pytest.param(
	            "7.787407 %",
	            "0.07787407043027865",
	            True,
	            marks=pytest.mark.xfail(reason="TODO FIX: case was disabled in the original table"),
	        ),
	    ],
	)
	def test_percentages_match(input1, input2, expected):
	    """Check ``question_scorer`` when one side carries a percent sign.

	    Every active case compares a "%"-suffixed value with a bare number of
	    similar magnitude; the percent-versus-fraction case is marked xfail.
	    """
	    assert question_scorer(input1, input2) == expected