# stats-nerd / hypothesis.py
# Uploaded by VinayNR ("Upload hypothesis.py", commit f5c1d3a)
import scipy.stats as stats
def isfloat(text):
    """Return True if *text* can be converted by ``float()``, else False.

    Anything ``float()`` accepts counts as a float here, so integer strings
    such as ``"37"`` and special values such as ``"nan"`` or ``"inf"`` also
    return True.

    Args:
        text: The candidate value, normally a string token.

    Returns:
        bool: True when ``float(text)`` succeeds, False otherwise.
    """
    try:
        float(text)
    except (TypeError, ValueError):
        # ValueError: malformed string; TypeError: non-string input like None.
        return False
    return True
def extract_p_val(text):
    """Split a reported p-value span into its value and comparison symbol.

    The span is expected to end with a comparison token followed by the
    value, e.g. ``"p < 0.05"`` gives ``("0.05", "<")``.

    Args:
        text: Whitespace-separated p-value span.

    Returns:
        tuple: ``(last_token, second_to_last_token)``, i.e. the p-value text
        and the token immediately before it.

    Raises:
        IndexError: If *text* contains fewer than two tokens.
    """
    # split() with no argument ignores repeated, leading and trailing
    # whitespace, so stray spaces cannot yield empty-string tokens.
    tokens = text.split()
    return tokens[-1], tokens[-2]
class HypothesisTest:
    """A t- or z-test result parsed from a whitespace-separated report string.

    The expected input looks like ``"t-test ( 37 ) = 1.414"``: the first token
    names the test and the numeric tokens carry the degrees of freedom and the
    test statistic.

    Attributes:
        test_type: ``'t'`` or ``'z'``.
        test_stat: The test statistic, stored as the token text found in the
            input (``0.0`` when none was found).
        df1: Degrees of freedom, stored as the token text found in the input
            (``0`` when none was found).
        tails: Multiplier applied to the one-sided tail probability in
            ``calculate_p_val``; starts at ``1``.
    """

    # Class-level defaults so the getters and __str__ work on an instance
    # that has not had a reported p-value attached yet.
    _reported_p_val = None
    _reported_p_val_dir = None

    def __init__(self, text):
        """Parse *text* into a test type, degrees of freedom and statistic.

        Args:
            text: Report string such as ``"t-test ( 37 ) = 1.414"``.

        Raises:
            ValueError: If the first token does not start with ``t`` or ``z``
                (case-insensitive), which includes blank input.
        """
        self.test_type = ''
        self.test_stat = 0.0
        self.df1 = 0
        self.tails = 1

        # split() with no argument tolerates repeated or surrounding spaces.
        parts = text.split()
        test_name = parts[0].lower() if parts else ''
        if test_name.startswith('t'):
            self.test_type = 't'
        elif test_name.startswith('z'):
            self.test_type = 'z'
        else:
            raise ValueError('Failed to parse the test')

        # Separate digit-only tokens from the other numeric tokens.
        integers = []
        decimals = []
        for part in parts:
            if isfloat(part):
                if part.isdigit():
                    integers.append(part)
                else:
                    decimals.append(part)

        if decimals:
            # Unambiguous case: the non-integer number is the statistic and a
            # digit-only number is the degrees of freedom (last one wins).
            self.test_stat = decimals[-1]
            if integers:
                self.df1 = integers[-1]
        elif len(integers) > 1:
            # Integer-valued statistic, e.g. "t ( 37 ) = 2": the first integer
            # is the degrees of freedom and the last one is the statistic.
            self.df1 = integers[0]
            self.test_stat = integers[-1]
        elif integers:
            # A lone integer: a z-test has no degrees of freedom, so it must
            # be the statistic; for a t-test it is kept as the df.
            if self.test_type == 'z':
                self.test_stat = integers[0]
            else:
                self.df1 = integers[0]

    @property
    def reported_p_val(self):
        """The p-value as reported in the text, or None if not set."""
        return self._reported_p_val

    @reported_p_val.setter
    def reported_p_val(self, value):
        self._reported_p_val = value

    @property
    def reported_p_val_dir(self):
        """The reported comparison symbol (<, > or =), or None if not set."""
        return self._reported_p_val_dir

    @reported_p_val_dir.setter
    def reported_p_val_dir(self, dirn):
        # Invalid symbols are reported on stdout and otherwise ignored; the
        # previously stored direction (None by default) is left untouched.
        if dirn in ('<', '>', '='):
            self._reported_p_val_dir = dirn
        else:
            print("The direction can be one of <, >, =")

    def calculate_p_val(self):
        """Recompute the p-value from the parsed statistic.

        Returns:
            ``tails`` times the upper-tail probability of ``|test_stat|``
            under Student's t with ``df1`` degrees of freedom (t-test) or the
            standard normal (z-test); None for any other ``test_type``.
        """
        if self.test_type not in ('t', 'z'):
            return None
        magnitude = abs(float(self.test_stat))
        # sf(x) is the survival function 1 - cdf(x), computed without the
        # cancellation that turns tiny tail probabilities into exactly 0.0.
        if self.test_type == 't':
            return self.tails * stats.t.sf(magnitude, df=int(self.df1))
        return self.tails * stats.norm.sf(magnitude)

    @staticmethod
    def get_reported_stat_tests(sentence):
        """Collect the tests labelled ``'T'`` in *sentence* with their p-values.

        Args:
            sentence: Object whose ``get_labels('ner')`` returns a sequence of
                labels exposing ``value`` and ``data_point.text``.

        Returns:
            list: One ``HypothesisTest`` per ``'T'`` label that could be parsed
            and paired with a p-value span; unparseable ones are skipped.
        """
        tests = []
        labeled_entities = sentence.get_labels('ner')
        for idx, entity in enumerate(labeled_entities):
            if entity.value != 'T':
                continue
            try:
                test = HypothesisTest(entity.data_point.text)
                # NOTE(review): assumes the label right after a test is its
                # p-value span; the label's own value is not checked.
                p_val_span = labeled_entities[idx + 1].data_point.text
                p_val, dirn = extract_p_val(p_val_span)
            except (ValueError, IndexError):
                # Not a parseable test, no following label, or a p-value span
                # with too few tokens: skip this entity (best effort).
                continue
            test.reported_p_val = p_val
            test.reported_p_val_dir = dirn
            tests.append(test)
        return tests

    def __str__(self):
        """Return a one-line, pipe-separated summary of the test."""
        if self._reported_p_val is None:
            reported = "N/A"
        else:
            direction = self._reported_p_val_dir
            prefix = "" if direction is None else str(direction)
            reported = prefix + str(self._reported_p_val)
        return " | ".join([
            "Test Type : " + self.test_type,
            "Test Stat : " + str(self.test_stat),
            "DF : " + str(self.df1),
            "Rep P-val : " + reported,
        ])