Spaces:

xin
/

PatentSolver

Build error

PatentSolver / App /bin /SentenceClassifier.py

initial commit

22738ca almost 4 years ago

1.9 kB

	# -- coding: utf-8 --


	import nltk
	from App.bin import constants

	class SentenceClassifier(object):
	def __init__(self, sentence):
	self.sentence = sentence
	print("Classification....")


	def classifySentence(self):

	sentence = self.sentence

	def bagOfWords(labelled):
	wordsList = []
	for (words, sentiment) in labelled:
	wordsList.extend(words)
	return wordsList

	def wordFeatures(wordList):
	wordList = nltk.FreqDist(wordList)
	wordFeatures = wordList.keys()
	return wordFeatures

	def extract_Features(doc):
	docWords = set(doc)
	feat = {}
	for word in wordFeatures:
	feat['contains(%s)' % word] = (word in docWords)
	return feat


	with open(constants.ASSETS+"trainingsNegative") as l:
	problems = [tuple(map(str, i.strip().split(':'))) for i in l]
	with open(constants.ASSETS+"trainingsPositive") as f:
	solutions = [tuple(map(str, i.strip().split(':'))) for i in f]

	labelled = []
	for (words, polarity) in solutions + problems:
	words_filtered = [e.lower() for e in nltk.word_tokenize(words) if len(e) >= 3]
	labelled.append((words_filtered, polarity))



	wordFeatures = wordFeatures(bagOfWords(labelled))

	training_set = nltk.classify.apply_features(extract_Features, labelled)

	classifier = nltk.NaiveBayesClassifier.train(training_set)

	#print(classifier.show_most_informative_features(32))


	#print (sentence)
	#print("{0} \n Polarity: {1} \n".format(sentence, classifier.classify(extract_Features(sentence.split()))))
	classes = classifier.classify(extract_Features(sentence.split()))
	return classes