Spaces:
Build error
Build error
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 28 16:02:26 2016
@author: Achille Souili
"""
| import re | |
| import nltk | |
class ComplexParser(object):
    """Extract clause-like candidates from a sentence using NLTK chunking.

    The stored sentence is tokenized, POS-tagged and chunked with a single
    ``CLAUSES`` regular-expression grammar. The words of every ``CLAUSES``
    chunk are joined with spaces to form one candidate string.
    """

    # Tag pattern for one CLAUSES chunk, in order: optional <DT>, optional
    # <JJ.*>, a required <DT> followed by <NN>, one optional tag of any kind,
    # an optional <VB.*>, then one or more further tags of any kind.
    # (Tag names are presumably Penn Treebank tags as produced by
    # nltk.pos_tag -- confirm against the tagger in use.)
    _CLAUSE_GRAMMAR = "CLAUSES: {<DT>?<JJ.*>?<DT><NN><.*>?<VB.*>?<.*>+}"

    def __init__(self, sentence):
        """Remember the sentence to analyze.

        Args:
            sentence: Text handed to ``nltk.word_tokenize`` when
                ``extract_parameters`` is called.
        """
        self.sentence = sentence

    def extract_parameters(self, separator="d"):
        """Return the text of every ``CLAUSES`` chunk found in the sentence.

        Args:
            separator: String placed between successive candidates when more
                than one chunk matches. The default ``"d"`` reproduces the
                historical output of this method.
                NOTE(review): ``"d"`` looks like a typo for a real delimiter;
                confirm with callers before changing the default.

        Returns:
            A single string: the candidates joined with ``separator``, or an
            empty string when no chunk matches.
        """
        tokens = nltk.word_tokenize(self.sentence)
        tagged_tokens = nltk.pos_tag(tokens)
        chunk_tree = nltk.RegexpParser(self._CLAUSE_GRAMMAR).parse(tagged_tokens)

        # Each leaf is a (word, tag) pair; only the words are kept.
        candidates = [
            " ".join(word for word, _tag in subtree.leaves())
            for subtree in chunk_tree.subtrees()
            if subtree.label() == "CLAUSES"
        ]
        return separator.join(candidates)
if __name__ == "__main__":
    # Manual smoke run: show the POS tags of a sample sentence, then the
    # clause candidates that ComplexParser extracts from the same sentence.
    sample_text = "in which the surface of diffusion (24) is concave."

    print(nltk.pos_tag(nltk.word_tokenize(sample_text)))
    print(ComplexParser(sample_text).extract_parameters())