Spaces:
Build error
Build error
| # -*- coding: utf-8 -*- | |
| import json | |
| import os | |
| import re | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from io import StringIO | |
| from App4api.bin import constants | |
| from collections import OrderedDict | |
| from App4api.bin.InformationExtractor import InformationExtractor | |
| from App4api.bin.ParameterExtractor import ParameterExtractor | |
| from App4api.bin.TechnologyFinder import TechnologyFinder | |
class PGProcessor(object):
    """Extract problem / partial-solution concepts from a corpus of patents.

    Each patent is supplied as a JSON document (text) from which the keys
    ``number``, ``claims``, ``description`` and ``source`` are read.
    """

    def __init__(self, patents, input_folder, file_extension):
        """Store the corpus and the settings forwarded to the extractor.

        Args:
            patents: Iterable of JSON-encoded patents, one string per patent.
            input_folder: Project input folder. Its last path component names
                the sub-folder of ``constants.GRAPH_FOLDER`` that receives
                ``graph.json``.
            file_extension: Passed through unchanged to
                ``InformationExtractor``.
        """
        self.patents = patents
        self.input_folder = input_folder
        self.file_extension = file_extension
        print("Processing started")

    def process_corpus(self):
        """Run concept extraction over every patent and write ``graph.json``.

        Only patents with a non-empty description are sent to
        ``InformationExtractor``; patents with only claims, or with neither,
        are merely counted. The collected concepts are written to
        ``<GRAPH_FOLDER><project>/graph.json`` and a summary is printed.

        Returns:
            dict: ``{"problem_graph": [...]}`` holding the extracted concepts.

        Raises:
            KeyError: If a patent lacks ``number``, ``claims``,
                ``description`` or ``source``.
            ValueError: If a patent, or the JSON fragments returned by the
                extractor, cannot be parsed (``json.JSONDecodeError``).
        """
        input_folder = self.input_folder
        file_extension = self.file_extension
        project_folder = os.path.basename(os.path.normpath(input_folder))
        graph_folder = constants.GRAPH_FOLDER + project_folder + "/"

        count_abstract = 0
        count_claims = 0
        count_description = 0
        count_patent = 0
        total_sentences_number = 0
        extracted_concepts = []

        for patent_file in self.patents:
            patent = json.loads(patent_file)
            number = patent['number']
            claims = patent['claims']
            description = patent['description']
            source = patent['source']

            if description != "":
                count_description += 1
                extractor = InformationExtractor(
                    description, input_folder, file_extension, number, source)
                # Keep the per-patent count in its own name: the original
                # rebound the running total here and then added it to itself,
                # so the reported total was twice the last patent's count.
                output_json, sentences_number = (
                    extractor.get_from_description())
                if output_json != "":
                    extracted_concepts.append(output_json)
                # NOTE(review): sentences are counted for every processed
                # description, even when no concept was extracted - confirm
                # this matches the intended statistic.
                total_sentences_number += sentences_number
            elif claims != "":
                count_claims += 1
                print('Processing claims')
            else:
                count_abstract += 1
                print("processing abstract")
            count_patent += 1

        # The extractor returns JSON text fragments, so the graph document is
        # assembled as text and parsed once.
        concepts_json = json.loads(
            '{"problem_graph": [%s]}' % ','.join(extracted_concepts))
        concepts = concepts_json['problem_graph']
        count_concepts = len(concepts)
        count_concepts_problem, count_concepts_solupart = (
            self._count_concept_types(concepts))

        # The project sub-folder may not exist yet on a first run.
        os.makedirs(graph_folder, exist_ok=True)
        with open(graph_folder + "graph.json", 'w') as json_graph:
            json.dump(concepts_json, json_graph, sort_keys=False, indent=4,
                      separators=(',', ': '))

        print("Le corpus contenait %s brevets dont %s abstract, %s revendications et %s descriptions" % (count_patent, count_abstract, count_claims, count_description))
        print("%s phrases ont été analysée(s)" % (total_sentences_number))
        print("%s concepts ont été trouvé(s) dont %s problèmes et %s solutions partielles" % (count_concepts, count_concepts_problem, count_concepts_solupart))

        # NOTE: the ratio chart (self.make_graphic) was already disabled in
        # the original code, so no graphic is produced here.
        return concepts_json

    @staticmethod
    def _count_concept_types(concepts):
        """Return ``(problems, partial_solutions)`` counted in *concepts*.

        Each concept is expected to be a dict whose values are dicts carrying
        a ``"type"`` entry; anything else is skipped instead of raising.
        """
        problems = 0
        partial_solutions = 0
        for concept in concepts:
            if not isinstance(concept, dict):
                continue
            for details in concept.values():
                if not isinstance(details, dict):
                    continue
                concept_type = details.get("type")
                if concept_type == "partialSolution":
                    partial_solutions += 1
                elif concept_type == "problem":
                    problems += 1
        return problems, partial_solutions