import gradio as gr
import re
import json
import itertools  # used by check_contradiction
import numpy as np
import stanza
from stanza.models.constituency.parse_tree import Tree
from transformers import AutoTokenizer, AutoModelForTokenClassification, TokenClassificationPipeline
from sentence_transformers import CrossEncoder
from autocorrect import Speller  # used only by the commented-out get_misspelled_words alternative below
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from torch.nn.utils.rnn import pad_sequence
from openai import OpenAI
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff
import os
# ***************************** Load needed models *****************************
nlp = stanza.Pipeline(lang='en', processors='tokenize,pos,constituency')

# POS model (used by the commented-out pipeline alternatives below)
pos_tokenizer = AutoTokenizer.from_pretrained("QCRI/bert-base-multilingual-cased-pos-english")
pos_model = AutoModelForTokenClassification.from_pretrained("QCRI/bert-base-multilingual-cased-pos-english")

#sentences_similarity_model = CrossEncoder('cross-encoder/stsb-roberta-base')
sentences_similarity_model = CrossEncoder('WillHeld/roberta-base-stsb')

# The Hugging Face access token is read from the environment; assumes an
# HF_TOKEN secret is configured for the Space.
nli_model = BertForSequenceClassification.from_pretrained("nouf-sst/bert-base-MultiNLI", use_auth_token=os.getenv("HF_TOKEN"))
nli_tokenizer = BertTokenizer.from_pretrained("nouf-sst/bert-base-MultiNLI", use_auth_token=os.getenv("HF_TOKEN"), do_lower_case=True)
# ***************************** GPT API *****************************
client = OpenAI(
    api_key=os.getenv("OpenAI"),
)

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def completion_with_backoff(**kwargs):
    # Retry transient API failures with exponential backoff (1-60 s, up to 6 attempts).
    return client.chat.completions.create(**kwargs)
def prompt(prompt_message, bad_smell):
    message = [
        {
            "role": "system",
            "content": prompt_message
        },
        {
            "role": "user",
            "content": bad_smell
        }
    ]
    completion = completion_with_backoff(
        model="gpt-3.5-turbo",
        messages=message,
        temperature=0.2,
    )
    return completion.choices[0].message.content
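# Usage sketch (hypothetical strings):
#   prompt("Rewrite the sentence in five words or fewer.",
#          "Increase the overall quality of the provided service")
#   returns the model's reply text, e.g. "Increase service quality".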
# ***************************** TGRL Parsing *****************************
def parse_tgrl(file_obj):
    with open(file_obj.name, 'r') as f:
        tgrl_text = f.read()
        tgrl_text = tgrl_text.replace('\t', '')
        tgrl_text = tgrl_text.replace('\n', '')
    return tgrl_text
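# Illustrative TGRL fragment in the shape the extraction regexes below assume
# (quoted element names, "decomposedBy" and "contributesTo" links); not taken
# from a real model:
#   actor student { name = "Student";
#       goal courseReg { name = "Course registration"; }
#       task regOnline { name = "Register online"; }
#       task regPaper { name = "Register on paper"; }
#       courseReg decomposedBy regOnline, regPaper;
#       regOnline contributesTo courseReg {help};
#   }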
def extract_elements(tgrl_text):
    # Extract actors
    actors = re.findall(r"(?:.*?actor\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    # Extract goals
    goals = re.findall(r"(?:.*?goal\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    # Extract softGoals
    softGoals = re.findall(r"(?:.*?softGoal\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    # Extract tasks
    tasks = re.findall(r"(?:.*?task\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    # Extract resources
    resources = re.findall(r"(?:.*?resource\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    elements = {
        "actors": actors,
        "goals": goals,
        "softGoals": softGoals,
        "tasks": tasks,
        "resources": resources
    }
    # Group intentional elements (goals, softGoals, tasks) under their enclosing actor
    elements_per_actor = {}
    for element in goals + softGoals + tasks:
        corresponding_actor = tgrl_text.rfind('actor', 0, tgrl_text.index(element))
        corresponding_actor = re.split(' |{', tgrl_text[corresponding_actor:])[1]
        if corresponding_actor not in elements_per_actor:
            elements_per_actor[corresponding_actor] = []
        elements_per_actor[corresponding_actor].append(element)
    # Get decomposed elements ("X decomposedBy Y, Z;")
    new_tgrl_text = tgrl_text
    decomposed_elements = {}
    main_elements_1 = re.findall(r"\w+(?=\s+decomposedBy)", new_tgrl_text)
    for main_element in main_elements_1:
        sub_element_1 = re.findall(main_element + r"\s*(?: decomposedBy )([A-Za-z\s]*)", new_tgrl_text)[0]
        sub_element_1 = sub_element_1.replace(" ", "")
        sub_element_2 = re.findall(main_element + r"\s*(?: decomposedBy )" + sub_element_1 + r",\s*([A-Za-z\s]*)", new_tgrl_text)[0]
        new_tgrl_text = new_tgrl_text.replace(main_element + " decomposedBy " + sub_element_1 + ", " + sub_element_2 + ";", '')
        decomposed_elements[main_element] = [sub_element_1, sub_element_2]
    # Replace element IDs with names
    new_decomposed_elements = {}
    for key in decomposed_elements:
        new_key = re.findall(r"(?:" + key + r"\s*{\s*name\s=\s\")([A-Za-z\s]*)", tgrl_text)[0]
        new_values = []
        for element in decomposed_elements[key]:
            new_value = re.findall(r"(?:" + element + r"\s*{\s*name\s=\s\")([A-Za-z\s;.,!?:-]*)", tgrl_text)[0]
            new_values.append(new_value)
        new_decomposed_elements[new_key] = new_values
    # Get contribution links. ASSUMPTION: links are written as
    # "src contributesTo dst {help};" (qualitative) or "src contributesTo dst {25};"
    # (quantitative); adjust the pattern if your TGRL dialect differs.
    contributing_elements = {"links": []}
    for src, dst, contrib in re.findall(r"(\w+)\s+contributesTo\s+(\w+)\s*{\s*(-?\w+)\s*}", tgrl_text):
        src_name = re.findall(r"(?:" + src + r"\s*{\s*name\s=\s\")([A-Za-z\s;.,!?:-]*)", tgrl_text)
        dst_name = re.findall(r"(?:" + dst + r"\s*{\s*name\s=\s\")([A-Za-z\s;.,!?:-]*)", tgrl_text)
        if src_name and dst_name:
            contributing_elements["links"].append([src_name[0], dst_name[0], contrib])
    return elements, elements_per_actor, new_decomposed_elements, contributing_elements
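# Shapes returned above (illustrative values):
#   elements                = {"actors": [...], "goals": [...], "softGoals": [...], "tasks": [...], "resources": [...]}
#   elements_per_actor      = {"student": ["Course registration", ...], ...}  (keyed by actor ID)
#   new_decomposed_elements = {"Course registration": ["Register online", "Register on paper"]}
#   contributing_elements   = {"links": [["Register online", "Course registration", "help"], ...]}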
# ************************************************************************
# ************************* Bad Smells Detection *************************

# ########### Long Elements ###########
def get_long_elements(elements, size_threshold):  # Using RegEx
    long_elements = []
    for key in elements:
        for element in elements[key]:
            if len(re.findall(r'\w+', element)) > size_threshold:
                long_elements.append(element)
    if long_elements:
        output = ""
        for long_element in long_elements:
            refactored_element = prompt(
                '''You are a specialist in English linguistics.
                You will be provided with a sentence, and your task is to summarize it in ''' + str(size_threshold) + ''' words or fewer.
                Comply with the following conditions:
                (1) Do not convert a verb phrase to a noun phrase, and vice versa.
                (2) Change as few words as possible.
                Answer with the new sentence only.''',
                long_element)
            output = output + '"' + long_element + '" should be refactored to "' + refactored_element + '"\n'
        return "Lengthy elements:\n" + output
    else:
        return ""
# #####################################
# ######### Complex Sentences #########
def is_complex_sentence(sentence):
    # Reuse the module-level stanza pipeline instead of rebuilding it per call.
    doc = nlp(sentence)
    for parsed_sentence in doc.sentences:
        unique_constituent_labels = Tree.get_unique_constituent_labels(parsed_sentence.constituency)
        # An SBAR node marks a subordinate clause, i.e. a complex sentence.
        if 'SBAR' in unique_constituent_labels:
            return True
    return False
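# For example (illustrative), "Students register online because the office is
# closed" parses with an SBAR node and is flagged as complex, while "Students
# register online" is not.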
def get_complex_sentences(elements):
    complex_sentences = []
    for key in elements:
        for element in elements[key]:
            if is_complex_sentence(element):
                complex_sentences.append(element)
    if complex_sentences:
        output = ""
        for complex_sentence in complex_sentences:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                A complex sentence is a sentence with one independent clause and at least one dependent clause. A simple sentence has a single independent clause.
                You will be provided with a complex sentence, and your task is to make it a simple sentence.
                Do not convert a verb phrase to a noun phrase, and vice versa.
                Answer with the new sentence only.
                ''', complex_sentence)
            output = output + '"' + complex_sentence + '" should be refactored to "' + refactored_element + '"\n'
        return "Complex elements:\n" + output
    else:
        return ""
# #####################################
# ########## Punctuation ##########
def get_punctuations(elements):
    punctuations = []
    for key in elements:
        for element in elements[key]:
            if len(re.findall(r"[^\s\w\d-]", element)) > 0:
                punctuations.append(element)
    if punctuations:
        output = ""
        for punctuation in punctuations:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence, and your task is to remove all punctuation marks.
                Answer with the new sentence only.''', punctuation)
            output = output + '"' + punctuation + '" should be refactored to "' + refactored_element + '"\n'
        return "Punctuation-marked elements:\n" + output
    else:
        return ""
# #################################
# ########## Incorrect Actor Syntax ##########
def check_verb_or_noun_phrase(sentence):
    result = prompt(
        '''
        You are a specialist in English linguistics.
        You will be provided with a sentence, and your task is to determine whether the sentence is a noun phrase or a verb phrase.
        Answer with "noun phrase" or "verb phrase" and your reasons.
        Use JSON format with keys "answer" and "reason".''', sentence)
    result = json.loads(result)
    return result["answer"]
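# Usage sketch (illustrative): check_verb_or_noun_phrase("Provide maintenance
# services") should return "verb phrase". Note the call assumes the model
# replies with parseable JSON, e.g. {"answer": "verb phrase", "reason": "..."}.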
# def find_non_NPs(sentences):
#     pipeline = TokenClassificationPipeline(model=pos_model, tokenizer=pos_tokenizer)
#     outputs = pipeline(sentences)
#     Non_NPs = []
#     for idx, output in enumerate(outputs):
#         if output[0]['entity'].startswith('V'):
#             Non_NPs.append(sentences[idx])
#     return Non_NPs
def check_actor_syntax(actors):
    incorrect_actors_syntax = []
    for actor in actors:
        result = check_verb_or_noun_phrase(actor)
        if result == "verb phrase":
            incorrect_actors_syntax.append(actor)
    if incorrect_actors_syntax:
        output = ""
        for incorrect_actor_syntax in incorrect_actors_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is a verb phrase, and your task is to make it a noun phrase representing an actor.
                A noun phrase should start with a noun.
                Examples of actors: System, PC User, and Privacy Officer.
                Answer with the new sentence only.''', incorrect_actor_syntax)
            output = output + '"' + incorrect_actor_syntax + '" should be refactored to "' + refactored_element + '"\n'
        return "Incorrect actor syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Incorrect Goal Syntax ###########
def check_goal_syntax(goals):
    incorrect_goals_syntax = []
    for goal in goals:
        result = check_verb_or_noun_phrase(goal)
        if result == "verb phrase":
            incorrect_goals_syntax.append(goal)
    if incorrect_goals_syntax:
        output = ""
        for incorrect_goal_syntax in incorrect_goals_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a goal.
                A noun phrase should start with a noun.
                For example: high data quality, fast response time, and course registration.
                Answer with the new sentence only.''', incorrect_goal_syntax)
            output = output + '"' + incorrect_goal_syntax + '" should be refactored to "' + refactored_element + '"\n'
        return "Incorrect goal syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Incorrect Softgoal Syntax ###########
def check_softgoal_syntax(softgoals):
    incorrect_softgoals_syntax = []
    for softgoal in softgoals:
        result = check_verb_or_noun_phrase(softgoal)
        if result == "verb phrase":
            incorrect_softgoals_syntax.append(softgoal)
    if incorrect_softgoals_syntax:
        output = ""
        for incorrect_softgoal_syntax in incorrect_softgoals_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a softgoal.
                A noun phrase should start with a noun.
                For example: high data quality, fast response time, and course registration.
                Answer with the new sentence only.''', incorrect_softgoal_syntax)
            output = output + '"' + incorrect_softgoal_syntax + '" should be refactored to "' + refactored_element + '"\n'
        return "Incorrect softgoal syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Incorrect Task Syntax ###########
# def find_NPs(sentences):
#     pipeline = TokenClassificationPipeline(model=pos_model, tokenizer=pos_tokenizer)
#     outputs = pipeline(sentences)
#     NPs = []
#     for idx, output in enumerate(outputs):
#         if not output[0]['entity'].startswith('V'):
#             NPs.append(sentences[idx])
#     return NPs
def check_task_syntax(tasks):
    incorrect_tasks_syntax = []
    for task in tasks:
        result = check_verb_or_noun_phrase(task)
        if result == "noun phrase":
            incorrect_tasks_syntax.append(task)
    if incorrect_tasks_syntax:
        output = ""
        for incorrect_task_syntax in incorrect_tasks_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is not a verb phrase, and your task is to make it a verb phrase representing a task.
                A verb phrase should start with a verb.
                For example: provide maintenance services, help co-workers, and enhance quality.
                Answer with the new sentence only.''', incorrect_task_syntax)
            output = output + '"' + incorrect_task_syntax + '" should be refactored to "' + refactored_element + '"\n'
        return "Incorrect task syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Incorrect Resource Syntax ###########
def check_resource_syntax(resources):
    if len(resources) == 0:
        return ""
    #incorrect_resources_syntax = find_non_NPs(resources)
    incorrect_resources_syntax = []
    for resource in resources:
        result = check_verb_or_noun_phrase(resource)
        if result == "verb phrase":
            incorrect_resources_syntax.append(resource)
    if incorrect_resources_syntax:
        output = ""
        for incorrect_resource_syntax in incorrect_resources_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a resource.
                A noun phrase should start with a noun.
                For example: internet, database, and file system.
                Answer with the new sentence only.''', incorrect_resource_syntax)
            output = output + '"' + incorrect_resource_syntax + '" should be refactored to "' + refactored_element + '"\n'
        return "Incorrect resource syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Similarity ###########
def get_similar_elements(elements_per_actor, similarity_threshold):
    # Prepare sentence pair array
    sentence_pairs = []
    for key in elements_per_actor:
        for i in range(len(elements_per_actor[key])):
            for j in range(i + 1, len(elements_per_actor[key])):
                sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
    if not sentence_pairs:
        return ""
    # Predict semantic similarity
    semantic_similarity_scores = sentences_similarity_model.predict(sentence_pairs, show_progress_bar=True)
    similar_elements = []
    for index, value in enumerate(sentence_pairs):
        if semantic_similarity_scores[index] > similarity_threshold:
            similar_elements.append(value)
    if similar_elements:
        result_string = ""
        for sublist in similar_elements:
            result_string += ' and '.join(f'"{item}"' for item in sublist) + '\n'
        return "Similar elements:\n" + result_string
    else:
        return ""
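# Note: the STS-B cross-encoder predicts a continuous similarity score per pair
# (roughly on a 0-1 scale for this model family), so the default threshold of
# 0.9 flags only near-paraphrases.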
# #################################
# ########## Misspelling ###########
# def get_misspelled_words(sentence):
#     spell = Speller(only_replacements=True)
#     misspelled = []
#     for word in sentence.split():
#         correct_word = spell(word)
#         if word != correct_word:
#             misspelled.append([word, correct_word])
#     return misspelled
def check_spelling(elements):
    refactored_elements = []
    for key in elements:
        for element in elements[key]:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence, and your task is to report any misspelled words and correct the spelling if needed.
                Answer with "correct" or "misspelled". In case the sentence is misspelled, correct it with the right spelling.
                Use a JSON format with keys 'original sentence', 'answer', and 'correct sentence'.
                For example: {'original sentence': 'incraese value', 'answer': 'misspelled', 'correct sentence': 'increase value'}''', element)
            # The model tends to reply with single quotes; normalize to valid JSON.
            refactored_element = refactored_element.replace("'", '"')
            refactored_element = json.loads(refactored_element)
            if refactored_element['answer'] == 'misspelled':
                refactored_elements.append('"' + refactored_element["original sentence"] + '" should be written as "' + refactored_element["correct sentence"] + '"')
    if refactored_elements:
        refactored_elements = "\n".join(refactored_elements)
        return "Misspelled elements:\n" + refactored_elements
    else:
        return ""
# ##################################
# ########## NLI ###########
def do_nli(premise, hypothesis):
    # Tokenization
    token_ids = []
    seg_ids = []
    mask_ids = []
    premise_id = nli_tokenizer.encode(premise, add_special_tokens=False)
    hypothesis_id = nli_tokenizer.encode(hypothesis, add_special_tokens=False)
    pair_token_ids = [nli_tokenizer.cls_token_id] + premise_id + [nli_tokenizer.sep_token_id] + hypothesis_id + [nli_tokenizer.sep_token_id]
    premise_len = len(premise_id)
    hypothesis_len = len(hypothesis_id)
    segment_ids = torch.tensor([0] * (premise_len + 2) + [1] * (hypothesis_len + 1))  # sentence 0 and sentence 1
    attention_mask_ids = torch.tensor([1] * (premise_len + hypothesis_len + 3))  # mask padded values
    token_ids.append(torch.tensor(pair_token_ids))
    seg_ids.append(segment_ids)
    mask_ids.append(attention_mask_ids)
    # Forward pass
    token_ids = pad_sequence(token_ids, batch_first=True)
    mask_ids = pad_sequence(mask_ids, batch_first=True)
    seg_ids = pad_sequence(seg_ids, batch_first=True)
    with torch.no_grad():
        output = nli_model(token_ids,
                           token_type_ids=seg_ids,
                           attention_mask=mask_ids)
    # Output prediction
    result = ""
    prediction = np.argmax(output.logits.cpu().numpy()).flatten().item()
    if prediction == 0:
        result = "Entailment"
    elif prediction == 1:
        result = "Contradiction"
    elif prediction == 2:
        result = "Neutral"
    return result
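# Usage sketch (illustrative sentences):
#   do_nli("The system shall store student records",
#          "Student records are kept by the system")
#   returns one of "Entailment", "Contradiction", or "Neutral".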
# Entailment
def check_entailment(decomposed_elements):
    sentence_pairs = []
    non_matching_elements = []
    for key in decomposed_elements:
        for sub_element in decomposed_elements[key]:
            sentence_pairs.append([key, sub_element])
    for sentence_pair in sentence_pairs:
        result = do_nli(sentence_pair[0], sentence_pair[1])
        if result != "Entailment":
            non_matching_elements.append(sentence_pair)
    if non_matching_elements:
        non_matching_elements = [' and '.join(ele) for ele in non_matching_elements]
        non_matching_elements = "\n".join(non_matching_elements)
        return "The following elements are mismatched:\n" + non_matching_elements
    else:
        return "There are no mismatched elements."
# Contradiction
def check_for_linguistic_conflict(pairs):
    pairs = ",".join(str(element) for element in pairs)
    contradicting_pairs = []
    result = prompt(
        '''
        You are a specialist in English linguistics.
        You will be provided with a list of sentence pairs, and your task is to determine whether each pair can be conflicting or not.
        For example: "Increase quality of service" AND "Cut expenses" are conflicting, because increasing quality usually requires spending money.
        For each pair, answer with "yes" or "no", reasoning briefly to yourself.
        Use a list of dictionaries format with keys "pair" and "answer"; omit the reason from your response.''', pairs)
    # The model tends to reply with single quotes; normalize to valid JSON.
    result = result.replace("'", '"')
    results = json.loads(result)
    for result in results:
        if result["answer"] == "yes":
            contradicting_pairs.append(result["pair"])
    return contradicting_pairs
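# Expected reply shape (illustrative):
#   [{"pair": ["Increase quality of service", "Cut expenses"], "answer": "yes"},
#    {"pair": ["Register online", "Send confirmation"], "answer": "no"}]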
def find_paths_between_elements(elements, start_element, end_element, visited, path=None):
    # Enumerate all simple paths from start_element to end_element over the
    # contribution links (each link is a [source, destination, value] triple).
    # path defaults to None rather than a mutable [] so calls don't share state.
    if path is None:
        path = []
    visited[start_element] = True
    path.append(start_element)
    if start_element == end_element:
        yield list(path)
    else:
        for contrib in elements:
            if contrib[1] in visited:
                if contrib[0] == start_element and not visited[contrib[1]]:
                    yield from find_paths_between_elements(elements, contrib[1], end_element, visited, path)
    path.pop()
    visited[start_element] = False
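# Usage sketch (illustrative link list):
#   links = [["A", "B", "help"], ["B", "C", "25"]]
#   visited = {e: False for e in ("A", "B", "C")}
#   list(find_paths_between_elements(links, "A", "C", visited))  # -> [["A", "B", "C"]]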
def contribution_to_score(contribution):
    # Map qualitative GRL contribution labels to numeric scores; anything else
    # is assumed to be a quantitative value already.
    qualitative = {
        "make": 75, "help": 50, "somePositive": 25, "unknown": 0,
        "someNegative": -25, "break": -50, "hurt": -75,
    }
    if contribution in qualitative:
        return qualitative[contribution]
    return int(contribution)

def check_contradiction(elements_per_actor, contributing_elements):
    pairs_to_check_1 = []
    pairs_to_check_2 = []
    pairs_to_check_3 = []
    all_values_contributing_elements = []
    for values_list in contributing_elements.values():
        all_values_contributing_elements.extend(values_list)
    sentence_pairs = []
    contradicting_elements = []
    # Case 1: contradicting elements contributing similarly to other elements
    for key in elements_per_actor:
        for i in range(len(elements_per_actor[key])):
            for j in range(i + 1, len(elements_per_actor[key])):
                sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
    for sentence_pair in sentence_pairs:
        contribution_scores = []
        for contributing_element in all_values_contributing_elements:
            if contributing_element[0] == sentence_pair[0] or contributing_element[0] == sentence_pair[1]:
                contribution_scores.append((contributing_element[0], contribution_to_score(contributing_element[2])))
        if len(contribution_scores) < 2:
            pairs_to_check_1.append([sentence_pair[0].replace("'", ""), sentence_pair[1].replace("'", "")])
        else:
            flag = 0
            for pair in itertools.combinations(contribution_scores, r=2):
                if pair[0][0] != pair[1][0]:
                    if pair[0][1] * pair[1][1] < 0:
                        flag = 1
            if flag == 0:
                pairs_to_check_2.append([sentence_pair[0].replace("'", ""), sentence_pair[1].replace("'", "")])
    # Case 2: contradicting elements contributing similarly to each other,
    # taking into consideration the full path between the two elements
    for key, value in elements_per_actor.items():
        for element1 in value:
            for element2 in value:
                if element1 != element2:
                    visited = {e: False for e in value}
                    for path in find_paths_between_elements(all_values_contributing_elements, element1, element2, visited):
                        first_edge_value = next((contrib[2] for contrib in all_values_contributing_elements if contrib[0] == path[0] and contrib[1] == path[1]), None)
                        last_edge_value = next((contrib[2] for contrib in all_values_contributing_elements if contrib[0] == path[-2] and contrib[1] == path[-1]), None)
                        if first_edge_value is not None and last_edge_value is not None and contribution_to_score(first_edge_value) * contribution_to_score(last_edge_value) > 0:
                            pairs_to_check_3.append([element1.replace("'", ""), element2.replace("'", "")])
    pairs_to_check = pairs_to_check_1 + pairs_to_check_2 + pairs_to_check_3
    # Split the pair list into sublists of 30 items to keep each prompt short
    divided_lists = []
    for i in range(0, len(pairs_to_check), 30):
        divided_lists.append(pairs_to_check[i:i + 30])
    for divided_list in divided_lists:
        contradicting_elements = contradicting_elements + check_for_linguistic_conflict(divided_list)
    if contradicting_elements:
        # Deduplicate pairs via a set of tuples, then convert back to lists
        contradicting_elements = set(tuple(sublist) for sublist in contradicting_elements)
        contradicting_elements = [list(sublist) for sublist in contradicting_elements]
        contradicting_elements = [' and '.join(ele) for ele in contradicting_elements]
        contradicting_elements = "\n".join(contradicting_elements)
        return "Conflicting elements:\n" + contradicting_elements
    else:
        return ""
# ##########################
# ************************* User Interface *************************
def detect_bad_smells(tgrl_file, selected_bad_smells, size_threshold, similarity_threshold):
    output = ""
    tgrl_text = parse_tgrl(tgrl_file)
    all_elements, elements_per_actor, decomposed_elements, contributing_elements = extract_elements(tgrl_text)
    if 'Lengthy element' in selected_bad_smells:
        result = get_long_elements(all_elements, size_threshold)
        if result != "":
            output = output + result + "\n\n"
    if 'Complex element' in selected_bad_smells:
        result = get_complex_sentences(all_elements)
        if result != "":
            output = output + result + "\n\n"
    if 'Punctuation-marked element' in selected_bad_smells:
        result = get_punctuations(all_elements)
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect actor syntax' in selected_bad_smells:
        result = check_actor_syntax(all_elements['actors'])
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect goal syntax' in selected_bad_smells:
        result = check_goal_syntax(all_elements['goals'])
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect softgoal syntax' in selected_bad_smells:
        result = check_softgoal_syntax(all_elements['softGoals'])
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect task syntax' in selected_bad_smells:
        result = check_task_syntax(all_elements['tasks'])
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect resource syntax' in selected_bad_smells:
        result = check_resource_syntax(all_elements['resources'])
        if result != "":
            output = output + result + "\n\n"
    if 'Similar elements' in selected_bad_smells:
        result = get_similar_elements(elements_per_actor, similarity_threshold)
        if result != "":
            output = output + result + "\n\n"
    if 'Misspelled element' in selected_bad_smells:
        result = check_spelling(all_elements)
        if result != "":
            output = output + result + "\n\n"
    if 'Goal/Task and Sub-goal/Sub-task mismatch' in selected_bad_smells:
        result = check_entailment(decomposed_elements)
        if result != "":
            output = output + result + "\n\n"
    if 'Conflicting elements' in selected_bad_smells:
        result = check_contradiction(elements_per_actor, contributing_elements)
        if result != "":
            output = output + result + "\n\n"
    return output
interface = gr.Interface(fn=detect_bad_smells,
                         inputs=[gr.File(label="TGRL File"),
                                 gr.CheckboxGroup(["Lengthy element", "Complex element", "Punctuation-marked element", "Incorrect actor syntax", "Incorrect goal syntax", "Incorrect softgoal syntax", "Incorrect task syntax", "Incorrect resource syntax", "Similar elements", "Misspelled element", "Goal/Task and Sub-goal/Sub-task mismatch", "Conflicting elements"],
                                                  label="Which bad smells do you want to detect and refactor?"),
                                 gr.Slider(label="Length threshold", value=5, minimum=2, maximum=10, step=1),
                                 gr.Slider(label="Similarity threshold", value=0.9, minimum=0, maximum=1, step=0.1)],
                         outputs=[gr.Textbox(label="Detected and refactored bad smells:")],
                         title="TGRL Bad Smells Detection and Refactoring",
                         description="Upload your .xgrl file and we will find the bad smells and refactor them for you!",
                         theme=gr.themes.Soft())
interface.launch(inline=False)