"""Generate English exercise questions (word, phrase, sentence and grammar).

Each question type is a small class exposing ``title`` (the Chinese question
heading) and ``generate(materials, question_num, option_num, model)``, which
returns a list of ``{'question': str, 'answer': str}`` dicts. The module-level
``generate`` function dispatches on the heading text.

NOTE(review): ``number_letter_dict``, ``vowels_list``, ``vowel_list`` and
``model_name`` are not defined in this file; they are presumably supplied by
the star imports from ``config`` / ``prompt`` -- confirm.
"""
# NOTE: import order is kept exactly as in the original because the star
# imports can shadow (or be shadowed by) the surrounding names.
import re
import math
from pathlib import Path
from typing import List, Dict
from translate import Translator
from config import *
from prompt import *
from llm import OpenAI3, OpenAI4
import random

# question_type_to_material_type = {'WordCompleteChoice': 'word',
#                                   'WordCompleteBlank': 'word',
#                                   'WordRightSpellingChoice': 'word',
#                                   'PhraseTranslatingMatchChoice': 'phrase',
#                                   'SentenceRecompositionBlank': 'sentence',
#                                   'SentenceCompleteChoice': 'sentence',
#                                   'GrammarChoice': 'grammar'
#                                   }

# Maps the user-facing question heading to the name of the class below that
# generates that kind of question.
question_type_to_class_type = {
    '请将选择正确的拼写补全单词': 'WordCompleteChoice',
    '根据汉语意思补全单词': 'WordCompleteBlank',
    '根据汉语意思选出拼写正确的单词': 'WordRightSpellingChoice',
    '请根据汉语意思选择正确的短语': 'PhraseTranslatingMatchChoice',
    '请将下列单词组成完整的一句话': 'SentenceRecompositionBlank',
    '请选择正确的短语补全句子': 'SentenceCompleteChoice',
    '请根据语法知识选择正确的选项补全句子': 'GrammarChoice',
}

# Characters that may follow a one-character option index ("A.", "1:", "b)").
# The original list repeated ':' and ')'; the full-width forms are added on the
# assumption that they were the intended second entries -- TODO confirm.
_INDEX_SEPARATORS = ('.', ':', '、', ')', '：', '）')

# Sentence-final punctuation kept next to its blank in recomposition stems.
# Same assumption as above for the full-width '？' and '！'.
_SENTENCE_PUNCTUATION = ('.', '?', '!', '。', '？', '！')

# Label patterns used to split an LLM reply into its parts. The alternatives
# and their order are unchanged from the original inline patterns.
_STEM_LABELS = r'stem: |stem:|Stem: |Stem:'
_OPTIONS_LABELS = r'|options: |options:|Options: |Options:'
_ANSWER_LABELS = (r'|Correct answer: |Correct answer:|correct answer: |correct answer:'
                  r'|answer: |answer:|Answer: |Answer:')
_QUESTION_LABELS = r'question: |question:|Question: |Question:'

_STEM_OPTIONS_ANSWER_RE = re.compile(_STEM_LABELS + _OPTIONS_LABELS + _ANSWER_LABELS)
_STEM_OPTIONS_RE = re.compile(_STEM_LABELS + _OPTIONS_LABELS)
_QUESTION_ANSWER_RE = re.compile(_QUESTION_LABELS + _ANSWER_LABELS)


def trans_en_to_cn(word: str) -> str:
    """Translate an English word or phrase to Chinese.

    :param word: English text to translate.
    :return: the Chinese translation returned by ``Translator``; 'crayon' /
        'Crayon' are answered from a hard-coded override instead.
    """
    if word in ('crayon', 'Crayon'):
        return '蜡笔'
    return Translator(from_lang='English', to_lang='Chinese').translate(word)


def delete_index(string: str) -> str:
    """Strip a leading one-character index such as ``A.`` or ``1:``.

    :param string: option text, possibly prefixed with an index marker.
    :return: the text without its first two characters when the second
        character is an index separator, otherwise the text unchanged.
        Whitespace after the marker is left in place.
    """
    # Single-character strings are left alone: word-completion options can be
    # a single letter.
    if len(string) > 1 and string[1] in _INDEX_SEPARATORS:
        return string[2:]
    return string


def normalize_options_and_answer(answer, options):
    """Shuffle the options, relabel them and label the answer to match.

    :param answer: text of the correct option (an index prefix is tolerated).
    :param options: list of option texts; it is not modified.
    :return: ``(answer, options)``. With exactly one option both are returned
        without labels (``options`` becomes that single string). Otherwise
        ``answer`` is ``'<label>:<text>'`` and ``options`` is the
        comma-joined ``'<label>:<text>'`` items, with labels taken from
        ``number_letter_dict``.
    :raises ValueError: if the answer is not among the options.
    """
    if len(options) == 1:
        return answer, options[0]

    shuffled = list(options)  # copy so the caller's list keeps its order
    random.shuffle(shuffled)
    cleaned = [delete_index(option) for option in shuffled]
    answer = delete_index(answer)
    answer_index = cleaned.index(answer)
    answer = number_letter_dict[answer_index] + ':' + answer
    labelled = [number_letter_dict[i] + ':' + option for i, option in enumerate(cleaned)]
    return answer, ','.join(labelled)


def _split_response(response):
    """Split an LLM reply into ``(question, stem, options, answer)``.

    Three layouts are tried in order: stem/options/answer, stem/options, and
    question/answer. Parts a layout does not provide are ``None``.

    :raises ValueError: if the reply matches none of the layouts.
    """
    parts = _STEM_OPTIONS_ANSWER_RE.split(response)
    if len(parts) == 4:
        return None, parts[1], parts[2], parts[3]

    parts = _STEM_OPTIONS_RE.split(response)
    if len(parts) == 3:
        return None, parts[1], parts[2], None

    _, question, answer = _QUESTION_ANSWER_RE.split(response)
    return question.rstrip('\n'), None, None, answer


def response_to_question(response):
    """Parse an LLM reply into a ``(question, answer)`` pair.

    For replies with a stem and options, the options are shuffled and
    relabelled and the question is ``stem + '\\n' + options``. When the reply
    has no separate answer part, the option containing 'correct answer' is
    used as the answer. Question/answer replies are returned as parsed.

    :param response: raw text returned by the model.
    :return: ``(question, answer)``; both are ``'fail!'`` when the reply
        cannot be parsed (the reason is printed).
    """
    try:
        question, stem, options, answer = _split_response(response)
        if stem:
            stem = stem.rstrip('\n')
        if options:
            option_lines = [line for line in options.split('\n') if line != '']
            if answer is None:
                answer = 'None'
                for i, line in enumerate(option_lines):
                    if 'correct answer' in line:
                        # Keep only the text before the marker, both as the
                        # answer and as the displayed option.
                        answer = re.split(r'\(correct answer\)|correct answer', line)[0]
                        option_lines[i] = answer
            answer = answer.split('\n')[0]
            answer, options = normalize_options_and_answer(answer, option_lines)
            question = stem + '\n' + options
        elif question is None:
            # A stem without options used to leave ``question`` unbound and
            # crash at the return statement, outside the error handling.
            raise ValueError('no options found')
    except Exception as e:
        print(f'fail!,reason:{e},response:{response}')
        question = 'fail!'
        answer = 'fail!'
    return question, answer


def response_to_options(response):
    """Split an LLM reply into option texts, one per non-blank line.

    :param response: raw text returned by the model.
    :return: list of option texts with any leading index marker removed.
    """
    return [delete_index(line) for line in response.split('\n') if line.strip()]


def _materials_select(materials: List[str], question_num: int, option_num: int = 1):
    """Randomly pick the source material for each question.

    :param materials: candidate materials (words, phrases, sentences, ...).
    :param question_num: number of questions to pick material for.
    :param option_num: number of materials needed per question.
    :return: a list of ``question_num`` items; each item is a single material
        when ``option_num == 1``, otherwise a list of ``option_num`` materials.
    :raises ValueError: if ``materials`` is empty.
    """
    if not materials:
        raise ValueError('materials must not be empty')
    # Repeat the pool so that a sample of size option_num is always possible.
    pool = materials * math.ceil(option_num / len(materials))
    selections = [random.sample(pool, option_num) for _ in range(question_num)]
    if option_num == 1:
        return [picked[0] for picked in selections]
    return selections


def _generate(prompt, model='openai_3'):
    """Send ``prompt`` to the model registered under ``model`` and return the reply.

    :param prompt: prompt text.
    :param model: key into ``model_name``.
    """
    # NOTE(review): eval() runs whatever string ``model_name`` holds. The
    # values are not visible in this file; if they are plain class names,
    # replace this with an explicit name -> class mapping.
    model = eval(model_name[model])()
    response = model(prompt)
    return response


def _swap_one_occurrence(word, target, replacement):
    """Replace one randomly chosen occurrence of ``target`` in ``word``.

    ``target`` must be a non-empty substring of ``word``.
    """
    pieces = word.split(target)
    chosen = random.randint(0, len(pieces) - 2)
    rebuilt = []
    for i, piece in enumerate(pieces[:-1]):
        rebuilt.append(piece)
        rebuilt.append(replacement if i == chosen else target)
    rebuilt.append(pieces[-1])
    return ''.join(rebuilt)


def _generate_fake_word(word):
    """Return a misspelled variant of ``word``.

    Strategies, in order: swap one vowel group from ``vowels_list`` for
    another of the same length; swap one single vowel from ``vowel_list`` for
    another; double one randomly chosen letter. An empty word is returned
    unchanged.
    """
    groups_in_word = [group for group in vowels_list if group in word]
    if groups_in_word:
        true_letters = random.choice(groups_in_word)
        candidates = [group for group in vowels_list
                      if len(group) == len(true_letters) and group != true_letters]
        if candidates:  # otherwise fall through instead of crashing
            return _swap_one_occurrence(word, true_letters, random.choice(candidates))

    vowel_positions = [i for i, letter in enumerate(word) if letter in vowel_list]
    if vowel_positions:
        index = random.choice(vowel_positions)
        alternatives = [vowel for vowel in vowel_list if vowel != word[index]]
        if alternatives:
            return word[:index] + random.choice(alternatives) + word[index + 1:]

    if not word:
        return word
    index = random.randrange(len(word))
    # Double the chosen letter. The original appended word[:index] here,
    # which dropped the tail of the word and repeated its prefix.
    return word[:index] + word[index] + word[index:]


def _distinct_fake_words(word, count):
    """Return up to ``count`` distinct misspellings of ``word``.

    Fewer are returned when the bounded number of attempts does not yield
    enough distinct variants.
    """
    fakes = []
    attempts_left = 10 * max(count, 0)
    while len(fakes) < count and attempts_left > 0:
        attempts_left -= 1
        fake = _generate_fake_word(word)
        if fake != word and fake not in fakes:
            fakes.append(fake)
    return fakes


def _blank_random_letters(word, option_num):
    """Blank random letters of a non-empty ``word`` and build letter options.

    :return: ``(stem, answer, options)`` where ``stem`` is the word with the
        chosen letters replaced by '_', ``answer`` is the comma-joined blanked
        letters in word order, and ``options`` starts with ``answer`` followed
        by distinct distractors made from other letter picks of the word.
    """
    positions = list(range(len(word)))
    # Blank 2-3 letters but leave at least three visible; never fewer than
    # one blank. A count <= 0 used to make every distractor '' and loop forever.
    num = max(1, min(random.randint(2, 3), len(word) - 3))
    random.shuffle(positions)
    blanked = sorted(positions[:num])
    answer = ','.join(word[i] for i in blanked)
    stem = ''.join('_' if i in blanked else letter for i, letter in enumerate(word))

    options = [answer]
    # Bounded: a short or repetitive word may not have enough distinct picks.
    attempts_left = 20 * max(option_num, 1)
    while len(options) < option_num and attempts_left > 0:
        attempts_left -= 1
        random.shuffle(positions)
        candidate = ','.join(word[i] for i in positions[:num])
        if candidate not in options:
            options.append(candidate)
    return stem, answer, options


def _blank_letter_run(word):
    """Replace one random run of consecutive letters in ``word`` with '_'."""
    if not word:
        return word
    # 2-3 blanks, leaving at least two letters visible, but always >= 1 blank.
    num = max(1, min(random.randint(2, 3), len(word) - 2))
    # randint is inclusive; the last valid start is len(word) - num.
    start = random.randint(0, len(word) - num)
    return word[:start] + '_' * num + word[start + num:]


def _shuffle_until_changed(items, max_attempts=100):
    """Shuffle ``items`` in place until the order differs from the original.

    Does nothing when no different order exists (fewer than two distinct
    items); the original unbounded loop never terminated in that case.
    """
    if len(set(items)) < 2:
        return
    original = list(items)
    for _ in range(max_attempts):
        random.shuffle(items)
        if items != original:
            return


class WordCompleteChoice(object):
    """Multiple choice: pick the letters that complete a partly blanked word."""

    title = '请将选择正确的拼写补全单词'

    @staticmethod
    def generate(words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build word-completion choice questions.

        :param words: candidate words.
        :param question_num: number of questions to generate.
        :param option_num: number of options per question.
        :param model: unused; accepted for a uniform interface.
        :return: list of ``{'question', 'answer'}`` dicts. Empty words are skipped.
        """
        responses = []
        for word in _materials_select(words, question_num):
            if not word:
                continue
            groups_in_word = [group for group in vowels_list if group in word]
            if groups_in_word:
                answer = random.choice(groups_in_word)
                stem = _swap_one_occurrence(word, answer, '_' * len(answer))
                options = [group for group in vowels_list
                           if len(group) == len(answer) and group != answer]
                if len(options) > option_num - 1:
                    random.shuffle(options)
                    options = options[:option_num - 1]
                options.append(answer)
            else:
                stem, answer, options = _blank_random_letters(word, option_num)
            answer, options = normalize_options_and_answer(answer, options)
            responses.append({'question': stem + '\n' + options, 'answer': answer})
        return responses


class WordCompleteBlank(object):
    """Fill in the blank: complete partly blanked words given their Chinese meaning."""

    title = '根据汉语意思补全单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """Build fill-in-the-blank word questions.

        :param words: candidate words.
        :param question_num: number of questions to generate.
        :param option_num: number of words per question.
        :param model: unused; accepted for a uniform interface.
        :return: list of ``{'question', 'answer'}`` dicts; each value is the
            space-joined ``'<chinese>:<word>'`` items of one question.
        """
        responses = []
        for group in _materials_select(words, question_num, option_num):
            if isinstance(group, str):
                # With option_num == 1 the selection is a bare word; iterating
                # it directly would walk over its characters.
                group = [group]
            stems = []
            answers = []
            for word in group:
                groups_in_word = [letters for letters in vowels_list if letters in word]
                chinese = trans_en_to_cn(word)
                if groups_in_word:
                    letters = random.choice(groups_in_word)
                    blanked = _swap_one_occurrence(word, letters, '_' * len(letters))
                else:
                    blanked = _blank_letter_run(word)
                stems.append(chinese + ':' + blanked)
                answers.append(chinese + ':' + word)
            responses.append({'question': ' '.join(stems), 'answer': ' '.join(answers)})
        return responses


class WordRightSpellingChoice(object):
    """Multiple choice: pick the correctly spelled word for a Chinese meaning."""

    title = '根据汉语意思选出拼写正确的单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build spelling choice questions.

        :param words: candidate words.
        :param question_num: number of questions to generate.
        :param option_num: number of options per question; fewer are produced
            when not enough distinct misspellings can be generated.
        :param model: unused; accepted for a uniform interface.
        :return: list of ``{'question', 'answer'}`` dicts.
        """
        responses = []
        for word in _materials_select(words, question_num):
            stem = trans_en_to_cn(word)
            options = _distinct_fake_words(word, option_num - 1)
            options.append(word)
            answer, options = normalize_options_and_answer(word, options)
            responses.append({'question': stem + '\n' + options, 'answer': answer})
        return responses


class SentenceRecompositionBlank(object):
    """Reorder shuffled words into a complete sentence."""

    title = '请将下列单词组成完整的一句话'
    generate_similar_sentence_prompt = 'Please generate a sentence with the same sentence structure as ' \
                                       '<{sentence}> using simple vocabulary'

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num=None, model=None,
                 generate_mode: bool = False) -> List[Dict]:
        """Build sentence-recomposition questions.

        :param sentences: candidate sentences.
        :param question_num: number of questions to generate.
        :param option_num: unused; accepted for a uniform interface.
        :param model: model key passed to ``_generate`` in generate mode.
        :param generate_mode: when true, ask the model for a similar sentence
            and use its first sentence; the source sentence is used if that fails.
        :return: list of ``{'question', 'answer'}`` dicts; the question is a
            row of blanks, a newline, then the comma-separated shuffled words.
        """
        responses = []
        for sentence in _materials_select(sentences, question_num):
            sent = sentence
            if generate_mode:
                try:
                    prompt = cls.generate_similar_sentence_prompt.format(sentence=sentence)
                    sent = re.split(r'[.?!]', _generate(prompt, model))[0]
                except Exception:
                    # Best effort: fall back to the source sentence.
                    sent = sentence
            answer = sent
            # split() rather than split(' '): repeated spaces produced empty
            # words, and indexing their last character crashed.
            words = sent.split()
            stem = []
            for i, word in enumerate(words):
                blank = '______'
                if word[-1] in _SENTENCE_PUNCTUATION:
                    # Keep the punctuation with the blank, not with the word.
                    blank += word[-1]
                    words[i] = word[:-1]
                stem.append(blank)
            _shuffle_until_changed(words)
            question = ' '.join(stem) + '\n' + ', '.join(words)
            responses.append({'question': question, 'answer': answer})
        return responses


class PhraseTranslatingMatchChoice(object):
    """Multiple choice: pick the English phrase matching a Chinese meaning."""

    title = '请根据汉语意思选择正确的短语'
    generate_similar_phrase_prompt = 'Please generate {num} phrases with the same phrase structure as <{phrase}> by ' \
                                     'replacing a single word in this phrase. Please return the answer in a list. ' \
                                     'Try your best to use simple vocabulary when generating phrases.'
    translate_chinese_phrase_prompt = 'Please translate the following phrase to chinese: <{phrase}>'

    @classmethod
    def generate(cls, phrases: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """Build phrase-translation choice questions using the model.

        :param phrases: candidate phrases.
        :param question_num: number of questions to attempt.
        :param option_num: number of options requested per question.
        :param model: model key passed to ``_generate``.
        :return: list of ``{'question', 'answer'}`` dicts; a phrase whose
            generation raises is skipped after printing the error.
        """
        responses = []
        num = option_num - 1
        for phrase in _materials_select(phrases, question_num):
            try:
                phrase = phrase[0].upper() + phrase[1:]
                option_prompt = cls.generate_similar_phrase_prompt.format(num=num, phrase=phrase)
                options = response_to_options(_generate(option_prompt, model))
                options.append(phrase)
                answer, options = normalize_options_and_answer(phrase, options)
                stem_prompt = cls.translate_chinese_phrase_prompt.format(phrase=phrase)
                stem_response = _generate(stem_prompt, model)
                responses.append({'question': stem_response + '\n' + options, 'answer': answer})
            except Exception as e:
                print(e)
        return responses


class SentenceCompleteChoice(object):
    """Multiple choice: pick the phrase that completes a sentence."""

    title = '请选择正确的短语补全句子'
    # NOTE(review): the example's "Options" and "Answer" parts are joined
    # without a separator ("...D: graduateAnswer: ..."); text left unchanged.
    generate_sentence_complete_question_prompt = (
        'Assuming you are an English teacher and a question consists of three '
        'parts: the stem, options, and answer. please provide a multiple-choice '
        'question based on the sentence <{sentence}>.The specific steps are: '
        'Randomly blank out a part of the sentence and replace it with ____ to '
        'create the stem.Generate {num} new phrases/words that are '
        'structurally consistent with the blanked-out part and include the new '
        'phrases/words and blanked-out part as the options.Use the blanked-out '
        'part as the answer. For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate B: say goodbye C: take a break D: graduate'
        'Answer: D: graduate '
    )

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build sentence-completion choice questions using the model.

        :param sentences: candidate sentences.
        :param question_num: number of questions to attempt.
        :param option_num: number of options requested per question.
        :param model: model key passed to ``_generate``.
        :return: list of ``{'question', 'answer'}`` dicts; unparseable replies
            yield ``'fail!'`` entries, and a sentence whose generation raises
            is skipped after printing the error.
        """
        responses = []
        num = option_num - 1
        for sentence in _materials_select(sentences, question_num):
            try:
                prompt = cls.generate_sentence_complete_question_prompt.format(sentence=sentence, num=num)
                # Every period becomes a line break, including those inside
                # the inserted sentence.
                prompt = prompt.replace('.', '\n')
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)
        return responses


class GrammarChoice(object):
    """Multiple choice: complete a sentence to test a grammar rule."""

    title = '请根据语法知识选择正确的选项补全句子'
    # NOTE(review): the example's answer ("D: graduate") does not match its
    # option D ("celebrates"); text left unchanged.
    generate_grammar_question_prompt = (
        'Assuming you are an English teacher and a question consists of three parts: the '
        'stem, options, and answer. Please provide a sentence completion type '
        'multiple-choice question to test students understanding of the grammar rule'
        ' <{grammar}>.The specific steps are: Generate a sentence using this '
        'grammar rule with simple vocabulary. Blank out the part related to the grammar '
        'of this sentence. Use the sentence that has been blanked out as the stem.'
        'Generate {num} phrases/words with the same meaning but different usage using '
        'the blanked-out part and include the new phrases/words and blanked-out part as '
        'the options.Use the blanked-out part as the correct answer.For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate B: celebrating C: celebrated D: celebrates '
        'Answer: D: graduate'
    )

    @classmethod
    def generate(cls, grammars: List[str], question_num: int = 1, option_num: int = 4, model=None) -> List[Dict]:
        """Build grammar choice questions using the model.

        :param grammars: candidate grammar rules.
        :param question_num: number of questions to attempt.
        :param option_num: number of options requested per question.
        :param model: model key passed to ``_generate``.
        :return: list of ``{'question', 'answer'}`` dicts; unparseable replies
            yield ``'fail!'`` entries, and a rule whose generation raises is
            skipped after printing the error.
        """
        responses = []
        num = option_num - 1
        for grammar in _materials_select(grammars, question_num):
            try:
                prompt = cls.generate_grammar_question_prompt.format(grammar=grammar, num=num)
                # Every period becomes a line break, including those inside
                # the inserted grammar rule.
                prompt = prompt.replace('.', '\n')
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)
        return responses


def txt_to_list(txt):
    """Split text into lines, ignoring leading and trailing newlines.

    :param txt: text with one item per line.
    :return: list of lines (interior empty lines are kept); ``[]`` when the
        text is empty or consists only of newlines.
    """
    stripped = txt.strip('\n')
    if not stripped:
        return []
    return stripped.split('\n')


# def generate_scope(progress: str, q_type):
#     """
#     :param progress:
#     :param q_type:
#     :return:
#     """
#
#     if progress == '期中':
#         progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7']
#     elif progress == '期末':
#         progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7', 'unit8', 'unit9',
#                     'unit10', 'unit11', 'unit12', 'unit13', 'unit14']
#     else:
#         progress = [progress]
#
#     scope = {'word': [], 'phrase': [], 'sentence': [], 'grammar': []}
#
#     root_path = Path(__file__).parent
#     for i in progress:
#         path = root_path.joinpath('material', i + '.txt')
#         with open(path, 'r', encoding='utf-8') as file:
#             content = file.read()
#         # scope = re.split(r'|||', content)
#         _, word, phrase, sentence, grammar = re.split(r'|||', content)
#         scope['word'].extend(txt_to_list(word))
#         scope['phrase'].extend(txt_to_list(phrase))
#         scope['sentence'].extend(txt_to_list(sentence))
#         scope['grammar'].extend(txt_to_list(grammar))
#     m_type = question_type_to_material_type[q_type]
#     return scope[m_type]


# def generate(progress, q_type, q_num, **kwargs):
#     """
#     :param progress:
#     :param q_type:
#     :param q_num:
#     :param kwargs:
#         model is a must when use the llm, for example:
#         model = 'openai_3'
#     :return:
#     """
#     materials = generate_scope(progress, q_type)
#     return eval(q_type).generate(materials, q_num, **kwargs)


def generate(materials, q_type, q_num, **kwargs):
    """Generate questions of the type identified by its Chinese heading.

    :param materials: source materials for the question type.
    :param q_type: a key of ``question_type_to_class_type``.
    :param q_num: number of questions to generate.
    :param kwargs: forwarded to the question class's ``generate``; ``model``
        is required for the LLM-backed types, for example
        ``model='chatgpt_3.5'``.
    :return: list of ``{'question', 'answer'}`` dicts.
    :raises KeyError: if ``q_type`` is not a known heading.
    """
    # materials = generate_scope(progress, q_type)
    # The mapped names are classes defined in this module, so a namespace
    # lookup replaces eval().
    question_class = globals()[question_type_to_class_type[q_type]]
    return question_class.generate(materials, q_num, **kwargs)


# ans1 = generate_word_complete_question(['word', 'manager', 'answer', 'fight', 'jump', 'hihhttgrh'])
# ans2 = generate_phrase_select_question(['the Dragon Boat Festival'], 3)
# ans3 = generate_sentence_recomposition_question(['You really require a lot of talent and hard work to succeed.',
#                                                  'I have a job interview tomorrow.'], False)
# ans4 = generate_sentence_complete_question(['You really require a lot of talent and hard work to succeed.',
#                                             'I have a job interview tomorrow.'])
# ans5 = generate_grammar_question(['be supposed to', 'It is + adj. + 动词不定式'])
# print(ans5)