Spaces:
Sleeping
Sleeping
import re | |
import math | |
from pathlib import Path | |
from typing import List, Dict | |
from translate import Translator | |
from config import * | |
from prompt import * | |
from llm import OpenAI3, OpenAI4 | |
import random | |
# question_type_to_material_type = {'WordCompleteChoice': 'word', | |
# 'WordCompleteBlank': 'word', | |
# 'WordRightSpellingChoice': 'word', | |
# 'PhraseTranslatingMatchChoice': 'phrase', | |
# 'SentenceRecompositionBlank': 'sentence', | |
# 'SentenceCompleteChoice': 'sentence', | |
# 'GrammarChoice': 'grammar' | |
# } | |
# Maps a question title (the same text each generator class stores in its
# ``title`` attribute) to the name of the class that builds that question
# type.  The module-level ``generate`` function resolves the class from here.
question_type_to_class_type = {
    '请将选择正确的拼写补全单词': 'WordCompleteChoice',
    '根据汉语意思补全单词': 'WordCompleteBlank',
    '根据汉语意思选出拼写正确的单词': 'WordRightSpellingChoice',
    '请根据汉语意思选择正确的短语': 'PhraseTranslatingMatchChoice',
    '请将下列单词组成完整的一句话': 'SentenceRecompositionBlank',
    '请选择正确的短语补全句子': 'SentenceCompleteChoice',
    '请根据语法知识选择正确的选项补全句子': 'GrammarChoice',
}
def trans_en_to_cn(word: str) -> str:
    """Return the Chinese translation of an English word.

    ``'crayon'`` and ``'Crayon'`` are answered with a hard-coded translation;
    every other word is passed to ``Translator`` (English -> Chinese).

    :param word: the English word to translate
    :return: the Chinese text
    """
    if word == 'crayon' or word == 'Crayon':
        return '蜡笔'
    translator = Translator(from_lang='English', to_lang='Chinese')
    return translator.translate(word)
def delete_index(string: str) -> str:
    """Remove a one-character label such as ``A.`` or ``1)`` from the front.

    The label is recognised purely by the second character being one of the
    delimiters below; in that case the first two characters are dropped.
    Anything after the delimiter (including a leading space) is kept as is.

    :param string: option or answer text, possibly prefixed with a label
    :return: the text without the label, or the input unchanged
    """
    # Strings of length 0 or 1 are left alone: a single letter can be a
    # genuine option when a word-completion question is generated.
    if len(string) <= 1:
        return string
    if string[1] not in ('.', ':', '、', ')'):
        return string
    return string[2:]
def normalize_options_and_answer(answer, options):
    """Shuffle the options, relabel them, and label the answer to match.

    With exactly one option nothing is shuffled or labelled: the answer is
    returned unchanged together with that single option string.

    Otherwise any existing one-character label is stripped from every option
    and from the answer (see ``delete_index``), the options are put in random
    order, and each gets a new ``<label>:`` prefix taken from
    ``number_letter_dict`` by position.  The options are then joined with
    commas.

    The caller's ``options`` list is not modified.

    :param answer: text of the correct option (optionally labelled)
    :param options: list of option strings; must contain the answer
    :return: ``(answer, options)`` where both are strings
    :raises ValueError: if the answer is not among the options
    """
    if len(options) == 1:
        return answer, options[0]

    # Shuffle a copy so the list owned by the caller keeps its order.
    shuffled = list(options)
    random.shuffle(shuffled)

    cleaned = [delete_index(option) for option in shuffled]
    answer_text = delete_index(answer)
    answer_pos = cleaned.index(answer_text)

    labelled_answer = number_letter_dict[answer_pos] + ':' + answer_text
    labelled_options = [number_letter_dict[pos] + ':' + text
                        for pos, text in enumerate(cleaned)]
    return labelled_answer, ','.join(labelled_options)
def response_to_question(response):
    """Parse an LLM reply into a ``(question, answer)`` pair of strings.

    Three layouts are tried in order:

    1. ``Stem: ... Options: ... Answer: ...``
    2. ``Stem: ... Options: ...`` where the correct option is marked with the
       words ``correct answer`` inside the options
    3. ``Question: ... Answer: ...`` (returned without option handling)

    For layouts 1 and 2 the options are split on newlines, passed through
    ``normalize_options_and_answer`` and appended to the stem on a new line.

    Any parsing problem is reported with ``print`` and yields
    ``('fail!', 'fail!')``; the function does not raise for ``Exception``
    subclasses.

    :param response: raw text returned by the model
    :return: ``(question, answer)``
    """
    stem_marks = r'stem: |stem:|Stem: |Stem:'
    option_marks = r'|options: |options:|Options: |Options:'
    answer_marks = (r'|Correct answer: |Correct answer:|correct answer: |correct answer:'
                    r'|answer: |answer:|Answer: |Answer:')
    question_marks = r'question: |question:|Question: |Question:'

    try:
        # A wrong number of pieces makes the tuple unpacking raise ValueError,
        # which is the signal to try the next layout.
        try:
            _, stem, options, answer = re.split(
                stem_marks + option_marks + answer_marks, response)
        except ValueError:
            try:
                _, stem, options = re.split(stem_marks + option_marks, response)
                answer = None
            except ValueError:
                _, question, answer = re.split(question_marks + answer_marks, response)
                stem = None
                options = None
                question = question.rstrip('\n')

        if stem:
            stem = stem.rstrip('\n')

        if options:
            option_lines = [line for line in options.split('\n') if line != '']
            if answer is None:
                answer = 'None'
            for pos, line in enumerate(option_lines):
                if 'correct answer' in line:
                    # Keep only the text before the marker, both as the
                    # answer and as the displayed option.
                    answer = re.split(r'\(correct answer\)|correct answer', line)[0]
                    option_lines[pos] = answer
            answer = answer.split('\n')[0]
            answer, options = normalize_options_and_answer(answer, option_lines)
            question = stem + '\n' + options
        elif stem is not None:
            # A stem without any options cannot form a choice question.
            # Without this guard ``question`` would be unbound at the return.
            raise ValueError('no options found in response')
    except Exception as e:
        print(f'fail!,reason:{e},response:{response}')
        question = 'fail!'
        answer = 'fail!'
    return question, answer
def response_to_options(response):
    """Turn a multi-line reply into a list of option strings.

    The reply is split on newlines and each line has a leading
    one-character label removed by ``delete_index``.

    :param response: raw text with one option per line
    :return: list with one entry per line of the reply
    """
    return list(map(delete_index, response.split('\n')))
def _materials_select(materials: List[str], question_num: int, option_num: int = 1): | |
""" | |
:param materials: | |
:param question_num: | |
:param option_num: | |
:return: | |
""" | |
material = materials * math.ceil(option_num/len(materials)) | |
select_materials = [random.sample(material, option_num) for i in range(question_num)] | |
if option_num == 1: | |
select_materials = [i[0] for i in select_materials] | |
# random.shuffle(material_index) | |
# if len(material_index) >= question_num: | |
# material_index = material_index[:question_num] | |
# else: | |
# times = int(question_num / len(materials)) | |
# remain = question_num - len(materials) * times | |
# material_index = material_index * times + material_index[:remain] | |
# select_materials = [materials[i] for i in material_index] | |
return select_materials | |
def _generate(prompt, model='openai_3'):
    """Send *prompt* to the selected model and return its reply.

    ``model`` is looked up in ``model_name``; the value found there is
    evaluated to obtain a class, which is instantiated without arguments and
    then called with the prompt.

    :param prompt: text to send to the model
    :param model: key into ``model_name``
    :return: whatever the model object returns for the prompt
    """
    # NOTE(review): eval() runs whatever string ``model_name`` holds for this
    # key -- keep that mapping under the project's own control.
    llm_class = eval(model_name[model])
    llm = llm_class()
    return llm(prompt)
def _generate_fake_word(word):
    """Build a misspelled variant of *word* to use as a wrong option.

    Strategies, in order of preference:

    1. If the word contains an entry of ``vowels_list``, replace one
       occurrence of a randomly chosen such entry with a different entry of
       the same length.
    2. Otherwise, if the word contains a letter from ``vowel_list``, replace
       one such letter with a different entry of ``vowel_list``.
    3. Otherwise double one randomly chosen letter.

    :param word: the correctly spelled word
    :return: the misspelled word
    :raises IndexError: if strategy 1 applies but ``vowels_list`` holds no
        other entry of the same length, or if *word* is empty
    """
    groups_in_word = [group for group in vowels_list if group in word]
    vowel_positions = [pos for pos, letter in enumerate(word) if letter in vowel_list]

    if groups_in_word:
        true_letters = random.choice(groups_in_word)
        replacements = [group for group in vowels_list
                        if len(group) == len(true_letters) and group != true_letters]
        fake_letters = random.choice(replacements)
        # Splitting on the group leaves one gap per occurrence; exactly one
        # gap is refilled with the wrong letters.
        pieces = word.split(true_letters)
        target = random.randint(0, len(pieces) - 2)
        rebuilt = [piece + (fake_letters if pos == target else true_letters)
                   for pos, piece in enumerate(pieces[:-1])]
        rebuilt.append(pieces[-1])
        return ''.join(rebuilt)

    if vowel_positions:
        pos = random.choice(vowel_positions)
        alternatives = list(vowel_list)
        alternatives.remove(word[pos])
        return word[:pos] + random.choice(alternatives) + word[pos + 1:]

    # No vowel to swap: double one letter.  The tail must be ``word[pos:]``;
    # repeating the prefix here would drop the rest of the word.
    pos = random.choice(range(len(word)))
    return word[:pos] + word[pos] + word[pos:]
class WordCompleteChoice(object):
    """Multiple-choice questions: pick the letters that complete a word."""

    title = '请将选择正确的拼写补全单词'

    @staticmethod
    def generate(words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Create word-completion choice questions.

        If the word contains an entry of ``vowels_list``, one occurrence of
        it is blanked and the distractors are other entries of the same
        length.  Otherwise a few individual letters are blanked and the
        distractors are other letter selections from the same word.

        :param words: candidate words
        :param question_num: number of questions to create
        :param option_num: number of options per question (answer included)
        :param model: unused; accepted so all generators share a signature
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        responses = []
        for word in _materials_select(words, question_num):
            groups_in_word = [group for group in vowels_list if group in word]
            if groups_in_word:
                answer = random.choice(groups_in_word)
                pieces = word.split(answer)
                blank_at = random.randint(0, len(pieces) - 2)
                stem = ''
                for pos, piece in enumerate(pieces[:-1]):
                    stem += piece + ('_' * len(answer) if pos == blank_at else answer)
                stem += pieces[-1]

                options = [group for group in vowels_list
                           if len(group) == len(answer) and group != answer]
                if len(options) > option_num - 1:
                    random.shuffle(options)
                    options = options[:option_num - 1]
                options.append(answer)
            else:
                positions = list(range(len(word)))
                # Blank 2-3 letters but leave part of the word visible; never
                # fewer than one, which short words would otherwise produce.
                num = max(1, min(random.randint(2, 3), len(word) - 3))
                random.shuffle(positions)
                blanked = sorted(positions[:num])
                answer = ','.join(word[i] for i in blanked)
                stem = ''.join('_' if i in blanked else letter
                               for i, letter in enumerate(word))

                options = [answer]
                # Short or repetitive words may not offer enough distinct
                # selections, so the search is bounded instead of open-ended.
                attempts_left = 50 * max(option_num, 1)
                while len(options) < option_num and attempts_left > 0:
                    attempts_left -= 1
                    random.shuffle(positions)
                    option = ','.join(word[i] for i in positions[:num])
                    if option not in options:
                        options.append(option)

            answer, options = normalize_options_and_answer(answer, options)
            question = stem + '\n' + options
            responses.append({'question': question, 'answer': answer})
        return responses
class WordCompleteBlank(object):
    """Fill-in-the-blank questions: complete a word from its Chinese meaning."""

    title = '根据汉语意思补全单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """Create fill-in-the-blank word questions.

        Each question contains ``option_num`` words.  Every word is shown as
        ``<chinese>:<word with blanks>``; the answer shows
        ``<chinese>:<word>``.  Items within a question are separated by a
        space.

        :param words: candidate words
        :param question_num: number of questions to create
        :param option_num: number of words per question
        :param model: unused; accepted so all generators share a signature
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        responses = []
        for group in _materials_select(words, question_num, option_num):
            # With option_num == 1 the selection is a bare word, not a list;
            # wrap it so the loop below does not iterate over its letters.
            if isinstance(group, str):
                group = [group]
            stem_list = []
            answer_list = []
            for word in group:
                groups_in_word = [vowel for vowel in vowels_list if vowel in word]
                chinese = trans_en_to_cn(word)
                if groups_in_word:
                    letters = random.choice(groups_in_word)
                    pieces = word.split(letters)
                    blank_at = random.randint(0, len(pieces) - 2)
                    stem = chinese + ':'
                    for pos, piece in enumerate(pieces[:-1]):
                        stem += piece + ('_' * len(letters) if pos == blank_at else letters)
                    stem += pieces[-1]
                else:
                    # Blank a run of 2-3 letters, at least one, keeping part
                    # of the word visible where its length allows.
                    num = max(1, min(random.randint(2, 3), len(word) - 2))
                    # The last valid start is len(word) - num (randint is
                    # inclusive); one past it would lengthen the stem.
                    start = random.randint(0, max(len(word) - num, 0))
                    stem = chinese + ':' + word[:start] + '_' * num + word[start + num:]
                stem_list.append(stem)
                answer_list.append(chinese + ':' + word)
            responses.append({'question': ' '.join(stem_list), 'answer': ' '.join(answer_list)})
        return responses
class WordRightSpellingChoice(object):
    """Multiple-choice questions: pick the correctly spelled word."""

    title = '根据汉语意思选出拼写正确的单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Create spelling-choice questions.

        The stem is the Chinese translation of the word; the options are the
        word itself plus misspellings from ``_generate_fake_word``.

        :param words: candidate words
        :param question_num: number of questions to create
        :param option_num: number of options per question (answer included);
            fewer are returned if not enough distinct misspellings are found
        :param model: unused; accepted so all generators share a signature
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        responses = []
        for word in _materials_select(words, question_num):
            stem = trans_en_to_cn(word)

            # Collect distinct misspellings; the generator is random and can
            # repeat itself, so retry a bounded number of times.
            options = []
            attempts_left = 20 * max(option_num, 1)
            while len(options) < option_num - 1 and attempts_left > 0:
                attempts_left -= 1
                fake = _generate_fake_word(word)
                if fake != word and fake not in options:
                    options.append(fake)
            options.append(word)

            answer, options = normalize_options_and_answer(word, options)
            question = stem + '\n' + options
            responses.append({'question': question, 'answer': answer})
        return responses
class SentenceRecompositionBlank(object):
    """Questions that ask the student to reorder shuffled words into a sentence."""

    title = '请将下列单词组成完整的一句话'
    # Prompt template; ``{sentence}`` is filled in by generate().
    generate_similar_sentence_prompt = ('Please generate a sentence with the same sentence structure as '
                                        '<{sentence}> using simple vocabulary')

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num=None, model=None,
                 generate_mode: bool = False) -> List[Dict]:
        """Create sentence-recomposition questions.

        The question shows one blank per word (end punctuation kept next to
        its blank) followed by the words in shuffled order; the answer is the
        sentence itself.

        :param sentences: candidate sentences
        :param question_num: number of questions to create
        :param option_num: unused; accepted so all generators share a signature
        :param model: model key passed to ``_generate`` when ``generate_mode``
            is on
        :param generate_mode: if true, ask the model for a similar sentence
            and use the part of its reply before the first ``.``, ``?`` or
            ``!``; on any error the original sentence is used
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        end_marks = ('.', '?', '!', '。')
        responses = []
        for sentence in _materials_select(sentences, question_num):
            sent = sentence
            if generate_mode:
                try:
                    prompt = cls.generate_similar_sentence_prompt.format(sentence=sentence)
                    sent = re.split(r'[.?!]', _generate(prompt, model))[0]
                except Exception:
                    # Best effort: fall back to the source sentence.
                    sent = sentence
            answer = sent

            # Drop empty tokens produced by repeated spaces.
            words = [token for token in sent.split(' ') if token]
            stem = ['______'] * len(words)
            for pos, token in enumerate(words):
                if token[-1] in end_marks:
                    stem[pos] += token[-1]
                    words[pos] = token[:-1]

            # Reshuffle until the order differs from the original.  That is
            # only possible when at least two different words exist.
            if len(set(words)) > 1:
                original_order = list(words)
                while words == original_order:
                    random.shuffle(words)

            question = ' '.join(stem) + '\n' + ', '.join(words)
            responses.append({'question': question, 'answer': answer})
        return responses
class PhraseTranslatingMatchChoice(object):
    """Multiple-choice questions: pick the phrase matching a Chinese meaning."""

    title = '请根据汉语意思选择正确的短语'
    # Prompt templates; ``{num}`` and ``{phrase}`` are filled in by generate().
    generate_similar_phrase_prompt = ('Please generate {num} phrases with the same phrase structure as <{phrase}> by '
                                      'replacing a single word in this phrase. Please return the answer in a list. '
                                      'Try your best to use simple vocabulary when generating phrases.')
    translate_chinese_phrase_prompt = 'Please translate the following phrase to chinese: <{phrase}>'

    @classmethod
    def generate(cls, phrases: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """Create phrase-matching choice questions with the help of the model.

        The model is asked for ``option_num - 1`` similar phrases (the
        distractors) and for a Chinese translation of the phrase (the stem).
        A phrase whose processing raises is reported with ``print`` and
        skipped.

        :param phrases: candidate phrases
        :param question_num: number of questions to attempt
        :param option_num: number of options per question (answer included)
        :param model: model key passed to ``_generate``
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        responses = []
        num = option_num - 1
        for phrase in _materials_select(phrases, question_num):
            try:
                phrase = phrase[0].upper() + phrase[1:]
                option_prompt = cls.generate_similar_phrase_prompt.format(num=num, phrase=phrase)
                option_response = _generate(option_prompt, model)
                # Blank lines in the reply must not become empty options.
                options = [option for option in response_to_options(option_response)
                           if option.strip()]
                options.append(phrase)
                answer, options = normalize_options_and_answer(phrase, options)

                stem_prompt = cls.translate_chinese_phrase_prompt.format(phrase=phrase)
                stem_response = _generate(stem_prompt, model)
                question = stem_response + '\n' + options
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)
        return responses
class SentenceCompleteChoice(object):
    """Multiple-choice questions: complete a sentence, written by the model."""

    title = '请选择正确的短语补全句子'
    # Prompt template; ``{sentence}`` and ``{num}`` are filled in by
    # generate().  The example ends each part with a space so that "Answer:"
    # does not run into the last option.
    generate_sentence_complete_question_prompt = (
        'Assuming you are an English teacher and a question consists of three '
        'parts: the stem, options, and answer. please provide a multiple-choice '
        'question based on the sentence <{sentence}>.The specific steps are: '
        'Randomly blank out a part of the sentence and replace it with ____ to '
        'create the stem.Generate {num} new phrases/words that are '
        'structurally consistent with the blanked-out part and include the new '
        'phrases/words and blanked-out part as the options.Use the blanked-out '
        'part as the answer. For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate B: say goodbye C: take a break D: graduate '
        'Answer: D: graduate '
    )

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Create sentence-completion choice questions with the help of the model.

        The reply is parsed by ``response_to_question``.  A sentence whose
        processing raises is reported with ``print`` and skipped.

        :param sentences: candidate sentences
        :param question_num: number of questions to attempt
        :param option_num: number of options per question (answer included)
        :param model: model key passed to ``_generate``
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        responses = []
        num = option_num - 1
        # Break the instructions into lines before the sentence is inserted,
        # so full stops inside the sentence itself are left intact.
        template = cls.generate_sentence_complete_question_prompt.replace('.', '\n')
        for sentence in _materials_select(sentences, question_num):
            try:
                prompt = template.format(sentence=sentence, num=num)
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)
        return responses
class GrammarChoice(object):
    """Multiple-choice questions testing a grammar rule, written by the model."""

    title = '请根据语法知识选择正确的选项补全句子'
    # Prompt template; ``{grammar}`` and ``{num}`` are filled in by
    # generate().  The example answer must be one of the example options.
    generate_grammar_question_prompt = (
        'Assuming you are an English teacher and a question consists of three parts: the '
        'stem, options, and answer. Please provide a sentence completion type '
        'multiple-choice question to test students understanding of the grammar rule'
        ' <{grammar}>.The specific steps are: Generate a sentence using this '
        'grammar rule with simple vocabulary. Blank out the part related to the grammar '
        'of this sentence. Use the sentence that has been blanked out as the stem.'
        'Generate {num} phrases/words with the same meaning but different usage using '
        'the blanked-out part and include the new phrases/words and blanked-out part as '
        'the options.Use the blanked-out part as the correct answer.For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate B: celebrating C: celebrated D: celebrates '
        'Answer: A: celebrate'
    )

    @classmethod
    def generate(cls, grammars: List[str], question_num: int = 1, option_num: int = 4, model=None) -> List[Dict]:
        """Create grammar choice questions with the help of the model.

        The reply is parsed by ``response_to_question``.  A grammar rule
        whose processing raises is reported with ``print`` and skipped.

        :param grammars: candidate grammar rules
        :param question_num: number of questions to attempt
        :param option_num: number of options per question (answer included)
        :param model: model key passed to ``_generate``
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        responses = []
        num = option_num - 1
        # Break the instructions into lines before the rule is inserted, so
        # full stops inside the rule text (e.g. "adj.") are left intact.
        template = cls.generate_grammar_question_prompt.replace('.', '\n')
        for grammar in _materials_select(grammars, question_num):
            try:
                prompt = template.format(grammar=grammar, num=num)
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)
        return responses
def txt_to_list(txt):
    """Split text into its non-empty lines.

    Leading, trailing and interior blank lines are dropped.  Empty or
    all-newline input gives an empty list.

    :param txt: text with one item per line
    :return: list of the non-empty lines, in order
    """
    return [line for line in txt.split('\n') if line != '']
# def generate_scope(progress: str, q_type): | |
# """ | |
# :param progress: | |
# :param q_type: | |
# :return: | |
# """ | |
# | |
# if progress == '期中': | |
# progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7'] | |
# elif progress == '期末': | |
# progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7', 'unit8', 'unit9', | |
# 'unit10', 'unit11', 'unit12', 'unit13', 'unit14'] | |
# else: | |
# progress = [progress] | |
# | |
# scope = {'word': [], 'phrase': [], 'sentence': [], 'grammar': []} | |
# | |
# root_path = Path(__file__).parent | |
# for i in progress: | |
# path = root_path.joinpath('material', i + '.txt') | |
# with open(path, 'r', encoding='utf-8') as file: | |
# content = file.read() | |
# # scope = re.split(r'<word>|<phrase>|<sentence>|<grammar>', content) | |
# _, word, phrase, sentence, grammar = re.split(r'<word>|<phrase>|<sentence>|<grammar>', content) | |
# scope['word'].extend(txt_to_list(word)) | |
# scope['phrase'].extend(txt_to_list(phrase)) | |
# scope['sentence'].extend(txt_to_list(sentence)) | |
# scope['grammar'].extend(txt_to_list(grammar)) | |
# m_type = question_type_to_material_type[q_type] | |
# return scope[m_type] | |
# def generate(progress, q_type, q_num, **kwargs): | |
# """ | |
# :param progress: | |
# :param q_type: | |
# :param q_num: | |
# :param kwargs: | |
# model is a must when use the llm, for example: | |
# model = 'openai_3' | |
# :return: | |
# """ | |
# materials = generate_scope(progress, q_type) | |
# return eval(q_type).generate(materials, q_num, **kwargs) | |
def generate(materials, q_type, q_num, **kwargs):
    """Create questions of the requested type from the given materials.

    :param materials: words / phrases / sentences / grammar rules to use
    :param q_type: a key of ``question_type_to_class_type``
    :param q_num: number of questions to create
    :param kwargs: forwarded to the generator class; ``model`` is required
        by the generators that call the LLM, for example
        ``model='chatgpt_3.5'``
    :return: whatever the generator class's ``generate`` returns
    """
    class_name = question_type_to_class_type[q_type]
    question_class = eval(class_name)
    return question_class.generate(materials, q_num, **kwargs)
# ans1 = generate_word_complete_question(['word', 'manager', 'answer', 'fight', 'jump', 'hihhttgrh']) | |
# ans2 = generate_phrase_select_question(['the Dragon Boat Festival'], 3) | |
# ans3 = generate_sentence_recomposition_question(['You really require a lot of talent and hard work to succeed.', | |
# 'I have a job interview tomorrow.'], False)
# ans4 = generate_sentence_complete_question(['You really require a lot of talent and hard work to succeed.', | |
# 'I have a job interview tomorrow.']) | |
# ans5 = generate_grammar_question(['be supposed to', 'It is + adj. + 动词不定式']) | |
# print(ans5) | |