# question_generate / question.py
# Author: weiwei1392 - commit d8132e8 ("init") - 22.6 kB
import re
import math
from pathlib import Path
from typing import List, Dict
from translate import Translator
from config import *
from prompt import *
from llm import OpenAI3, OpenAI4
import random
# question_type_to_material_type = {'WordCompleteChoice': 'word',
# 'WordCompleteBlank': 'word',
# 'WordRightSpellingChoice': 'word',
# 'PhraseTranslatingMatchChoice': 'phrase',
# 'SentenceRecompositionBlank': 'sentence',
# 'SentenceCompleteChoice': 'sentence',
# 'GrammarChoice': 'grammar'
# }
# Maps the Chinese question title (as passed to ``generate``) to the name of
# the class in this module that builds that kind of question.
question_type_to_class_type = {
    '请将选择正确的拼写补全单词': 'WordCompleteChoice',
    '根据汉语意思补全单词': 'WordCompleteBlank',
    '根据汉语意思选出拼写正确的单词': 'WordRightSpellingChoice',
    '请根据汉语意思选择正确的短语': 'PhraseTranslatingMatchChoice',
    '请将下列单词组成完整的一句话': 'SentenceRecompositionBlank',
    '请选择正确的短语补全句子': 'SentenceCompleteChoice',
    '请根据语法知识选择正确的选项补全句子': 'GrammarChoice',
}
def trans_en_to_cn(word: str) -> str:
    """Return the Chinese translation of an English word.

    ``'crayon'`` / ``'Crayon'`` are answered with a hard-coded value and never
    reach the translation service; every other word is passed to
    ``Translator``.

    :param word: English word (or phrase) to translate.
    :return: Chinese text.
    """
    # NOTE(review): presumably the service mistranslates "crayon" -- confirm.
    if word not in ('crayon', 'Crayon'):
        translator = Translator(from_lang='English', to_lang='Chinese')
        return translator.translate(word)
    return '蜡笔'
def delete_index(string: str) -> str:
    """Strip a leading one-character option label such as ``A.`` or ``B:``.

    If the second character of *string* is one of the recognised separators,
    the first two characters are dropped; otherwise *string* is returned
    unchanged. Whitespace after the separator is not removed.

    :param string: option text, possibly prefixed with a label.
    :return: the text without the label.
    """
    # The original list repeated ':' and ')'; the full-width colon and closing
    # parenthesis are spelled as escapes so they cannot be confused with
    # (or normalised into) their ASCII look-alikes.
    separators = ('.', ':', '、', ')', '\uff1a', '\uff09')
    # Strings of length <= 1 are left alone: a word-completion option can be
    # a single letter, which is not a label.
    if len(string) > 1 and string[1] in separators:
        return string[2:]
    return string
def normalize_options_and_answer(answer, options):
    """Shuffle and re-label the options, labelling the answer to match.

    With exactly one option nothing is shuffled or labelled: the answer is
    returned untouched together with that single option string.

    Otherwise ``options`` is shuffled in place, existing labels are removed
    with ``delete_index``, and each option gets a fresh ``<label>:`` prefix
    taken from ``number_letter_dict`` by position.

    :param answer: the correct option, with or without a label.
    :param options: list of option strings; must contain the answer.
    :return: ``(labelled_answer, options_joined_by_comma)``.
    :raises ValueError: if the answer is not among the options.
    """
    if len(options) == 1:
        return answer, options[0]

    random.shuffle(options)
    cleaned = [delete_index(item) for item in options]
    bare_answer = delete_index(answer)
    position = cleaned.index(bare_answer)
    labelled_answer = number_letter_dict[position] + ':' + bare_answer
    labelled = [number_letter_dict[pos] + ':' + text for pos, text in enumerate(cleaned)]
    return labelled_answer, ','.join(labelled)
def response_to_question(response):
    """Parse an LLM reply into a ``(question, answer)`` pair.

    Three layouts are tried in order:

    1. ``Stem: ... Options: ... Answer: ...``
    2. ``Stem: ... Options: ...`` where the right option is flagged in the
       option text with ``correct answer`` / ``(correct answer)``
    3. ``Question: ... Answer: ...``

    For layouts 1 and 2 the options are shuffled and re-labelled through
    ``normalize_options_and_answer`` and the question is the stem followed by
    the option line.

    :param response: raw text returned by the model.
    :return: ``(question, answer)``, or ``('fail!', 'fail!')`` when the reply
        cannot be parsed (the reason is printed).
    """
    stem_re = r'stem: |stem:|Stem: |Stem:'
    options_re = r'options: |options:|Options: |Options:'
    answer_re = (r'Correct answer: |Correct answer:|correct answer: |correct answer:'
                 r'|answer: |answer:|Answer: |Answer:')
    question_re = r'question: |question:|Question: |Question:'

    try:
        question = None
        # A wrong number of pieces makes the tuple unpacking raise ValueError,
        # which is the signal to fall through to the next layout.
        try:
            _, stem, options, answer = re.split(
                stem_re + '|' + options_re + '|' + answer_re, response)
        except ValueError:
            try:
                _, stem, options = re.split(stem_re + '|' + options_re, response)
                answer = None
            except ValueError:
                _, question, answer = re.split(question_re + '|' + answer_re, response)
                stem = None
                options = None
                question = question.rstrip('\n')

        if stem:
            stem = stem.rstrip('\n')

        if options:
            option_lines = [line for line in options.split('\n') if line != '']
            if answer is None:
                # Layout 2: recover the answer from the flagged option and
                # remove the flag from the option text.
                answer = 'None'
                for i, line in enumerate(option_lines):
                    if 'correct answer' in line:
                        answer = re.split(r'\(correct answer\)|correct answer', line)[0]
                        option_lines[i] = answer
            # Keep only the first line; models often append an explanation.
            answer = answer.split('\n')[0]
            answer, option_text = normalize_options_and_answer(answer, option_lines)
            question = stem + '\n' + option_text

        # An empty options section used to leave ``question`` unbound and blow
        # up at ``return`` outside the try; report it as a parse failure.
        if not question:
            raise ValueError('no question could be extracted from the response')
    except Exception as e:
        print(f'fail!,reason:{e},response:{response}')
        question = 'fail!'
        answer = 'fail!'
    return question, answer
def response_to_options(response):
    """Turn a multi-line model reply into a list of options.

    The reply is split on newlines and every line is passed through
    ``delete_index`` to remove a leading label. Empty lines are kept.

    :param response: raw text, one option per line.
    :return: list of option strings.
    """
    return [delete_index(line) for line in response.split('\n')]
def _materials_select(materials: List[str], question_num: int, option_num: int = 1):
    """
    Randomly draw the material for each question.

    :param materials: pool of candidate items; must not be empty.
    :param question_num: number of draws (one per question).
    :param option_num: number of items per draw.
    :return: when ``option_num`` is 1, a flat list of ``question_num`` items;
        otherwise a list of ``question_num`` lists of ``option_num`` items.
    """
    # Repeat the pool often enough that sampling ``option_num`` items without
    # replacement is always possible.
    repeats = math.ceil(option_num / len(materials))
    pool = materials * repeats

    draws = []
    for _ in range(question_num):
        draws.append(random.sample(pool, option_num))

    if option_num == 1:
        return [draw[0] for draw in draws]
    return draws
def _generate(prompt, model='openai_3'):
    """Send *prompt* to the selected model and return its reply.

    :param prompt: text passed to the model.
    :param model: key into ``model_name``.
    :return: whatever the model object returns when called with the prompt.
    """
    # ``model_name[model]`` is a string that is evaluated to obtain a callable
    # which is then instantiated -- presumably one of the classes imported
    # from ``llm``; verify against config.
    # NOTE(review): eval runs whatever string the config holds.
    backend_factory = eval(model_name[model])
    backend = backend_factory()
    return backend(prompt)
def _generate_fake_word(word):
    """Return a misspelled variant of *word* to use as a distractor.

    Strategy, in order of preference:

    1. If the word contains an entry of ``vowels_list``, replace one
       occurrence of it with a different entry of the same length.
    2. Otherwise, if it contains a letter from ``vowel_list``, replace one
       such letter with another letter from ``vowel_list``.
    3. Otherwise double one randomly chosen letter.

    ``vowels_list`` and ``vowel_list`` are not defined in this file
    (presumably they come from ``config``; verify there).

    :param word: the correctly spelled word.
    :return: the misspelled word.
    :raises IndexError: if *word* is empty, or if strategy 1 finds no
        same-length replacement in ``vowels_list``.
    """
    groups_in_word = [group for group in vowels_list if group in word]
    vowel_positions = [i for i, letter in enumerate(word) if letter in vowel_list]

    if groups_in_word:
        true_letters = random.choice(groups_in_word)
        substitutes = [group for group in vowels_list
                       if len(group) == len(true_letters) and group != true_letters]
        fake_letters = random.choice(substitutes)
        pieces = word.split(true_letters)
        # The group may occur several times; corrupt exactly one occurrence.
        target = random.randint(0, len(pieces) - 2)
        rebuilt = [piece + (fake_letters if i == target else true_letters)
                   for i, piece in enumerate(pieces[:-1])]
        rebuilt.append(pieces[-1])
        return ''.join(rebuilt)

    if vowel_positions:
        index = random.choice(vowel_positions)
        candidates = vowel_list.copy()
        candidates.remove(word[index])
        return word[:index] + random.choice(candidates) + word[index + 1:]

    # No vowel at all: double one letter. The previous expression appended
    # ``word[:index]`` a second time instead of the rest of the word, which
    # threw away everything after ``index``.
    index = random.choice(range(len(word)))
    return word[:index + 1] + word[index:]
class WordCompleteChoice(object):
    """Multiple-choice question: pick the letters that complete a word.

    If the word contains an entry of ``vowels_list`` (defined outside this
    file, presumably in ``config``), one occurrence of it is blanked and the
    distractors are other entries of the same length. Otherwise a few random
    letters are blanked and the distractors are other letter combinations
    taken from the same word.
    """
    title = '请将选择正确的拼写补全单词'

    @staticmethod
    def generate(words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build ``question_num`` questions from randomly chosen *words*.

        :param words: candidate words.
        :param question_num: number of questions to build.
        :param option_num: desired number of options per question; fewer are
            produced when the word does not offer enough distinct ones.
        :param model: unused; kept for a uniform ``generate`` signature.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        for word in _materials_select(words, question_num):
            groups_in_word = [group for group in vowels_list if group in word]
            if groups_in_word:
                answer = random.choice(groups_in_word)
                pieces = word.split(answer)
                # Blank exactly one occurrence of the chosen group.
                blank_at = random.randint(0, len(pieces) - 2)
                stem = ''
                for i, piece in enumerate(pieces[:-1]):
                    stem += piece + ('_' * len(answer) if i == blank_at else answer)
                stem += pieces[-1]
                options = [group for group in vowels_list
                           if len(group) == len(answer) and group != answer]
                if len(options) > option_num - 1:
                    random.shuffle(options)
                    options = options[:option_num - 1]
                options.append(answer)
            else:
                positions = list(range(len(word)))
                # Blank 2-3 letters but keep at least three visible; never go
                # below one blank (short words used to yield zero or a
                # negative count, i.e. an empty answer).
                num = max(1, min(random.randint(2, 3), len(word) - 3))
                random.shuffle(positions)
                ans_index = sorted(positions[:num])
                answer = ','.join(word[i] for i in ans_index)
                stem = ''.join('_' if i in ans_index else letter
                               for i, letter in enumerate(word))
                options = [answer]
                # Bounded retries: a word with few distinct letters cannot
                # supply ``option_num`` different combinations, which used to
                # spin forever.
                attempts_left = 20 * max(option_num, 1)
                while len(options) < option_num and attempts_left > 0:
                    attempts_left -= 1
                    random.shuffle(positions)
                    option = ','.join(word[i] for i in positions[:num])
                    if option not in options:
                        options.append(option)
            answer, options = normalize_options_and_answer(answer, options)
            question = stem + '\n' + options
            responses.append({'question': question, 'answer': answer})
        return responses
class WordCompleteBlank(object):
    """Fill-in-the-blank question: complete a word given its Chinese meaning.

    Each question shows ``option_num`` words, each as
    ``<chinese>:<word with blanks>``; the answer lists the full words in the
    same format.
    """
    title = '根据汉语意思补全单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """Build ``question_num`` questions from randomly chosen *words*.

        :param words: candidate words.
        :param question_num: number of questions to build.
        :param option_num: number of words shown per question.
        :param model: unused; kept for a uniform ``generate`` signature.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        for group in _materials_select(words, question_num, option_num):
            # With option_num == 1 the selector returns bare strings; wrap
            # them so the loop below does not iterate over single characters.
            if isinstance(group, str):
                group = [group]
            stem_list = []
            answer_list = []
            for word in group:
                chinese = trans_en_to_cn(word)
                groups_in_word = [vowel for vowel in vowels_list if vowel in word]
                if groups_in_word:
                    letters = random.choice(groups_in_word)
                    pieces = word.split(letters)
                    # Blank exactly one occurrence of the chosen group.
                    blank_at = random.randint(0, len(pieces) - 2)
                    stem = chinese + ':'
                    for i, piece in enumerate(pieces[:-1]):
                        stem += piece + ('_' * len(letters) if i == blank_at else letters)
                    stem += pieces[-1]
                else:
                    # Blank 2-3 consecutive letters, keeping two visible when
                    # possible and never fewer than one blank.
                    num = max(1, min(random.randint(2, 3), len(word) - 2))
                    # randint is inclusive on both ends: the last valid start
                    # is len(word) - num (the old bound let the blank run one
                    # position past the end of the word).
                    start = random.randint(0, max(0, len(word) - num))
                    stem = chinese + ':' + word[:start] + '_' * num + word[start + num:]
                stem_list.append(stem)
                answer_list.append(chinese + ':' + word)
            responses.append({'question': ' '.join(stem_list), 'answer': ' '.join(answer_list)})
        return responses
class WordRightSpellingChoice(object):
    """Multiple-choice question: pick the correctly spelled word.

    The stem is the Chinese translation of the word; the options are the real
    word plus ``option_num - 1`` misspellings from ``_generate_fake_word``.
    """
    title = '根据汉语意思选出拼写正确的单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build ``question_num`` questions from randomly chosen *words*.

        :param words: candidate words.
        :param question_num: number of questions to build.
        :param option_num: number of options per question.
        :param model: unused; kept for a uniform ``generate`` signature.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        results = []
        for target in _materials_select(words, question_num):
            meaning = trans_en_to_cn(target)
            candidates = []
            for _ in range(option_num - 1):
                candidates.append(_generate_fake_word(target))
            candidates.append(target)
            labelled_answer, option_line = normalize_options_and_answer(target, candidates)
            results.append({
                'question': meaning + '\n' + option_line,
                'answer': labelled_answer,
            })
        return results
class SentenceRecompositionBlank(object):
    """Question: rearrange shuffled words into a complete sentence.

    The question is a row of blanks (one per word, keeping sentence-final
    punctuation) followed by the shuffled words; the answer is the sentence.
    """
    title = '请将下列单词组成完整的一句话'
    generate_similar_sentence_prompt = 'Please generate a sentence with the same sentence structure as ' \
                                       '<{sentence}> using simple vocabulary'

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num=None, model=None,
                 generate_mode: bool = False) -> List[Dict]:
        """Build ``question_num`` questions from randomly chosen *sentences*.

        :param sentences: candidate sentences.
        :param question_num: number of questions to build.
        :param option_num: unused; kept for a uniform ``generate`` signature.
        :param model: model key forwarded to ``_generate`` in generate mode.
        :param generate_mode: when true, ask the model for a new sentence with
            the same structure and use its first sentence; on any failure the
            original sentence is used instead.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        # ASCII and CJK sentence-final marks; the full-width question and
        # exclamation marks are written as escapes so they stay distinct from
        # the ASCII ones.
        end_marks = ('.', '?', '!', '。', '\uff1f', '\uff01')
        responses = []
        for sentence in _materials_select(sentences, question_num):
            sent = sentence
            if generate_mode:
                try:
                    # Plain str.format gives the same text as the former
                    # eval() of an f-string, without executing code.
                    prompt = cls.generate_similar_sentence_prompt.format(sentence=sentence)
                    sent = re.split(r'[.?!]', _generate(prompt, model))[0]
                except Exception:
                    # Best effort: fall back to the source sentence.
                    sent = sentence
            answer = sent
            # split() without an argument ignores repeated whitespace, so no
            # empty tokens reach the ``token[-1]`` lookup below.
            words = sent.split()
            stem = []
            for i, token in enumerate(words):
                blank = '______'
                if token[-1] in end_marks:
                    blank += token[-1]
                    words[i] = token[:-1]
                stem.append(blank)
            original_order = list(words)
            # Reshuffle until the order differs -- but only when a different
            # order exists; one word (or identical words) used to loop forever.
            if len(set(words)) > 1:
                while words == original_order:
                    random.shuffle(words)
            question = ' '.join(stem) + '\n' + ', '.join(words)
            responses.append({'question': question, 'answer': answer})
        return responses
class PhraseTranslatingMatchChoice(object):
    """Multiple-choice question: pick the phrase matching a Chinese meaning.

    The model is asked for similar-looking phrases to use as distractors and
    for the Chinese translation of the phrase, which becomes the stem.
    """
    title = '请根据汉语意思选择正确的短语'
    generate_similar_phrase_prompt = 'Please generate {num} phrases with the same phrase structure as <{phrase}> by ' \
                                     'replacing a single word in this phrase. Please return the answer in a list. ' \
                                     'Try your best to use simple vocabulary when generating phrases.'
    translate_chinese_phrase_prompt = 'Please translate the following phrase to chinese: <{phrase}>'

    @classmethod
    def generate(cls, phrases: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """Build up to ``question_num`` questions from randomly chosen *phrases*.

        A phrase whose processing raises is reported with ``print`` and
        skipped, so the result may hold fewer than ``question_num`` entries.

        :param phrases: candidate phrases.
        :param question_num: number of questions to attempt.
        :param option_num: options per question; ``option_num - 1``
            distractors are requested from the model.
        :param model: model key forwarded to ``_generate``.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        num = option_num - 1
        for phrase in _materials_select(phrases, question_num):
            try:
                phrase = phrase[0].upper() + phrase[1:]
                # str.format fills the same placeholders the former eval() of
                # an f-string did, without evaluating code.
                option_prompt = cls.generate_similar_phrase_prompt.format(num=num, phrase=phrase)
                option_response = _generate(option_prompt, model)
                options = response_to_options(option_response)
                options.append(phrase)
                answer, options = normalize_options_and_answer(phrase, options)
                stem_prompt = cls.translate_chinese_phrase_prompt.format(phrase=phrase)
                stem_response = _generate(stem_prompt, model)
                question = stem_response + '\n' + options
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)
        return responses
class SentenceCompleteChoice(object):
    """Multiple-choice question: pick the phrase that completes a sentence.

    The whole question (stem, options, answer) is produced by the model from
    a single prompt and parsed with ``response_to_question``.
    """
    title = '请选择正确的短语补全句子'
    # Parenthesised concatenation replaces the backslash continuations; the
    # last fragment used to be followed by two dangling continuations.
    # A space was added after "D: graduate" in the example options so that it
    # no longer runs into "Answer:".
    generate_sentence_complete_question_prompt = (
        'Assuming you are an English teacher and a question consists of three '
        'parts: the stem, options, and answer. please provide a multiple-choice '
        'question based on the sentence <{sentence}>.The specific steps are: '
        'Randomly blank out a part of the sentence and replace it with ____ to '
        'create the stem.Generate {num} new phrases/words that are '
        'structurally consistent with the blanked-out part and include the new '
        'phrases/words and blanked-out part as the options.Use the blanked-out '
        'part as the answer. For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate B: say goodbye C: take a break D: graduate '
        'Answer: D: graduate '
    )

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build up to ``question_num`` questions from randomly chosen *sentences*.

        A sentence whose processing raises is reported with ``print`` and
        skipped. A reply that cannot be parsed is still appended, as
        ``{'question': 'fail!', 'answer': 'fail!'}``.

        :param sentences: candidate sentences.
        :param question_num: number of questions to attempt.
        :param option_num: options per question; ``option_num - 1`` new
            phrases are requested from the model.
        :param model: model key forwarded to ``_generate``.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        num = option_num - 1
        # Full stops in the instructions become line breaks. This is done on
        # the template, before the sentence is inserted, so punctuation inside
        # the sentence itself (e.g. "3.5", "Mr.") is left intact.
        template = cls.generate_sentence_complete_question_prompt.replace('.', '\n')
        for sentence in _materials_select(sentences, question_num):
            try:
                prompt = template.format(sentence=sentence, num=num)
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)
        return responses
class GrammarChoice(object):
    """Multiple-choice question testing a grammar rule.

    The whole question (stem, options, answer) is produced by the model from
    a single prompt and parsed with ``response_to_question``.
    """
    title = '请根据语法知识选择正确的选项补全句子'
    # The example answer used to read "D: graduate", which is not among the
    # example options; it now names the option that fits the stem.
    generate_grammar_question_prompt = (
        'Assuming you are an English teacher and a question consists of three parts: the '
        'stem, options, and answer. Please provide a sentence completion type '
        'multiple-choice question to test students understanding of the grammar rule'
        ' <{grammar}>.The specific steps are: Generate a sentence using this '
        'grammar rule with simple vocabulary. Blank out the part related to the grammar '
        'of this sentence. Use the sentence that has been blanked out as the stem.'
        'Generate {num} phrases/words with the same meaning but different usage using '
        'the blanked-out part and include the new phrases/words and blanked-out part as '
        'the options.Use the blanked-out part as the correct answer.For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate B: celebrating C: celebrated D: celebrates '
        'Answer: A: celebrate'
    )

    @classmethod
    def generate(cls, grammars: List[str], question_num: int = 1, option_num: int = 4, model=None) -> List[Dict]:
        """Build up to ``question_num`` questions from randomly chosen *grammars*.

        A rule whose processing raises is reported with ``print`` and
        skipped. A reply that cannot be parsed is still appended, as
        ``{'question': 'fail!', 'answer': 'fail!'}``.

        :param grammars: candidate grammar rules.
        :param question_num: number of questions to attempt.
        :param option_num: options per question; ``option_num - 1`` new
            phrases are requested from the model.
        :param model: model key forwarded to ``_generate``.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        num = option_num - 1
        # Full stops in the instructions become line breaks. This is done on
        # the template, before the rule is inserted, so a rule such as
        # "It is + adj. + ..." is passed to the model unbroken.
        template = cls.generate_grammar_question_prompt.replace('.', '\n')
        for grammar in _materials_select(grammars, question_num):
            try:
                prompt = template.format(grammar=grammar, num=num)
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)
        return responses
def txt_to_list(txt):
    """Split a block of text into lines, ignoring surrounding blank lines.

    Leading and trailing newlines are removed before splitting. Blank lines
    in the middle are kept as empty strings.

    :param txt: newline-separated text.
    :return: list of lines; ``[]`` when *txt* is empty or only newlines
        (the character-by-character loops used to raise ``IndexError`` here).
    """
    # NOTE(review): the old ``is not None`` filter never removed anything
    # (``str.split`` yields no ``None``); if dropping interior blank lines was
    # the intent, filter on ``line != ''`` instead.
    stripped = txt.strip('\n')
    if not stripped:
        return []
    return stripped.split('\n')
# def generate_scope(progress: str, q_type):
# """
# :param progress:
# :param q_type:
# :return:
# """
#
# if progress == '期中':
# progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7']
# elif progress == '期末':
# progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7', 'unit8', 'unit9',
# 'unit10', 'unit11', 'unit12', 'unit13', 'unit14']
# else:
# progress = [progress]
#
# scope = {'word': [], 'phrase': [], 'sentence': [], 'grammar': []}
#
# root_path = Path(__file__).parent
# for i in progress:
# path = root_path.joinpath('material', i + '.txt')
# with open(path, 'r', encoding='utf-8') as file:
# content = file.read()
# # scope = re.split(r'<word>|<phrase>|<sentence>|<grammar>', content)
# _, word, phrase, sentence, grammar = re.split(r'<word>|<phrase>|<sentence>|<grammar>', content)
# scope['word'].extend(txt_to_list(word))
# scope['phrase'].extend(txt_to_list(phrase))
# scope['sentence'].extend(txt_to_list(sentence))
# scope['grammar'].extend(txt_to_list(grammar))
# m_type = question_type_to_material_type[q_type]
# return scope[m_type]
# def generate(progress, q_type, q_num, **kwargs):
# """
# :param progress:
# :param q_type:
# :param q_num:
# :param kwargs:
# model is a must when use the llm, for example:
# model = 'openai_3'
# :return:
# """
# materials = generate_scope(progress, q_type)
# return eval(q_type).generate(materials, q_num, **kwargs)
def generate(materials, q_type, q_num, **kwargs):
    """
    Build ``q_num`` questions of the kind named by ``q_type``.

    :param materials: source items handed to the question class.
    :param q_type: question title; must be a key of
        ``question_type_to_class_type``.
    :param q_num: number of questions to build.
    :param kwargs: forwarded to the question class's ``generate``.
        model is a must when use the llm, for example:
            model = 'chatgpt_3.5'
    :return: whatever the selected class's ``generate`` returns.
    """
    class_name = question_type_to_class_type[q_type]
    # The mapping stores class names as strings; eval resolves the name to
    # the class object defined in this module.
    question_cls = eval(class_name)
    return question_cls.generate(materials, q_num, **kwargs)
# ans1 = generate_word_complete_question(['word', 'manager', 'answer', 'fight', 'jump', 'hihhttgrh'])
# ans2 = generate_phrase_select_question(['the Dragon Boat Festival'], 3)
# ans3 = generate_sentence_recomposition_question(['You really require a lot of talent and hard work to succeed.',
# 'I have a job interview tomorrow.], False)
# ans4 = generate_sentence_complete_question(['You really require a lot of talent and hard work to succeed.',
# 'I have a job interview tomorrow.'])
# ans5 = generate_grammar_question(['be supposed to', 'It is + adj. + 动词不定式'])
# print(ans5)