# Spaces: weiwei1392 / question_generate (Sleeping)
# File size: 22,616 Bytes
# d8132e8

import re
import math
from pathlib import Path
from typing import List, Dict
from translate import Translator
from config import *
from prompt import *
from llm import OpenAI3, OpenAI4
import random



# question_type_to_material_type = {'WordCompleteChoice': 'word',
#                                   'WordCompleteBlank': 'word',
#                                   'WordRightSpellingChoice': 'word',
#                                   'PhraseTranslatingMatchChoice': 'phrase',
#                                   'SentenceRecompositionBlank': 'sentence',
#                                   'SentenceCompleteChoice': 'sentence',
#                                   'GrammarChoice': 'grammar'
#                                   }


# Question title -> name of the generator class defined further down in this
# module. Each key equals the `title` attribute of the class it maps to, and
# generate() uses the value to pick the class.
question_type_to_class_type = {
    '请将选择正确的拼写补全单词': 'WordCompleteChoice',
    '根据汉语意思补全单词': 'WordCompleteBlank',
    '根据汉语意思选出拼写正确的单词': 'WordRightSpellingChoice',
    '请根据汉语意思选择正确的短语': 'PhraseTranslatingMatchChoice',
    '请将下列单词组成完整的一句话': 'SentenceRecompositionBlank',
    '请选择正确的短语补全句子': 'SentenceCompleteChoice',
    '请根据语法知识选择正确的选项补全句子': 'GrammarChoice',
}


def trans_en_to_cn(word: str) -> str:
    """
    Translate an English word into Chinese.

    :param word: the English word to translate
    :return: '蜡笔' for 'crayon' / 'Crayon' (hard-coded), otherwise whatever
             translate.Translator returns for the word
    """
    # 'crayon' is answered from a fixed value instead of the translator.
    if word in ('crayon', 'Crayon'):
        return '蜡笔'

    translator = Translator(from_lang='English', to_lang='Chinese')
    return translator.translate(word)


def delete_index(string: str) -> str:
    """
    Remove a leading option label such as ``A.`` or ``1)`` from a string.

    The label is recognised only by its second character being one of
    ``. : 、 ： ) ）``; the first two characters are then dropped. Anything
    after the separator (including a space) is kept as it is.

    :param string: option / answer text, possibly prefixed with a label
    :return: the text without the two-character label, or the input unchanged
    """
    separators = ('.', ':', '、', '：', ')', '）')

    # A string of length 0 or 1 cannot carry a label; it may be a genuine
    # single-letter option from the word-completion questions.
    if len(string) <= 1:
        return string

    if string[1] in separators:
        return string[2:]
    return string


def normalize_options_and_answer(answer, options):
    """
    Shuffle the options, relabel them and label the answer to match.

    :param answer: text of the correct option (an existing label is removed)
    :param options: list of option texts; shuffled in place when it does not
                    hold exactly one item
    :return: ``(answer, options)`` where options is one comma-joined string of
             ``<label>:<text>`` items and answer is the ``<label>:<text>`` of
             the correct one. Labels come from number_letter_dict (config),
             indexed by position. With exactly one option, the answer is
             returned untouched together with that single option.
    :raises ValueError: if the cleaned answer is not among the cleaned options
    """
    if len(options) == 1:
        return answer, options[0]

    random.shuffle(options)
    cleaned = [delete_index(item) for item in options]
    answer = delete_index(answer)

    position = cleaned.index(answer)
    labelled_answer = number_letter_dict[position] + ':' + answer
    labelled_options = [number_letter_dict[pos] + ':' + text for pos, text in enumerate(cleaned)]

    return labelled_answer, ','.join(labelled_options)


def response_to_question(response):
    """
    Parse an LLM reply into a question text and its answer.

    Three layouts are tried in order:

    1. ``Stem: ... Options: ... Answer: ...``
    2. ``Stem: ... Options: ...`` without an answer section. An option line
       containing ``correct answer`` is taken as the answer (the marker and
       everything after it are cut off); if no line has it the answer is the
       string ``'None'``.
    3. ``Question: ... Answer: ...`` without options. The question is returned
       as written, minus trailing newlines.

    For layouts 1 and 2 the options are read one per line, passed through
    normalize_options_and_answer, and the question is ``stem + '\\n' + options``.

    :param response: raw text returned by the model
    :return: ``(question, answer)``; both are ``'fail!'`` when the reply does
             not match any layout or has a blank stem, question or options
    """
    stem_marker = r'stem: |stem:|Stem: |Stem:'
    options_marker = r'options: |options:|Options: |Options:'
    answer_marker = (r'Correct answer: |Correct answer:|correct answer: |correct answer:'
                     r'|answer: |answer:|Answer: |Answer:')
    question_marker = r'question: |question:|Question: |Question:'

    try:
        question = None
        # Each unpacking raises ValueError when the reply does not split into
        # exactly the expected number of sections, which selects the next layout.
        try:
            _, stem, options, answer = re.split(
                '|'.join((stem_marker, options_marker, answer_marker)), response)
        except ValueError:
            try:
                _, stem, options = re.split(stem_marker + '|' + options_marker, response)
                answer = None
            except ValueError:
                _, question, answer = re.split(question_marker + '|' + answer_marker, response)
                question = question.rstrip('\n')
                if not question:
                    raise ValueError('question section is empty')

        if question is None:
            stem = stem.rstrip('\n')
            if not stem:
                raise ValueError('stem section is empty')

            options = [line for line in options.split('\n') if line != '']
            # Without this check an empty options section left `question`
            # unbound and the function crashed instead of reporting 'fail!'.
            if not options:
                raise ValueError('options section is empty')

            if answer is None:
                answer = 'None'
                for position, option in enumerate(options):
                    if 'correct answer' in option:
                        answer = re.split(r'\(correct answer\)|correct answer', option)[0]
                        options[position] = answer

            # Only the first line of the answer section is the answer itself.
            answer = answer.split('\n')[0]
            answer, options = normalize_options_and_answer(answer, options)
            question = stem + '\n' + options

    except Exception as e:
        print(f'fail!,reason:{e},response:{response}')
        question = 'fail!'
        answer = 'fail!'

    return question, answer


def response_to_options(response):
    """
    Turn an LLM reply into a list of options.

    :param response: reply text with one option per line
    :return: one entry per line of the reply (blank lines included), each with
             its leading label removed by delete_index
    """
    return [delete_index(line) for line in response.split('\n')]


def _materials_select(materials: List[str], question_num: int, option_num: int = 1):
    """
    Randomly draw the material(s) used by each question.

    :param materials: pool of materials to draw from
    :param question_num: number of draws to make (one per question)
    :param option_num: number of materials drawn per question
    :return: a list with question_num entries; each entry is a single material
             when option_num is 1, otherwise a list of option_num materials
    """
    # Repeat the pool often enough that it holds at least option_num entries,
    # since random.sample cannot draw more items than the population has.
    repeats = math.ceil(option_num / len(materials))
    pool = materials * repeats

    picks = []
    for _ in range(question_num):
        drawn = random.sample(pool, option_num)
        picks.append(drawn[0] if option_num == 1 else drawn)
    return picks


def _generate(prompt, model='openai_3'):
    """
    Run a prompt through the selected LLM backend.

    :param prompt: text handed to the backend
    :param model: key into model_name (from config); the stored string is
                  evaluated to obtain the backend class
    :return: whatever the backend instance returns when called with the prompt
    """
    # NOTE(review): eval() is applied to the string stored in model_name; this
    # is only acceptable while model_name is trusted configuration.
    backend_class = eval(model_name[model])
    backend = backend_class()
    return backend(prompt)


def _generate_fake_word(word):
    """
    Build a misspelled variant of a word.

    The first strategy that applies is used:

    1. The word contains one or more letter groups listed in vowels_list
       (config): one occurrence of a randomly chosen group is replaced with a
       different vowels_list group of the same length.
    2. The word contains a letter listed in vowel_list (config): one such
       letter is replaced with a different vowel_list letter.
    3. Otherwise one randomly chosen letter is doubled.

    :param word: the correctly spelled word
    :return: the misspelled word
    :raises IndexError: if no replacement candidate exists (no other group of
                        the same length, or an empty word in strategy 3)
    """
    groups_in_word = [group for group in vowels_list if group in word]
    vowel_positions = [position for position, letter in enumerate(word) if letter in vowel_list]

    if groups_in_word:
        true_letters = random.choice(groups_in_word)
        candidates = [group for group in vowels_list
                      if len(group) == len(true_letters) and group != true_letters]
        fake_letters = random.choice(candidates)

        # Splitting on the group yields one more piece than there are
        # occurrences; exactly one occurrence is swapped for the fake group.
        pieces = word.split(true_letters)
        target = random.randint(0, len(pieces) - 2)
        rebuilt = []
        for position, piece in enumerate(pieces[:-1]):
            rebuilt.append(piece + (fake_letters if position == target else true_letters))
        rebuilt.append(pieces[-1])
        fake_word = ''.join(rebuilt)

    elif vowel_positions:
        index = random.choice(vowel_positions)
        alternatives = vowel_list.copy()
        alternatives.remove(word[index])
        fake_word = word[:index] + random.choice(alternatives) + word[index + 1:]

    else:
        index = random.choice(range(len(word)))
        # Double the chosen letter. The previous code appended word[:index]
        # here, which threw away the rest of the word.
        fake_word = word[:index] + word[index] + word[index:]

    return fake_word


class WordCompleteChoice(object):
    """Multiple-choice question: choose the letters that fill the blank in a word."""

    title = '请将选择正确的拼写补全单词'

    @staticmethod
    def generate(words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """
        Generate word-completion multiple-choice questions.

        For a word containing a letter group from vowels_list (config), one
        occurrence of a randomly chosen group is blanked and the other options
        are different groups of the same length. For any other word a few
        letters are blanked and the other options are different letter
        selections from the same word.

        :param words: pool of words to build questions from
        :param question_num: number of questions to generate
        :param option_num: number of options wanted per question; fewer are
                           produced when not enough distinct ones exist
        :param model: unused here; accepted so all generators share a signature
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        responses = []

        for word in _materials_select(words, question_num):
            groups_in_word = [group for group in vowels_list if group in word]

            if groups_in_word:
                answer = random.choice(groups_in_word)

                # Blank exactly one occurrence of the chosen group.
                pieces = word.split(answer)
                blank_at = random.randint(0, len(pieces) - 2)
                stem = ''
                for position, piece in enumerate(pieces[:-1]):
                    stem += piece + ('_' * len(answer) if position == blank_at else answer)
                stem += pieces[-1]

                options = [group for group in vowels_list
                           if len(group) == len(answer) and group != answer]
                if len(options) > option_num - 1:
                    random.shuffle(options)
                    options = options[:option_num - 1]
                options.append(answer)

            else:
                positions = list(range(len(word)))
                # Blank 2-3 letters while keeping three visible where possible,
                # but always at least one; a zero count made every candidate
                # option identical and the loop below never finished.
                num = max(1, min(random.randint(2, 3), len(word) - 3))

                random.shuffle(positions)
                ans_index = sorted(positions[:num])
                answer = ','.join(word[i] for i in ans_index)
                stem = ''.join('_' if i in ans_index else letter for i, letter in enumerate(word))

                options = [answer]
                # Short or repetitive words may not offer option_num distinct
                # selections, so the search is bounded instead of open-ended.
                attempts_left = 50 * max(option_num, 1)
                while len(options) < option_num and attempts_left > 0:
                    attempts_left -= 1
                    random.shuffle(positions)
                    option = ','.join(word[i] for i in positions[:num])
                    if option not in options:
                        options.append(option)

            answer, options = normalize_options_and_answer(answer, options)

            responses.append({'question': stem + '\n' + options, 'answer': answer})

        return responses


class WordCompleteBlank(object):
    """Fill-in-the-blank question: complete a word given its Chinese meaning."""

    title = '根据汉语意思补全单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """
        Generate fill-in-the-blank word questions.

        Each question shows option_num words, each as ``<chinese>:<word with
        blanks>``. A word containing a letter group from vowels_list (config)
        has one occurrence of a randomly chosen group blanked; any other word
        has a run of consecutive letters blanked.

        :param words: pool of words to build questions from
        :param question_num: number of questions to generate
        :param option_num: number of words per question
        :param model: unused here; accepted so all generators share a signature
        :return: list of ``{'question': ..., 'answer': ...}`` dicts; the
                 answer lists each word as ``<chinese>:<word>``
        """
        responses = []

        for group in _materials_select(words, question_num, option_num):
            # With option_num == 1 the selector returns bare strings; wrap them
            # so the loop below walks words, not the characters of one word.
            if isinstance(group, str):
                group = [group]

            stem_list = []
            answer_list = []

            for word in group:
                groups_in_word = [letters for letters in vowels_list if letters in word]
                chinese = trans_en_to_cn(word)

                if groups_in_word:
                    letters = random.choice(groups_in_word)
                    pieces = word.split(letters)
                    blank_at = random.randint(0, len(pieces) - 2)
                    blanked = ''
                    for position, piece in enumerate(pieces[:-1]):
                        blanked += piece + ('_' * len(letters) if position == blank_at else letters)
                    blanked += pieces[-1]
                else:
                    # Blank 2-3 letters while keeping two visible where
                    # possible, but always at least one.
                    num = max(1, min(random.randint(2, 3), len(word) - 2))
                    # randint is inclusive at both ends: the last valid start
                    # is len(word) - num; one past it made the blanked word
                    # longer than the real one.
                    start = random.randint(0, max(0, len(word) - num))
                    blanked = word[:start] + '_' * num + word[start + num:]

                stem_list.append(chinese + ':' + blanked)
                answer_list.append(chinese + ':' + word)

            responses.append({'question': '     '.join(stem_list), 'answer': '      '.join(answer_list)})

        return responses


class WordRightSpellingChoice(object):
    """Multiple-choice question: pick the correctly spelled word for a Chinese meaning."""

    title = '根据汉语意思选出拼写正确的单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """
        Generate spelling multiple-choice questions.

        :param words: pool of words to build questions from
        :param question_num: number of questions to generate
        :param option_num: options per question: the real word plus
                           option_num - 1 results of _generate_fake_word
        :param model: not used by this generator
        :return: list of ``{'question': ..., 'answer': ...}`` dicts; the
                 question is the Chinese translation, a newline, then the options
        """
        questions = []

        for word in _materials_select(words, question_num):
            meaning = trans_en_to_cn(word)

            candidates = []
            for _ in range(option_num - 1):
                candidates.append(_generate_fake_word(word))
            candidates.append(word)

            labelled_answer, option_text = normalize_options_and_answer(word, candidates)
            questions.append({'question': meaning + '\n' + option_text, 'answer': labelled_answer})

        return questions


class SentenceRecompositionBlank(object):
    """Question: reorder a list of shuffled words into a complete sentence."""

    title = '请将下列单词组成完整的一句话'

    generate_similar_sentence_prompt = 'Please generate a sentence with the same sentence structure as ' \
                                       '<{sentence}> using simple vocabulary'

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num=None, model=None,
                 generate_mode: bool = False) -> List[Dict]:
        """
        Generate sentence-recomposition questions.

        The question is one blank per word (end punctuation kept after its
        blank), a newline, then the words in shuffled order. The answer is the
        sentence itself.

        :param sentences: pool of sentences to build questions from
        :param question_num: number of questions to generate
        :param option_num: unused here; accepted so all generators share a signature
        :param model: model key passed to _generate when generate_mode is on
        :param generate_mode: when True, ask the LLM for a similar sentence and
                              use its first clause; the source sentence is used
                              if that call fails
        :return: list of ``{'question': ..., 'answer': ...}`` dicts
        """
        end_marks = ('.', '?', '!', '。', '？', '！')
        responses = []

        for sentence in _materials_select(sentences, question_num):
            sent = sentence
            if generate_mode:
                try:
                    prompt = cls.generate_similar_sentence_prompt.format(sentence=sentence)
                    sent = re.split(r'[.?!]', _generate(prompt, model))[0]
                except Exception as e:
                    # Best effort: fall back to the source sentence.
                    print(e)
                    sent = sentence
            answer = sent

            # split() without an argument drops the empty tokens that repeated
            # spaces would create; those crashed the punctuation check below.
            words = sent.split()
            stem = ['______'] * len(words)
            for position, word in enumerate(words):
                if word[-1] in end_marks:
                    stem[position] += word[-1]
                    words[position] = word[:-1]

            original_order = words.copy()
            # Reshuffle until the order differs, but only when a different
            # order exists; one word (or all-equal words) can never change.
            if len(set(words)) > 1:
                while words == original_order:
                    random.shuffle(words)

            question = ' '.join(stem) + '\n' + ', '.join(words)
            responses.append({'question': question, 'answer': answer})

        return responses


class PhraseTranslatingMatchChoice(object):
    """Multiple-choice question: pick the English phrase matching a Chinese meaning."""

    title = '请根据汉语意思选择正确的短语'

    generate_similar_phrase_prompt = 'Please generate {num} phrases with the same phrase structure as <{phrase}> by ' \
                                     'replacing a single word in this phrase. Please return the answer in a list. ' \
                                     'Try your best to use simple vocabulary when generating phrases.'

    translate_chinese_phrase_prompt = 'Please translate the following phrase to chinese: <{phrase}>'

    @classmethod
    def generate(cls, phrases: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """
        Generate phrase-matching multiple-choice questions with the LLM.

        For each phrase the LLM is asked for option_num - 1 similar phrases
        (the wrong options) and for a Chinese translation (the stem).

        :param phrases: pool of phrases to build questions from
        :param question_num: number of questions to attempt
        :param option_num: options wanted per question, including the correct one
        :param model: model key passed to _generate
        :return: list of ``{'question': ..., 'answer': ...}`` dicts; a phrase
                 whose generation raises is reported with print() and skipped
        """
        responses = []
        num = option_num - 1

        for phrase in _materials_select(phrases, question_num):
            try:
                phrase = phrase[0].upper() + phrase[1:]

                # str.format fills the placeholders directly; no eval() needed.
                option_prompt = cls.generate_similar_phrase_prompt.format(num=num, phrase=phrase)
                option_response = _generate(option_prompt, model)

                # Drop blank lines and surrounding spaces from the reply so no
                # empty option appears and the correct phrase is not the only
                # option without leading whitespace.
                options = [option.strip() for option in response_to_options(option_response)
                           if option.strip()]
                options.append(phrase)
                answer, options = normalize_options_and_answer(phrase, options)

                stem_prompt = cls.translate_chinese_phrase_prompt.format(phrase=phrase)
                stem_response = _generate(stem_prompt, model)

                responses.append({'question': stem_response + '\n' + options, 'answer': answer})
            except Exception as e:
                print(e)

        return responses


class SentenceCompleteChoice(object):
    """Multiple-choice question: pick the phrase that completes a sentence."""

    title = '请选择正确的短语补全句子'

    # The example's option list now ends with a space so that "graduate" and
    # "Answer:" are no longer fused into one token.
    generate_sentence_complete_question_prompt = (
        'Assuming you are an English teacher and a question consists of three '
        'parts: the stem, options, and answer. please provide a multiple-choice '
        'question based on the sentence <{sentence}>.The specific steps are: '
        'Randomly blank out a part of the sentence and replace it with ____ to '
        'create the stem.Generate {num} new phrases/words that are '
        'structurally consistent with the blanked-out part and include the new '
        'phrases/words and blanked-out part as the options.Use the blanked-out '
        'part as the answer. For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate  B: say goodbye C: take a break D: graduate '
        'Answer: D: graduate '
    )

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """
        Generate sentence-completion multiple-choice questions with the LLM.

        :param sentences: pool of sentences to build questions from
        :param question_num: number of questions to attempt
        :param option_num: options wanted per question; the LLM is asked for
                           option_num - 1 wrong ones
        :param model: model key passed to _generate
        :return: list of ``{'question': ..., 'answer': ...}`` dicts as parsed
                 by response_to_question; a sentence whose generation raises
                 is reported with print() and skipped
        """
        responses = []
        num = option_num - 1

        # Break the instructions into one line per sentence BEFORE inserting
        # the material, so full stops inside the material are left intact.
        template = cls.generate_sentence_complete_question_prompt.replace('.', '\n')

        for sentence in _materials_select(sentences, question_num):
            try:
                prompt = template.format(sentence=sentence, num=num)
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)

        return responses


class GrammarChoice(object):
    """Multiple-choice question: pick the option that completes a sentence per a grammar rule."""

    title = '请根据语法知识选择正确的选项补全句子'

    # The example answer used to read "D: graduate", which is not one of the
    # example options; it now names the option that fits the example stem.
    generate_grammar_question_prompt = (
        'Assuming you are an English teacher  and a question consists of three parts: the '
        'stem, options, and answer. Please provide a sentence completion type '
        'multiple-choice question to test students understanding of the grammar rule'
        ' <{grammar}>.The specific steps are: Generate a sentence using this '
        'grammar rule with simple vocabulary. Blank out the part related to the grammar '
        'of this sentence. Use the sentence that has been blanked out as the stem.'
        'Generate {num} phrases/words with the same meaning but different usage using '
        'the blanked-out part and include the new phrases/words and blanked-out part as '
        'the options.Use the blanked-out part as the correct answer.For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate  B: celebrating C: celebrated D: celebrates '
        'Answer: A: celebrate'
    )

    @classmethod
    def generate(cls, grammars: List[str], question_num: int = 1, option_num: int = 4, model=None) -> List[Dict]:
        """
        Generate grammar multiple-choice questions with the LLM.

        :param grammars: pool of grammar rules to build questions from
        :param question_num: number of questions to attempt
        :param option_num: options wanted per question; the LLM is asked for
                           option_num - 1 wrong ones
        :param model: model key passed to _generate
        :return: list of ``{'question': ..., 'answer': ...}`` dicts as parsed
                 by response_to_question; a rule whose generation raises is
                 reported with print() and skipped
        """
        responses = []
        num = option_num - 1

        # Break the instructions into one line per sentence BEFORE inserting
        # the rule, so full stops inside the rule (e.g. "adj.") are left intact.
        template = cls.generate_grammar_question_prompt.replace('.', '\n')

        for grammar in _materials_select(grammars, question_num):
            try:
                prompt = template.format(grammar=grammar, num=num)
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                print(e)

        return responses


def txt_to_list(txt):
    """
    Split a block of text into its lines.

    Leading and trailing newlines are ignored; blank lines in the middle are
    kept as empty strings.

    :param txt: text with one item per line
    :return: list of lines; an empty list when txt is empty or only newlines
             (this used to raise IndexError)
    """
    txt = txt.strip('\n')
    if not txt:
        return []
    return txt.split('\n')


# def generate_scope(progress: str, q_type):
#     """
#     :param progress:
#     :param q_type:
#     :return:
#     """
#
#     if progress == '期中':
#         progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7']
#     elif progress == '期末':
#         progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7', 'unit8', 'unit9',
#                     'unit10', 'unit11', 'unit12', 'unit13', 'unit14']
#     else:
#         progress = [progress]
#
#     scope = {'word': [], 'phrase': [], 'sentence': [], 'grammar': []}
#
#     root_path = Path(__file__).parent
#     for i in progress:
#         path = root_path.joinpath('material', i + '.txt')
#         with open(path, 'r', encoding='utf-8') as file:
#             content = file.read()
#             # scope = re.split(r'<word>|<phrase>|<sentence>|<grammar>', content)
#             _, word, phrase, sentence, grammar = re.split(r'<word>|<phrase>|<sentence>|<grammar>', content)
#             scope['word'].extend(txt_to_list(word))
#             scope['phrase'].extend(txt_to_list(phrase))
#             scope['sentence'].extend(txt_to_list(sentence))
#             scope['grammar'].extend(txt_to_list(grammar))
#     m_type = question_type_to_material_type[q_type]
#     return scope[m_type]


# def generate(progress, q_type, q_num, **kwargs):
#     """
#     :param progress:
#     :param q_type:
#     :param q_num:
#     :param kwargs:
#           model is a must when use the llm, for example:
#           model = 'openai_3'
#     :return:
#     """
#     materials = generate_scope(progress, q_type)
#     return eval(q_type).generate(materials, q_num, **kwargs)

def generate(materials, q_type, q_num, **kwargs):
    """
    Generate questions of the requested type.

    :param materials: list of materials handed to the generator class
    :param q_type: question title; must be a key of question_type_to_class_type
    :param q_num: number of questions to generate
    :param kwargs: forwarded to the class's generate(), e.g. option_num or
                   model (needed by the LLM-backed generators; a key of
                   model_name from config)
    :return: list of ``{'question': ..., 'answer': ...}`` dicts
    :raises KeyError: if q_type is not a known question title
    """
    class_name = question_type_to_class_type[q_type]
    # Resolve the class by name in this module's namespace; this avoids
    # eval()-ing a string just to look up a name.
    question_class = globals()[class_name]
    return question_class.generate(materials, q_num, **kwargs)



# ans1 = generate_word_complete_question(['word', 'manager', 'answer', 'fight', 'jump', 'hihhttgrh'])

# ans2 = generate_phrase_select_question(['the Dragon Boat Festival'], 3)

# ans3 = generate_sentence_recomposition_question(['You really require a lot of talent and hard work to succeed.',
# 'I have a job interview tomorrow.'], False)

# ans4 = generate_sentence_complete_question(['You really require a lot of talent and hard work to succeed.',
#                                             'I have a job interview tomorrow.'])

# ans5 = generate_grammar_question(['be supposed to', 'It is + adj. + 动词不定式'])
# print(ans5)