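# Streamlit chat demo for InternLM2-Math-7B.
# Launch with `streamlit run <this file>` (the exact filename is not given in the source).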
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import streamlit as st
# Optional download backends, only needed by the commented-out model paths below:
# from openxlab.model import download
# from modelscope import snapshot_download
import os

# level = os.getenv('level')
level = '7'
with st.sidebar:
    st.markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
    max_length = st.slider("max_length", 0, 1024, 512, step=1)
    # system_prompt = st.text_input("System_Prompt", "")

st.title("InternLM2-Math-7B")
st.caption("🚀 Powered by Shanghai AI Lab")
# Define the model path
## ModelScope
# model_id = 'Shanghai_AI_Laboratory/internlm2-chat-' + str(level) + 'b'
# model_name_or_path = snapshot_download(model_id, revision='master')
model_name_or_path = "internlm/internlm2-math-7b"
## OpenXLab
# model_repo = "OpenLMLab/internlm2-chat-7b"
# model_name_or_path = download(model_repo=model_repo)
@st.cache_resource  # load the model once per process instead of on every Streamlit rerun
def get_model():
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                                 trust_remote_code=True,
                                                 torch_dtype=torch.bfloat16).cuda()
    model.eval()
    return tokenizer, model

tokenizer, model = get_model()
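# Chat history is kept as (user, assistant) tuples; this matches the history=
# format that InternLM2's remote-code chat() method consumes.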
if "messages" not in st.session_state:
    st.session_state["messages"] = []

for msg in st.session_state.messages:
    st.chat_message("user").write(msg[0])
    st.chat_message("assistant").write(msg[1])

if prompt := st.chat_input():
    st.chat_message("user").write(prompt)
    # max_new_tokens wires in the sidebar slider, which was otherwise unused;
    # this assumes InternLM2's remote-code chat() accepts that keyword.
    response, history = model.chat(tokenizer, prompt, meta_instruction='',
                                   history=st.session_state.messages,
                                   max_new_tokens=max_length)
    st.session_state.messages.append((prompt, response))
    st.chat_message("assistant").write(response)
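# --- Alternative serving path (disabled) -------------------------------------
# The block below is an earlier LMDeploy + Gradio variant of this demo, kept
# commented out. It relies on symbols pulled in by the wildcard import
# (AsyncEngine, InterFace, gr, CSS, THEME, disable_btn, enable_btn,
# chat_stream_local, Sequence) and is not used by the Streamlit app above.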
# import os
# os.system("pip uninstall -y gradio")
# os.system("pip install gradio==3.43.0")
# from lmdeploy.serve.gradio.turbomind_coupled import *
# from lmdeploy.messages import TurbomindEngineConfig, GenerationConfig
# from lmdeploy import ChatTemplateConfig
# chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='')
# backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', max_batch_size=1, cache_max_entry_count=0.05)  # , model_format='awq'
# model_path = 'internlm/internlm2-math-7b'
# InterFace.async_engine = AsyncEngine(
#     model_path=model_path,
#     backend='turbomind',
#     backend_config=backend_config,
#     chat_template_config=chat_template,
#     tp=1)
# async def reset_local_func(instruction_txtbox: gr.Textbox,
#                            state_chatbot: Sequence, session_id: int):
#     """Reset the session.
#
#     Args:
#         instruction_txtbox (str): user's prompt
#         state_chatbot (Sequence): the chatting history
#         session_id (int): the session id
#     """
#     state_chatbot = []
#     # end the session
#     with InterFace.lock:
#         InterFace.global_session_id += 1
#         session_id = InterFace.global_session_id
#     return (state_chatbot, state_chatbot, gr.Textbox.update(value=''), session_id)
# async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
#                             reset_btn: gr.Button, session_id: int):
#     """Stop the session.
#
#     Args:
#         state_chatbot (Sequence): the chatting history
#         cancel_btn (gr.Button): the cancel button
#         reset_btn (gr.Button): the reset button
#         session_id (int): the session id
#     """
#     yield (state_chatbot, disable_btn, disable_btn, session_id)
#     InterFace.async_engine.stop_session(session_id)
#     # The PyTorch backend does not support resuming chat history yet.
#     if InterFace.async_engine.backend == 'pytorch':
#         yield (state_chatbot, disable_btn, enable_btn, session_id)
#     else:
#         with InterFace.lock:
#             InterFace.global_session_id += 1
#             session_id = InterFace.global_session_id
#         messages = []
#         for qa in state_chatbot:
#             messages.append(dict(role='user', content=qa[0]))
#             if qa[1] is not None:
#                 messages.append(dict(role='assistant', content=qa[1]))
#         # Replay the history with zero new tokens to rebuild the server-side session.
#         gen_config = GenerationConfig(max_new_tokens=0)
#         async for out in InterFace.async_engine.generate(messages,
#                                                          session_id,
#                                                          gen_config=gen_config,
#                                                          stream_response=True,
#                                                          sequence_start=True,
#                                                          sequence_end=False):
#             pass
#         yield (state_chatbot, disable_btn, enable_btn, session_id)
# with gr.Blocks(css=CSS, theme=THEME) as demo:
#     state_chatbot = gr.State([])
#     state_session_id = gr.State(0)
#     with gr.Column(elem_id='container'):
#         gr.Markdown('## LMDeploy Playground')
#         gr.Markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
#         chatbot = gr.Chatbot(
#             elem_id='chatbot',
#             label=InterFace.async_engine.engine.model_name)
#         instruction_txtbox = gr.Textbox(
#             placeholder='Please input the instruction',
#             label='Instruction')
#         with gr.Row():
#             cancel_btn = gr.Button(value='Cancel', interactive=False)
#             reset_btn = gr.Button(value='Reset')
#         with gr.Row():
#             request_output_len = gr.Slider(1, 1024, value=512, step=1,
#                                            label='Maximum new tokens')
#             top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p')
#             temperature = gr.Slider(0.01, 1.5, value=0.01, step=0.01,
#                                     label='Temperature')
#     send_event = instruction_txtbox.submit(chat_stream_local, [
#         instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
#         state_session_id, top_p, temperature, request_output_len
#     ], [state_chatbot, chatbot, cancel_btn, reset_btn])
#     instruction_txtbox.submit(
#         lambda: gr.Textbox.update(value=''),
#         [],
#         [instruction_txtbox],
#     )
#     cancel_btn.click(
#         cancel_local_func,
#         [state_chatbot, cancel_btn, reset_btn, state_session_id],
#         [state_chatbot, cancel_btn, reset_btn, state_session_id],
#         cancels=[send_event])
#     reset_btn.click(reset_local_func,
#                     [instruction_txtbox, state_chatbot, state_session_id],
#                     [state_chatbot, chatbot, instruction_txtbox, state_session_id],
#                     cancels=[send_event])
# def init():
#     with InterFace.lock:
#         InterFace.global_session_id += 1
#         new_session_id = InterFace.global_session_id
#     return new_session_id
#
# demo.load(init, inputs=None, outputs=[state_session_id])
# # demo.queue(concurrency_count=InterFace.async_engine.instance_num,
# #            max_size=100).launch()
# demo.queue(max_size=1000).launch(max_threads=InterFace.async_engine.instance_num)