import gradio as gr
import os
from dataclasses import dataclass, asdict

from ctransformers import AutoModelForCausalLM, AutoConfig


@dataclass
class GenerationConfig:
    temperature: float
    top_k: int
    top_p: float
    repetition_penalty: float
    max_new_tokens: int
    seed: int
    reset: bool
    stream: bool
    threads: int
    stop: list[str]

def format_prompt(user_prompt: str):
    return f"""### Instruction:
{user_prompt}
### Response:"""

def generate(
    llm: AutoModelForCausalLM,
    generation_config: GenerationConfig,
    user_prompt: str,
):
    """Run model inference; returns a generator of tokens when streaming is enabled."""
    return llm(format_prompt(user_prompt), **asdict(generation_config))

# Load the model configuration from the Hub, then the locally stored GGML
# weights (the .q4_1.bin file must already exist in the working directory).
config = AutoConfig.from_pretrained(
    "teknium/Replit-v2-CodeInstruct-3B", context_length=2048
)
llm = AutoModelForCausalLM.from_pretrained(
    os.path.abspath("replit-v2-codeinstruct-3b.q4_1.bin"),
    model_type="replit",
    config=config,
)

generation_config = GenerationConfig(
    temperature=0.2,
    top_k=50,
    top_p=0.9,
    repetition_penalty=1.0,
    max_new_tokens=512,  # adjust as needed
    seed=42,
    reset=True,  # reset history (cache)
    stream=True,  # stream word by word / token by token
    threads=max(os.cpu_count() // 6, 1),  # adjust for your CPU; keep at least one thread
    stop=["<|endoftext|>"],
)

user_prefix = "[user]: "
assistant_prefix = "[assistant]:"

title = "Replit-v2-CodeInstruct-3b-ggml"
description = "This Space is an attempt to run the GGML 4-bit quantized version of Replit's CodeInstruct 3B on a CPU."
example_1 = "Write a Python script with a function that calculates the factorial of a number input by the user."
example_2 = "Write a Python script that prints 'you are logged in' only if the user inputs a number between 1 and 10."
examples = [example_1, example_2]

def generate_code(user_input):
    # stream=True makes generate() return a token generator; collect the
    # tokens into a single string before handing it back to Gradio.
    response = generate(llm, generation_config, user_input)
    code = ""
    for word in response:
        code += word
    return code

UI = gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(label="user_prompt", placeholder="Ask your queries here...."),
    outputs=gr.Textbox(label="Assistant"),
    title=title,
    description=description,
    examples=examples,
)

UI.launch()
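
The loader above expects replit-v2-codeinstruct-3b.q4_1.bin to already be present in the working directory; a missing file is a common cause of the runtime error shown on the Space. One way to fetch it at startup is with huggingface_hub. The repo_id and filename below are assumptions, not confirmed by the original code, so point them at wherever the quantized .bin is actually hosted.

# Hypothetical sketch: download the GGML weights before loading them.
# repo_id and filename are assumptions; adjust them to the repository
# that actually hosts the quantized .bin file.
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="teknium/Replit-v2-CodeInstruct-3B",    # assumed repo
    filename="replit-v2-codeinstruct-3b.q4_1.bin",  # assumed filename
)

llm = AutoModelForCausalLM.from_pretrained(
    model_path,
    model_type="replit",
    config=config,
)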
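
Because stream=True, generate() yields tokens one at a time, but generate_code() collects the whole response before returning, so the textbox only updates once. Gradio functions can also be generators, which lets the output update as tokens arrive. The variant below is a minimal sketch, not part of the original app, and assumes a Gradio 3.x-style setup where generator outputs require the queue.

# Streaming sketch (assumed variant): yield the partial output so Gradio
# re-renders the textbox as each token arrives.
def generate_code_streaming(user_input):
    partial = ""
    for token in generate(llm, generation_config, user_input):
        partial += token
        yield partial  # Gradio updates the output on every yield

UI = gr.Interface(
    fn=generate_code_streaming,
    inputs=gr.Textbox(label="user_prompt", placeholder="Ask your queries here...."),
    outputs=gr.Textbox(label="Assistant"),
    title=title,
    description=description,
    examples=examples,
)

UI.queue().launch()  # the queue is needed for generator (streaming) outputs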