# chat-hf / app.py
import os
import gradio as gr
from langchain.llms import CTransformers
#checkpoint = "bigscience/bloomz" # english
#checkpoint = "cmarkea/bloomz-3b-sft-chat"
#checkpoint = "bigscience/bloomz-7b1-mt" # non english
#checkpoint = os.getenv('HF_BLOOM_MODEL')
llm_config = {
    'max_new_tokens': 256,      # cap on tokens generated per call
    'temperature': 0.8,         # sampling temperature
    'top_p': 0.5,               # nucleus-sampling cutoff
    'repetition_penalty': 1.1,  # penalize repeated tokens
    # 'num_beams' was dropped: ctransformers samples token by token and has
    # no beam-search option, so the key is not part of its config.
}
# Set gpu_layers in the config to the number of layers to offload to GPU;
# leave it unset (or 0) if no GPU acceleration is available on your system.
llm = CTransformers(
    model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF",
    model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf",
    config=llm_config,
)
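
# Optional GPU offload, a sketch: 'gpu_layers' is a standard ctransformers
# config key, but the layer count of 40 below is an assumed value, not one
# tuned for this model.
#
#   llm_config['gpu_layers'] = 40
#   llm = CTransformers(
#       model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF",
#       model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf",
#       config=llm_config,
#   )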
def response(prompt):
    # Single blocking generation pass over the GGUF model.
    txt = llm(prompt)
    return txt
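
# Quick sanity check outside the Gradio UI (a sketch; any prompt works):
#
#   print(response("What is the capital of France?"))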
if __name__ == '__main__':
    title = "Chat"
    demo_status = "Demo is running on CPU"
    gr.Interface(
        response,
        inputs="text",
        outputs="text",
        title=title,
        description=demo_status,  # previously unused; now shown under the title
    ).launch()
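
# Streaming variant, a sketch assuming langchain's callback API: tokens are
# printed to stdout as they are generated instead of returned in one block.
#
#   from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
#   llm_stream = CTransformers(
#       model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF",
#       model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf",
#       config=llm_config,
#       callbacks=[StreamingStdOutCallbackHandler()],
#   )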