Spaces:

FlameF0X
/

SnowflakeCore-Demo-Inteface

Sleeping

App Files Files Community

SnowflakeCore-Demo-Inteface / app.py

FlameF0X

Update app.py

5734180 verified 5 months ago

raw

history blame

1.93 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from modeling_snowflake import Snowflake4CausalLM
	import torch

	# Load tokenizer and model
	MODEL_NAME = "FlameF0X/Snowflake-G0-stable"

	# Pick the device first so the weight dtype can be chosen to match it.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	# Half precision halves GPU memory use, but float16 inference on CPU is slow
	# and some ops lack Half kernels on older PyTorch builds, so use float32 there.
	MODEL_DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_NAME,
	    torch_dtype=MODEL_DTYPE,
	)
	model.eval()  # inference only: switch off training-time behaviour such as dropout
	model.to(DEVICE)

	# --- Inference Function ---
	def generate_text(prompt, max_length=50):
	    """
	    Generate text from ``prompt`` using the module-level tokenizer and model.

	    Args:
	        prompt: Input text. It is tokenized with truncation to at most 384
	            tokens before being passed to the model.
	        max_length: Maximum number of tokens to generate on top of the
	            (possibly truncated) prompt.

	    Returns:
	        The first sequence returned by ``model.generate``, decoded with
	        special tokens skipped. An empty or ``None`` prompt returns ``""``
	        without calling the model.
	    """
	    # Nothing to condition on (e.g. the user clicked "Generate" with an empty
	    # textbox): skip tokenization/generation instead of failing inside them.
	    if not prompt:
	        return ""

	    # A single prompt never needs padding, and requesting it raises when the
	    # tokenizer defines no pad token, so only truncation is requested here.
	    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=384)
	    input_ids = encoded["input_ids"].to(model.device)
	    attention_mask = encoded["attention_mask"].to(model.device)

	    # ``generate``'s ``max_length`` counts the prompt tokens too. Passing the
	    # caller's budget directly would leave no room for new tokens whenever the
	    # prompt is already that long, so the prompt length is added on top.
	    # NOTE(review): prompt (<= 384 tokens) plus new tokens may exceed the
	    # model's context window -- confirm the limit for this checkpoint.
	    total_length = input_ids.shape[-1] + max_length

	    with torch.no_grad():
	        outputs = model.generate(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            max_length=total_length,
	            pad_token_id=tokenizer.eos_token_id,  # reuse EOS as the padding token
	        )

	    # Only one prompt was submitted, so only the first sequence is decoded.
	    return tokenizer.decode(outputs[0], skip_special_tokens=True)

	# --- Gradio Interface ---
	with gr.Blocks() as demo:
	    # Page title followed by a one-line description of the model.
	    gr.Markdown("# Snowflake-G0-stable Language Model")
	    gr.Markdown("This is an enhanced transformer language model trained on the DialogMLM-50K dataset. Try it out below!")

	    # Prompt box and result box share a single row.
	    with gr.Row():
	        input_prompt = gr.Textbox(
	            label="Input Prompt",
	            placeholder="Enter your text here...",
	        )
	        output_text = gr.Textbox(label="Generated Text")

	    submit_button = gr.Button("Generate")

	    def on_submit(prompt):
	        """Click handler: run generation with the default length budget."""
	        completion = generate_text(prompt)
	        return completion

	    # Wire the button: textbox value in, generated text out.
	    submit_button.click(
	        fn=on_submit,
	        inputs=input_prompt,
	        outputs=output_text,
	    )

	# Start serving the interface.
	demo.launch()