import json
import os
import sys

import torch
from flask import Flask, request, jsonify, render_template
from transformers import GenerationConfig

app = Flask(__name__)
# Define paths
drive_folder = '/app'  # Path where the model files are downloaded inside the Docker container
tokenizer_config_file = os.path.join(drive_folder, 'tokenizer_config.json')
model_config_file = os.path.join(drive_folder, 'config.json')
# Make the custom tokenizer/model modules in drive_folder importable
sys.path.append(drive_folder)
# Debugging print statements
print(f"Drive folder: {drive_folder}")
print(f"Tokenizer config file: {tokenizer_config_file}")
print(f"Model config file: {model_config_file}")
# Import the custom configuration, tokenizer, and model classes
try:
    from configuration_qwen import QWenConfig
    from tokenization_qwen import QWenTokenizer
    from modeling_qwen import QWenLMHeadModel
    print("Imported custom classes successfully!")
except ImportError as e:
    print(f"Import error: {e}")
    raise
# Ensure the tokenizer configuration file exists
if not os.path.exists(tokenizer_config_file):
    raise FileNotFoundError(f"Tokenizer configuration file not found at {tokenizer_config_file}")
# Load the tokenizer configuration
with open(tokenizer_config_file, 'r') as f:
    tokenizer_config = json.load(f)
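# Note: tokenizer_config is not referenced again in this script;
# QWenTokenizer.from_pretrained below reads the same file on its own.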
# Load the model configuration from the provided config file
with open(model_config_file, 'r') as f:
    model_config = json.load(f)
# Disable FlashAttention for non-supported GPUs
model_config["use_flash_attn"] = False
model_config["use_dynamic_ntk"] = False # Disable other advanced features if necessary
# Use the provided configuration for model initialization
try:
    tokenizer = QWenTokenizer.from_pretrained(drive_folder)
    config = QWenConfig.from_pretrained(drive_folder, **model_config)
    model = QWenLMHeadModel.from_pretrained(drive_folder, config=config)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    model.eval()  # inference only: disables dropout etc.
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    print("Error loading model or tokenizer:", e)
    raise
def generate_text(model, tokenizer, prompt, max_length=200, temperature=0.7, top_k=50, top_p=0.9):
    try:
        # Tokenize the input
        input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
        # Set up generation configuration; total budget = prompt length + max_length new tokens
        generation_config = GenerationConfig(
            max_length=max_length + len(input_ids[0]),
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
        # Generate text using sampling; no_grad avoids tracking gradients at inference
        with torch.no_grad():
            outputs = model.generate(input_ids, generation_config=generation_config)
        # Decode the generated sequence
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Strip the prompt from the output; find() returns -1 if the prompt is
        # not reproduced verbatim, in which case return the full decoded text
        start_index = decoded_output.find(prompt)
        if start_index == -1:
            return decoded_output.strip()
        return decoded_output[start_index + len(prompt):].strip()
    except Exception as e:
        print("Error during text generation:", e)
        raise
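# Quick sanity check from a Python shell once the model has loaded
# (the prompt below is illustrative):
#
#     sample = generate_text(model, tokenizer, "Once upon a time", max_length=50)
#     print(sample)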
@app.route('/')
def home():
    return render_template('index.html')
# All of these substring pairs map to the same canned identity response
IDENTITY_PATTERNS = [
    ("urname", "what"),
    ("your name", "what"),
    ("tell ", "your name"),
    ("what", "you go by"),
    ("what", "call yourself"),
    ("what", "they call you"),
]
IDENTITY_RESPONSE = "I am Shanks, a large language model developed by Motaung.inc"

@app.route('/generate', methods=['POST'])
def generate():
    user_input = request.form['user_input']
    try:
        if any(a in user_input and b in user_input for a, b in IDENTITY_PATTERNS):
            response_text = IDENTITY_RESPONSE
        else:
            response_text = generate_text(model, tokenizer, user_input)
        return jsonify({"response": response_text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
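# Example request against the running server (default host/port above):
#
#     curl -X POST -d "user_input=Hello, who are you?" http://localhost:8080/generate
#
# The endpoint returns JSON, e.g. {"response": "..."} or {"error": "..."} with status 500.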