import sys
import os
import json

import torch
from transformers import GenerationConfig
from flask import Flask, request, jsonify, render_template
app = Flask(__name__)
# Define paths
drive_folder = '/app'  # Path where the model files are downloaded inside the Docker container
tokenizer_config_file = os.path.join(drive_folder, 'tokenizer_config.json')
model_config_file = os.path.join(drive_folder, 'config.json')
# Add the custom tokenizer and model paths to sys.path
sys.path.append(drive_folder)
# Debugging print statements
print(f"Drive folder: {drive_folder}")
print(f"Tokenizer config file: {tokenizer_config_file}")
print(f"Model config file: {model_config_file}")
# Import the custom configuration, tokenizer, and model classes
try:
    from configuration_qwen import QWenConfig
    from tokenization_qwen import QWenTokenizer
    from modeling_qwen import QWenLMHeadModel
    print("Imported custom classes successfully!")
except ImportError as e:
    print(f"Import error: {e}")
    raise
# Ensure the tokenizer configuration file exists
if not os.path.exists(tokenizer_config_file):
    raise FileNotFoundError(f"Tokenizer configuration file not found at {tokenizer_config_file}")
# Load the tokenizer configuration
with open(tokenizer_config_file, 'r') as f:
    tokenizer_config = json.load(f)
# Load the model configuration from the provided config file
with open(model_config_file, 'r') as f:
    model_config = json.load(f)
# Disable FlashAttention for GPUs that do not support it
model_config["use_flash_attn"] = False
model_config["use_dynamic_ntk"] = False  # Also disable dynamic NTK scaling
# Use the provided configuration for model initialization
try:
    tokenizer = QWenTokenizer.from_pretrained(drive_folder)
    # Apply the edited config values as overrides on top of config.json
    config = QWenConfig.from_pretrained(drive_folder, **model_config)
    model = QWenLMHeadModel.from_pretrained(drive_folder, config=config)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    print("Error loading model or tokenizer:", e)
    raise
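# Optional sanity check (a sketch; uncomment to use): confirm which device
# the weights landed on before the server starts taking traffic.
# print(f"Model device: {next(model.parameters()).device}")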
def generate_text(model, tokenizer, prompt, max_length=200, temperature=0.7, top_k=50, top_p=0.9):
    try:
        # Tokenize the input and move it to the model's device
        input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
        # Set up generation configuration; max_length counts prompt tokens,
        # so extend it by the prompt length to allow max_length new tokens
        generation_config = GenerationConfig(
            max_length=max_length + len(input_ids[0]),
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
        # Generate text using sampling (temperature / top-k / top-p)
        outputs = model.generate(
            input_ids,
            generation_config=generation_config
        )
        # Decode the generated sequence
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Strip the echoed prompt from the output; str.find returns -1 when
        # the prompt is not found (e.g. tokenization altered the whitespace),
        # so fall back to the full decoded output in that case
        start_index = decoded_output.find(prompt)
        if start_index != -1:
            generated_text = decoded_output[start_index + len(prompt):].strip()
        else:
            generated_text = decoded_output.strip()
        return generated_text
    except Exception as e:
        print("Error during text generation:", e)
        raise
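# Example of calling generate_text directly, e.g. for a quick smoke test
# outside Flask (a sketch; the prompt and max_length values are illustrative):
# print(generate_text(model, tokenizer, "Once upon a time", max_length=50))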
@app.route('/')
def home():
    return render_template('index.html')
@app.route('/generate', methods=['POST'])
def generate():
    user_input = request.form['user_input']
    try:
        # Hard-coded identity answers: each pair of substrings marks a
        # "what is your name"-style question
        identity_triggers = [
            ("urname", "what"),
            ("your name", "what"),
            ("tell ", "your name"),
            ("what", "you go by"),
            ("what", "call yourself"),
            ("what", "they call you"),
        ]
        if any(all(t in user_input for t in pair) for pair in identity_triggers):
            response_text = "I am Shanks, a large language model developed by Motaung.inc"
        else:
            response_text = generate_text(model, tokenizer, user_input)
        return jsonify({"response": response_text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
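# Example request against the running server (illustrative values; the app
# listens on port 8080 as configured above):
#   curl -X POST http://localhost:8080/generate -d "user_input=Hello"
# On success the endpoint returns JSON of the form {"response": "..."};
# on failure it returns {"error": "..."} with HTTP status 500.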