import json
import os
import sys

import torch
from flask import Flask, request, jsonify, render_template
from transformers import GenerationConfig

app = Flask(__name__)

# Define paths
drive_folder = '/app'  # Path where the model files are placed inside the Docker container

tokenizer_config_file = os.path.join(drive_folder, 'tokenizer_config.json')
model_config_file = os.path.join(drive_folder, 'config.json')

# Add the model folder to sys.path so the custom Qwen modules below can be imported
sys.path.append(drive_folder)

# Debugging print statements
print(f"Drive folder: {drive_folder}")
print(f"Tokenizer config file: {tokenizer_config_file}")
print(f"Model config file: {model_config_file}")

# Import the custom configuration, tokenizer, and model classes
try:
    from configuration_qwen import QWenConfig
    from tokenization_qwen import QWenTokenizer
    from modeling_qwen import QWenLMHeadModel
    print("Imported custom classes successfully!")
except ImportError as e:
    print(f"Import error: {e}")
    raise

# Ensure the tokenizer configuration file exists
if not os.path.exists(tokenizer_config_file):
    raise FileNotFoundError(f"Tokenizer configuration file not found at {tokenizer_config_file}")

# Load the tokenizer configuration
with open(tokenizer_config_file, 'r') as f:
    tokenizer_config = json.load(f)

# Load the model configuration from the provided config file
with open(model_config_file, 'r') as f:
    model_config = json.load(f)

# Disable FlashAttention on GPUs that do not support it
model_config["use_flash_attn"] = False
model_config["use_dynamic_ntk"] = False  # Also disable dynamic NTK scaling

# Use the provided configuration for model initialization
try:
    tokenizer = QWenTokenizer.from_pretrained(drive_folder)
    config = QWenConfig.from_pretrained(drive_folder, **model_config)
    model = QWenLMHeadModel.from_pretrained(drive_folder, config=config)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    print("Error loading model or tokenizer:", e)
    raise

def generate_text(model, tokenizer, prompt, max_length=200, temperature=0.7, top_k=50, top_p=0.9):
    try:
        # Tokenize the input
        input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
        
        # Set up generation configuration
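        # Note: GenerationConfig's max_length counts the prompt tokens, so the
        # prompt length is added below to keep this function's max_length
        # meaning "new tokens to generate". temperature scales the logits,
        # top_k keeps only the k most likely tokens, and top_p keeps the
        # smallest token set whose cumulative probability exceeds p
        # (nucleus sampling).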
        generation_config = GenerationConfig(
            max_length=max_length + len(input_ids[0]),
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
        
        # Generate text using advanced sampling
        outputs = model.generate(
            input_ids,
            generation_config=generation_config
        )
        
        # Decode the generated sequence
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Strip the prompt from the decoded output; fall back to the full
        # output if the prompt is not found verbatim after detokenization
        start_index = decoded_output.find(prompt)
        if start_index != -1:
            generated_text = decoded_output[start_index + len(prompt):].strip()
        else:
            generated_text = decoded_output.strip()
        
        return generated_text
    except Exception as e:
        print("Error during text generation:", e)
        raise
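
# Quick sanity check for generate_text (hypothetical prompt; run only after the
# model and tokenizer above have loaded successfully):
#
#     sample = generate_text(model, tokenizer, "Write a haiku about the sea.",
#                            max_length=50, temperature=0.8)
#     print(sample)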

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/generate', methods=['POST'])
def generate():
    user_input = request.form.get('user_input', '')
    # Keyword pairs that trigger the canned identity response instead of the model
    name_question_keywords = [
        ("urname", "what"),
        ("your name", "what"),
        ("tell ", "your name"),
        ("what", "you go by"),
        ("what", "call yourself"),
        ("what", "they call you"),
    ]
    try:
        if any(a in user_input and b in user_input for a, b in name_question_keywords):
            response_text = "I am Shanks, a large language model developed by Motaung.inc"
        else:
            response_text = generate_text(model, tokenizer, user_input)
        return jsonify({"response": response_text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
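
# Example request against the running server (assumes the container exposes
# port 8080 as configured above):
#
#     curl -X POST http://localhost:8080/generate -d "user_input=Hello there"
#
# On success the endpoint returns JSON like {"response": "..."}; on failure it
# returns {"error": "..."} with HTTP status 500.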