import json
import os
import sys

import torch
from flask import Flask, request, jsonify, render_template
from transformers import GenerationConfig

app = Flask(__name__)

# Define paths
drive_folder = '/app'  # Path where the model files are placed inside the Docker container

tokenizer_config_file = os.path.join(drive_folder, 'tokenizer_config.json')
model_config_file = os.path.join(drive_folder, 'config.json')

# Add the model folder to sys.path so the custom Qwen modules below can be imported
sys.path.append(drive_folder)

# Debugging print statements
print(f"Drive folder: {drive_folder}")
print(f"Tokenizer config file: {tokenizer_config_file}")
print(f"Model config file: {model_config_file}")

# Import the custom configuration, tokenizer, and model classes
try:
    from configuration_qwen import QWenConfig
    from tokenization_qwen import QWenTokenizer
    from modeling_qwen import QWenLMHeadModel
    print("Imported custom classes successfully!")
except ImportError as e:
    print(f"Import error: {e}")
    raise

# Ensure the tokenizer configuration file exists
if not os.path.exists(tokenizer_config_file):
    raise FileNotFoundError(f"Tokenizer configuration file not found at {tokenizer_config_file}")

# Load the tokenizer configuration
with open(tokenizer_config_file, 'r') as f:
    tokenizer_config = json.load(f)

# Load the model configuration from the provided config file
with open(model_config_file, 'r') as f:
    model_config = json.load(f)

# Disable FlashAttention on GPUs that do not support it
model_config["use_flash_attn"] = False
model_config["use_dynamic_ntk"] = False  # Also disable dynamic NTK scaling

# Use the provided configuration for model initialization
try:
    tokenizer = QWenTokenizer.from_pretrained(drive_folder)
    config = QWenConfig.from_pretrained(drive_folder, **model_config)
    model = QWenLMHeadModel.from_pretrained(drive_folder, config=config)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    print("Error loading model or tokenizer:", e)
    raise

def generate_text(model, tokenizer, prompt, max_length=200, temperature=0.7, top_k=50, top_p=0.9):
    try:
        # Tokenize the input
        input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
        
        # Set up generation configuration
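        # Note: GenerationConfig's max_length counts the prompt tokens, so the
        # prompt length is added below to keep this function's max_length
        # meaning "new tokens to generate". temperature scales the logits,
        # top_k keeps only the k most likely tokens, and top_p keeps the
        # smallest token set whose cumulative probability exceeds p
        # (nucleus sampling).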
        generation_config = GenerationConfig(
            max_length=max_length + len(input_ids[0]),
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
        
        # Generate text using advanced sampling
        outputs = model.generate(
            input_ids,
            generation_config=generation_config
        )
        
        # Decode the generated sequence
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Strip the prompt from the decoded output; fall back to the full
        # output if the prompt is not found verbatim after detokenization
        start_index = decoded_output.find(prompt)
        if start_index != -1:
            generated_text = decoded_output[start_index + len(prompt):].strip()
        else:
            generated_text = decoded_output.strip()
        
        return generated_text
    except Exception as e:
        print("Error during text generation:", e)
        raise
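
# Quick sanity check for generate_text (hypothetical prompt; run only after the
# model and tokenizer above have loaded successfully):
#
#     sample = generate_text(model, tokenizer, "Write a haiku about the sea.",
#                            max_length=50, temperature=0.8)
#     print(sample)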

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/generate', methods=['POST'])
def generate():
    user_input = request.form.get('user_input', '')
    # Keyword pairs that trigger the canned identity response instead of the model
    name_question_keywords = [
        ("urname", "what"),
        ("your name", "what"),
        ("tell ", "your name"),
        ("what", "you go by"),
        ("what", "call yourself"),
        ("what", "they call you"),
    ]
    try:
        if any(a in user_input and b in user_input for a, b in name_question_keywords):
            response_text = "I am Shanks, a large language model developed by Motaung.inc"
        else:
            response_text = generate_text(model, tokenizer, user_input)
        return jsonify({"response": response_text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
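
# Example request against the running server (assumes the container exposes
# port 8080 as configured above):
#
#     curl -X POST http://localhost:8080/generate -d "user_input=Hello there"
#
# On success the endpoint returns JSON like {"response": "..."}; on failure it
# returns {"error": "..."} with HTTP status 500.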