Upload 13 files

- README.md +27 -0
- app.py +61 -0
- document.txt +95 -0
- download_model.py +7 -0
- finetune/app.py +29 -0
- finetune/dataset.json +7 -0
- finetune/finetune.py +86 -0
- finetune/loss_plot.png +0 -0
- finetune/templates/index.html +156 -0
- inference.py +24 -0
- knowledge_base.txt +3 -0
- requirements.txt +9 -0
- templates/index.html +156 -0
README.md
ADDED
# Tiny Llama Project Guide

This repository provides a comprehensive guide for students and researchers to experiment with TinyLlama-1.1B-Chat-v1.0, an open-source language model developed by the TinyLlama organization. The goal is to enable accessible AI experimentation without fees or personal-information requirements.

## Model Details

- Model: TinyLlama-1.1B-Chat-v1.0
- Source: Hugging Face - TinyLlama/TinyLlama-1.1B-Chat-v1.0
- Organization: TinyLlama
- Description: A lightweight, efficient 1.1B-parameter model optimized for chat and text-generation tasks, suitable for low-resource environments such as laptops with 16GB RAM.
- License: Refer to the model's official Hugging Face page for licensing details (typically Apache 2.0).

## Resources

- Code: Scripts for downloading the model, fine-tuning it, and running a Flask-based chat UI.
- Dataset: A small JSON dataset for fine-tuning tests.
- Loss plot: Training loss plot from fine-tuning (loss_plot.png).

## Usage

This repository provides:

- A Flask app for local inference with a user-friendly chat interface.
- Fine-tuning scripts using LoRA for efficient training.
- Detailed setup instructions in document.txt.

Note: Model weights are not included in this repository. Users must download them from the official Hugging Face repository using their access token.
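A minimal quick-start, collecting the commands from document.txt (assumes Windows, Python 3.10, and your Hugging Face token set in download_model.py):

```
python -m venv venv
.\venv\Scripts\activate
pip install -r requirements.txt
python download_model.py
python app.py
```

Then open http://127.0.0.1:5000 in a browser.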
## Attribution

This project uses the TinyLlama-1.1B-Chat-v1.0 model by the TinyLlama organization. All credit for the model goes to the original authors. For more details, visit the TinyLlama Hugging Face page.
app.py
ADDED
```python
from flask import Flask, request, render_template
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re

app = Flask(__name__)

# Load model and tokenizer
model_path = "./tinyllama_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
device = torch.device("cpu")
model.to(device)

# Load simple knowledge base (one "key:value" pair per line)
knowledge_base = {}
try:
    with open("knowledge_base.txt", "r") as f:
        for line in f:
            if ":" in line:
                key, value = line.strip().split(":", 1)
                knowledge_base[key.lower()] = value.strip()
except FileNotFoundError:
    knowledge_base = {
        "ceo of meta": "Mark Zuckerberg",
        "founder of meta": "Mark Zuckerberg",
        "meta founding year": "2004"
    }

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/generate', methods=['POST'])
def generate():
    user_input = request.form['prompt'].strip().lower()

    # Check knowledge base for factual answers first
    response = None
    for key in knowledge_base:
        if key in user_input:
            response = knowledge_base[key]
            break

    if not response:
        # Create chat prompt template (tokenizer.apply_chat_template could
        # be used instead to match the model's trained chat format)
        prompt = f"<|SYSTEM|> You are a helpful assistant providing accurate and concise answers. Avoid fabricating details or repeating user input unnecessarily. If unsure, say so. <|USER|> {user_input} <|ASSISTANT|> "
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        # max_new_tokens bounds the reply itself; max_length would also count
        # the long system prompt and could truncate the answer
        outputs = model.generate(**inputs, max_new_tokens=100, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Clean response
        response = generated_text.split("<|ASSISTANT|> ")[-1] if "<|ASSISTANT|> " in generated_text else generated_text
        response = re.sub(r"\s+", " ", response).strip()
        # Avoid repetitive or irrelevant output
        if user_input in response.lower() and len(response.split()) < 10:
            response = "I'm not sure how to respond to that. Could you clarify or ask something else?"

    return response

if __name__ == '__main__':
    app.run(debug=True)
```
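With the app running, the `/generate` endpoint can also be exercised from the command line; a quick check, assuming the default Flask port:

```
curl -X POST -d "prompt=Who is the CEO of Meta?" http://127.0.0.1:5000/generate
```

This prompt matches a knowledge-base key, so the reply comes from knowledge_base.txt rather than the model.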
document.txt
ADDED
Tiny Llama Project Guide: Running TinyLlama-1.1B-Chat-v1.0 Locally

This document provides a step-by-step guide to running the TinyLlama-1.1B-Chat-v1.0 model locally on a laptop with 16GB RAM, an i5 processor, and Windows. It covers setting up the environment, downloading the model, fine-tuning, and running a Flask-based chat UI.

---

System Requirements
- Operating System: Windows
- RAM: 16GB
- Processor: Intel i5 or equivalent
- Python Version: 3.10.9
- IDE: Visual Studio Code (VS Code)
- Internet: Required for downloading the model and libraries

---

Step-by-Step Setup

1. Install Python 3.10.9
- Download and install Python 3.10.9 from https://www.python.org/downloads/release/python-3109/.
- Ensure Python and pip are added to your system PATH.

2. Set Up a Virtual Environment
- Open the VS Code terminal in your project directory (e.g., C:\path\to\TinyLlama-1.1B).
- Run:
```
python -m venv venv
.\venv\Scripts\activate
```

3. Install Required Libraries
- In the activated virtual environment, run:
```
pip install transformers torch huggingface_hub datasets peft trl accelerate flask matplotlib
```
- This installs the libraries for model handling, fine-tuning, the Flask app, and plotting (pinned versions are listed in requirements.txt).
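An optional sanity check that the installs succeeded (uses only the standard library):

```python
# Print installed versions of the key packages
from importlib.metadata import version

for pkg in ["transformers", "torch", "peft", "trl", "accelerate", "flask"]:
    print(pkg, version(pkg))
```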

4. Download the TinyLlama Model
- Create a file `download_model.py` with the following code:
```python
from huggingface_hub import login, snapshot_download
login(token="YOUR_ACCESS_TOKEN_HERE")
snapshot_download(repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0", local_dir="./tinyllama_model")
```
- Replace `YOUR_ACCESS_TOKEN_HERE` with your Hugging Face access token (get it from https://huggingface.co/settings/tokens).
- Run: `python download_model.py`
- Model weights will be saved in the `tinyllama_model` folder.
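Hard-coding a token into the script is convenient but easy to leak; one variant (an assumption, not part of this repository) reads it from an environment variable instead:

```python
import os
from huggingface_hub import login, snapshot_download

# Assumes the token was exported first, e.g. `set HF_TOKEN=hf_...` on Windows
login(token=os.environ["HF_TOKEN"])
snapshot_download(repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0", local_dir="./tinyllama_model")
```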

5. Run Inference with Flask UI
- Create a `finetune` folder in your project directory, and copy `app.py` and `templates/index.html` from the repository into it (these copies serve the fine-tuned model later; the root `app.py` serves the base model).
- Run: `python app.py` from the project root.
- Open http://127.0.0.1:5000 in your browser to access the chat UI.
- Enter prompts to interact with the model.

6. Fine-Tune the Model (Optional)
- In the `finetune` folder, ensure `dataset.json` and `finetune.py` are present.
- Run: `python finetune.py`
- Fine-tuned weights will be saved in `finetune/finetuned_weights`.
- Update the copied `app.py` to point to `./finetuned_weights` for inference with the fine-tuned model, as sketched below.
- Check `loss_plot.png` for a training loss visualization.
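The switch to the fine-tuned weights is a one-line edit in the copied `app.py` (a sketch; if only LoRA adapter files were saved, see the loading note after finetune.py):

```python
model_path = "./finetuned_weights"  # was "./tinyllama_model"
```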

7. View Training Metrics
- After fine-tuning, check the console for the final train loss and learning rate.
- Open `loss_plot.png` in the `finetune` folder for a graphical view of the training loss.

---

Project Structure
- `tinyllama_model/`: Model weights downloaded from Hugging Face.
- `finetune/`: Fine-tuning scripts and fine-tuned weights.
- `dataset.json`: Small dataset for fine-tuning.
- `finetune.py`: Fine-tuning script with LoRA.
- `app.py`: Flask app for inference.
- `templates/index.html`: Chat UI.
- `loss_plot.png`: Training loss plot.
- `requirements.txt`: List of required libraries.
- `document.txt`: This guide.
- `README.md`: Project overview.

---

Attribution
- **Model**: TinyLlama-1.1B-Chat-v1.0
- **Source**: https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0
- **Organization**: TinyLlama
- **License**: Check the model's Hugging Face page for licensing details.

---

Notes
- Model weights are not included in this repository to respect licensing terms.
- Download the model directly from Hugging Face using your access token.
- Ensure sufficient disk space (~2-3GB) for model weights and fine-tuned weights.
- For support, refer to the TinyLlama Hugging Face page or community forums.
download_model.py
ADDED
```python
from huggingface_hub import login, snapshot_download

# Log in to Hugging Face (replace the placeholder with your own access token)
login(token="hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")

# Download TinyLlama-1.1B-Chat-v1.0 to a local folder
snapshot_download(repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0", local_dir="./tinyllama_model")
```
finetune/app.py
ADDED
```python
from flask import Flask, request, render_template
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)

# Load fine-tuned model and tokenizer
model_path = "./finetuned_weights"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
device = torch.device("cpu")
model.to(device)

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/generate', methods=['POST'])
def generate():
    user_input = request.form['prompt'].strip()
    prompt = f"<|USER|> {user_input} <|ASSISTANT|> "
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # max_new_tokens bounds the reply itself rather than prompt + reply
    outputs = model.generate(**inputs, max_new_tokens=100, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = generated_text.split("<|ASSISTANT|> ")[-1] if "<|ASSISTANT|> " in generated_text else generated_text
    return response

if __name__ == '__main__':
    app.run(debug=True)
```
finetune/dataset.json
ADDED
```json
[
  {"prompt": "What is the capital of France?", "response": "The capital of France is Paris."},
  {"prompt": "Who is the CEO of Tesla?", "response": "The CEO of Tesla is Elon Musk."},
  {"prompt": "What is 2 + 2?", "response": "2 + 2 equals 4."},
  {"prompt": "What is Python?", "response": "Python is a high-level programming language."},
  {"prompt": "Who founded Microsoft?", "response": "Microsoft was founded by Bill Gates and Paul Allen."}
]
```
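New examples can be appended in the same prompt/response shape; a small validation sketch for the file:

```python
import json

# Check that dataset.json parses and each record has the expected fields
with open("dataset.json") as f:
    data = json.load(f)
assert all({"prompt", "response"} <= set(rec) for rec in data)
print(f"{len(data)} examples look valid")
```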
finetune/finetune.py
ADDED
```python
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import Dataset
from peft import LoraConfig, get_peft_model
import torch
import json
import matplotlib.pyplot as plt

# Load model and tokenizer
model_path = "../tinyllama_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Set pad token
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

# Load dataset from JSON
with open("dataset.json", "r") as f:
    data = json.load(f)
dataset = Dataset.from_list(data)

# Tokenize dataset and include labels
def tokenize_function(examples):
    inputs = [f"<|USER|> {p} <|ASSISTANT|> {r}" for p, r in zip(examples["prompt"], examples["response"])]
    tokenized = tokenizer(inputs, padding="max_length", truncation=True, max_length=128, return_tensors="pt")
    tokenized["labels"] = tokenized["input_ids"].clone()
    return tokenized

tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["prompt", "response"])

# Configure LoRA for efficient fine-tuning
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Training arguments
training_args = TrainingArguments(
    output_dir="./finetuned_weights",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    save_strategy="epoch",
    logging_steps=1,
    learning_rate=2e-4,
    fp16=False,
    report_to="none"
)

# Trainer (no validation dataset due to small size)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Fine-tune model
train_result = trainer.train()

# Save fine-tuned weights
model.save_pretrained("./finetuned_weights")
tokenizer.save_pretrained("./finetuned_weights")

# Extract metrics
train_loss = [log["loss"] for log in trainer.state.log_history if "loss" in log]
learning_rate = [log["learning_rate"] for log in trainer.state.log_history if "learning_rate" in log]

# Print final metrics
print(f"Final Train Loss: {train_loss[-1] if train_loss else 'N/A'}")
print(f"Final Learning Rate: {learning_rate[-1] if learning_rate else 'N/A'}")

# Plot train loss
plt.figure(figsize=(10, 6))
if train_loss:
    plt.plot(range(len(train_loss)), train_loss, label="Train Loss", color="#2563eb")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Training Loss")
plt.legend()
plt.grid()
plt.savefig("loss_plot.png")
plt.show()
```
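One caveat worth knowing: `save_pretrained` on a `get_peft_model`-wrapped model typically writes only the LoRA adapter files (adapter_config.json and the adapter weights), not a standalone model, so loading `./finetuned_weights` with a plain `AutoModelForCausalLM.from_pretrained` may fail. A sketch of loading the adapter onto the base model, and optionally merging it into a standalone checkpoint:

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Attach the saved LoRA adapter to the base model
base = AutoModelForCausalLM.from_pretrained("../tinyllama_model")
model = PeftModel.from_pretrained(base, "./finetuned_weights")

# Optionally fold the adapter into the base weights so the result loads
# with AutoModelForCausalLM.from_pretrained("./finetuned_weights_merged")
merged = model.merge_and_unload()
merged.save_pretrained("./finetuned_weights_merged")
```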
finetune/loss_plot.png
ADDED
(binary image: training loss plot)
finetune/templates/index.html
ADDED
```html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>TinyLlama Chat</title>
    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css" rel="stylesheet">
    <style>
        body {
            background: linear-gradient(135deg, #1e3a8a, #3b82f6);
            min-height: 100vh;
            font-family: 'Arial', sans-serif;
        }
        .chat-container {
            max-width: 700px;
            margin: 2rem auto;
            background: white;
            border-radius: 1rem;
            box-shadow: 0 10px 20px rgba(0, 0, 0, 0.2);
            overflow: hidden;
        }
        .chat-header {
            background: #2563eb;
            color: white;
            padding: 1rem;
            text-align: center;
            font-size: 1.5rem;
            font-weight: bold;
        }
        .chat-body {
            max-height: 500px;
            overflow-y: auto;
            padding: 1rem;
        }
        .message {
            margin: 0.5rem 0;
            padding: 0.75rem;
            border-radius: 0.5rem;
            max-width: 80%;
        }
        .user-message {
            background: #dbeafe;
            margin-left: auto;
            text-align: right;
        }
        .ai-message {
            background: #f3f4f6;
            margin-right: auto;
        }
        .input-container {
            display: flex;
            padding: 1rem;
            background: #f9fafb;
            border-top: 1px solid #e5e7eb;
        }
        .input-container input {
            flex: 1;
            padding: 0.75rem;
            border: 1px solid #d1d5db;
            border-radius: 0.5rem 0 0 0.5rem;
            outline: none;
        }
        .input-container button {
            padding: 0.75rem 1.5rem;
            background: #2563eb;
            color: white;
            border: none;
            border-radius: 0 0.5rem 0.5rem 0;
            cursor: pointer;
            transition: background 0.3s;
        }
        .input-container button:hover {
            background: #1e40af;
        }
        .loading {
            display: none;
            margin: 1rem auto;
            width: 40px;
            height: 40px;
            border: 4px solid #f3f3f3;
            border-top: 4px solid #2563eb;
            border-radius: 50%;
            animation: spin 1s linear infinite;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        .attribution {
            text-align: center;
            margin: 1rem;
            font-size: 0.9rem;
            color: #f3f4f6;
        }
    </style>
</head>
<body>
    <div class="chat-container">
        <div class="chat-header">TinyLlama Chat</div>
        <div class="chat-body" id="chat-body">
            <div class="message ai-message">Hello! How can I assist you today?</div>
        </div>
        <div class="input-container">
            <input type="text" id="prompt" placeholder="Type your message...">
            <button onclick="sendMessage()">Send</button>
        </div>
        <div class="loading" id="loading"></div>
    </div>
    <div class="attribution">
        Powered by <a href="https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0" target="_blank">TinyLlama-1.1B-Chat-v1.0</a> by TinyLlama
    </div>
    <script>
        async function sendMessage() {
            const promptInput = document.getElementById('prompt');
            const chatBody = document.getElementById('chat-body');
            const loading = document.getElementById('loading');
            const prompt = promptInput.value.trim();

            if (!prompt) return;

            // Add user message
            const userMessage = document.createElement('div');
            userMessage.className = 'message user-message';
            userMessage.textContent = prompt;
            chatBody.appendChild(userMessage);

            // Show loading animation
            loading.style.display = 'block';
            promptInput.value = '';
            chatBody.scrollTop = chatBody.scrollHeight;

            // Send request to Flask
            const response = await fetch('/generate', {
                method: 'POST',
                headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
                body: `prompt=${encodeURIComponent(prompt)}`
            });
            const aiResponse = await response.text();

            // Hide loading animation
            loading.style.display = 'none';

            // Add AI response
            const aiMessage = document.createElement('div');
            aiMessage.className = 'message ai-message';
            aiMessage.textContent = aiResponse;
            chatBody.appendChild(aiMessage);
            chatBody.scrollTop = chatBody.scrollHeight;
        }

        document.getElementById('prompt').addEventListener('keypress', (e) => {
            if (e.key === 'Enter') sendMessage();
        });
    </script>
</body>
</html>
```
inference.py
ADDED
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
model_path = "./tinyllama_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Set device to CPU
device = torch.device("cpu")
model.to(device)

# Input prompt
prompt = "Hello, how can I assist you today?"

# Tokenize input
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Generate text
outputs = model.generate(**inputs, max_length=100, num_return_sequences=1)

# Decode and print output
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)
```
knowledge_base.txt
ADDED
```
ceo of meta:Mark Zuckerberg
founder of meta:Mark Zuckerberg
meta founding year:2004
```
requirements.txt
ADDED
```
transformers==4.44.2
torch==2.4.1
huggingface_hub==0.25.1
datasets==3.0.0
peft==0.12.0
trl==0.11.1
accelerate==0.33.0
flask==3.0.3
matplotlib==3.9.2
```
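With this file in the project root, everything installs in one step (equivalent to the pip command in document.txt, but with pinned versions):

```
pip install -r requirements.txt
```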
templates/index.html
ADDED
(Byte-for-byte identical to finetune/templates/index.html above; see that file for the full contents.)