Update README.md
Browse files
README.md
CHANGED
@@ -1,13 +1,20 @@
|
|
1 |
---
|
2 |
license: mit
|
3 |
---
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
5 |
```
|
6 |
optimum-cli export neuron --model NousResearch/Meta-Llama-3.1-8B-Instruct --batch_size 1 --sequence_length 4096 --num_cores 2 --auto_cast_type fp16 ./models-hf/NousResearch/Meta-Llama-3.1-8B-Instruct
|
7 |
|
8 |
```
|
9 |
|
10 |
-
|
|
|
|
|
11 |
```
|
12 |
docker run -it --name llama-31 --rm \
|
13 |
-p 8080:80 \
|
@@ -26,7 +33,7 @@ docker run -it --name llama-31 --rm \
|
|
26 |
|
27 |
```
|
28 |
|
29 |
-
test
|
30 |
```
|
31 |
curl 127.0.0.1:8080/generate -X POST -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'
|
32 |
```
|
|
|
1 |
---
|
2 |
license: mit
|
3 |
---
|
4 |
+
### environment
|
5 |
+
* optimum-neuron 0.0.25.dev0
|
6 |
+
* neuron 2.20.0
|
7 |
+
* transformers-neuronx 0.12.313
|
8 |
+
* transformers 4.43.2
|
9 |
+
### export
|
10 |
```
|
11 |
optimum-cli export neuron --model NousResearch/Meta-Llama-3.1-8B-Instruct --batch_size 1 --sequence_length 4096 --num_cores 2 --auto_cast_type fp16 ./models-hf/NousResearch/Meta-Llama-3.1-8B-Instruct
|
12 |
|
13 |
```
|
14 |
|
15 |
+
|
16 |
+
|
17 |
+
### run
|
18 |
```
|
19 |
docker run -it --name llama-31 --rm \
|
20 |
-p 8080:80 \
|
|
|
33 |
|
34 |
```
|
35 |
|
36 |
+
### test
|
37 |
```
|
38 |
curl 127.0.0.1:8080/generate -X POST -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'
|
39 |
```
|