Update README.md
README.md CHANGED
@@ -52,7 +52,7 @@ The 12 evaluation tasks are summarized below (as per [InfiniteBench](https://gi
| Retrieve.KV | Synthetic | 500 | 89.9k | 22.7 | Finding the corresponding value from a dictionary and a key. |

## Serve MegaBeam-Mistral-7B-300k on EC2 instances ##
On an AWS `g5.48xlarge` instance, upgrade vLLM to the latest version as per the [vLLM documentation](https://vllm.readthedocs.io/en/latest/).

### Start the server

@@ -93,54 +93,73 @@

```python
print(chat_completion)
```

### Deploy the model on a SageMaker Endpoint ###

To deploy MegaBeam-Mistral-7B-300k on a SageMaker endpoint, please follow this [SageMaker DJL deployment guide](https://docs.djl.ai/docs/demos/aws/sagemaker/large-model-inference/sample-llm/vllm_deploy_mistral_7b.html).

Run the following Python statements in a SageMaker notebook, with each block running in a separate cell:

```python
import sagemaker
from sagemaker import Model, image_uris, serializers, deserializers

sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()

# run the following statements in a separate notebook cell
# (the %%writefile magic must be the first line of that cell)
%%writefile serving.properties
engine=Python
option.model_id=amazon/MegaBeam-Mistral-7B-300k
option.dtype=bf16
option.task=text-generation
option.rolling_batch=vllm
# shard the model across the 8 GPUs of an ml.g5.48xlarge instance
option.tensor_parallel_degree=8
option.device_map=auto

# run the following statements in a separate notebook cell
# (the %%sh magic must be the first line of that cell)
%%sh
# package the serving configuration as a model tarball
mkdir mymodel
mv serving.properties mymodel/
tar czvf mymodel.tar.gz mymodel/
rm -rf mymodel

# retrieve the DJL inference container image for this region
image_uri = image_uris.retrieve(
    framework="djl-deepspeed",
    region=region,
    version="0.27.0"
)

# upload the tarball to S3 and create the SageMaker model
s3_code_prefix = "megaBeam-mistral-7b-300k/code"
bucket = sagemaker_session.default_bucket()  # bucket to house artifacts
code_artifact = sagemaker_session.upload_data("mymodel.tar.gz", bucket, s3_code_prefix)
print(f"S3 code or model tarball uploaded to: {code_artifact}")
model = Model(image_uri=image_uri, model_data=code_artifact, role=role)

# deploy to a real-time endpoint (this can take several minutes)
instance_type = "ml.g5.48xlarge"
endpoint_name = sagemaker.utils.name_from_base("megaBeam-mistral-7b-300k")
model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name=endpoint_name
)

# our requests and responses will be in JSON format, so we specify the serializer and the deserializer
predictor = sagemaker.Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=serializers.JSONSerializer(),
    deserializer=deserializers.JSONDeserializer(),
)

# test the endpoint with a prompt in the Mistral instruction format
input_str = """<s>[INST] What is your favourite condiment? [/INST]
Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!</s>
[INST] Do you have mayonnaise recipes? [/INST]"""
predictor.predict(
    {"inputs": input_str, "parameters": {"max_new_tokens": 75}}
)
```
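
Once the endpoint is in service, it can also be invoked from outside the notebook through the SageMaker runtime API. The following is a minimal sketch, not part of the original guide; it assumes the endpoint name chosen during deployment and standard AWS credentials:

```python
import json
import boto3

# hypothetical placeholder: use the endpoint name printed by name_from_base above
endpoint_name = "<your-endpoint-name>"

# invoke the deployed endpoint through the SageMaker runtime client
smr = boto3.client("sagemaker-runtime")
response = smr.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=json.dumps({
        "inputs": "<s>[INST] Do you have mayonnaise recipes? [/INST]",
        "parameters": {"max_new_tokens": 75},
    }),
)
print(json.loads(response["Body"].read()))
```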
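When the endpoint is no longer needed, remember to delete it to stop incurring instance charges. A minimal cleanup sketch, assuming the `predictor` and `model` objects from the session above:

```python
# delete the endpoint (and its endpoint configuration), then the model
predictor.delete_endpoint()
model.delete_model()
```
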
## Limitations ##
Before using the MegaBeam-Mistral-7B-300k model, it is important to perform your own independent assessment, and take measures to ensure that your use would comply with your own specific quality control practices and standards, and that your use would comply with the local rules, laws, regulations, licenses and terms that apply to you, and your content.