Update api_usage.py
api_usage.py (CHANGED, +174 -22)
@@ -1,8 +1,11 @@
 import requests
+import json
 import os
 import anthropic
 from datetime import datetime
+import boto3
+import botocore.exceptions
+import concurrent.futures

 BASE_URL = 'https://api.openai.com/v1'
 GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
@@ -87,14 +90,17 @@ def get_subscription(key, org_list):
             "rpm": rpm,
             "tpm": tpm,
             "quota": quota}
+
+def send_oai_completions(oai_stuff):
+    session = oai_stuff[0]
+    headers = oai_stuff[1]
+    model = oai_stuff[2]
+    try:
+        req_body = {"model": model, "max_tokens": 1}
+        rpm_string = ""
+        tpm_string = ""
+        quota_string = ""
+        r = session.post(f"{BASE_URL}/chat/completions", headers=headers, json=req_body, timeout=10)
         result = r.json()
         if "error" in result:
             e = result.get("error", {}).get("code", "")
@@ -105,8 +111,8 @@ def format_status(list_models_avai, headers):
                 _rpm = '{:,}'.format(rpm_num).replace(',', ' ')
                 _tpm = '{:,}'.format(tpm_num).replace(',', ' ')
                 _tpm_left = '{:,}'.format(tpm_left).replace(',', ' ')
+                rpm_string = f"{_rpm} ({model})"
+                tpm_string = f"{_tpm} ({_tpm_left} left, {model})"
                 dictCount = 0
                 dictLength = len(TOKEN_LIMIT_PER_TIER_GPT4)

@@ -118,15 +124,33 @@ def format_status(list_models_avai, headers):
                         else:
                             dictCount+=1
                     if dictCount == dictLength:
+                        quota_string = "yes | custom-tier"
+            elif model == GPT_TYPES[0] and quota_string == "":
+                quota_string = check_key_tier(rpm_num, tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
         else:
+            rpm_string = f"0 ({model})"
+            tpm_string = f"0 ({model})"
+            quota_string = e
+        return rpm_string, tpm_string, quota_string
+    except Exception as e:
+        #print(e)
+        return "", "", ""
+
+def helper_oai(oai_stuff):
+    return send_oai_completions(oai_stuff)
+
+def format_status(list_models_avai, headers):
+    rpm = []
+    tpm = []
+    quota = ""
+    r = requests.Session()
+    args = [(r, headers, model) for model in list_models_avai]
+    with concurrent.futures.ThreadPoolExecutor() as executer:
+        for result in executer.map(helper_oai, args):
+            rpm.append(result[0])
+            tpm.append(result[1])
+            if result[2]:
+                quota = result[2]
     rpm_str = ""
     tpm_str = ""
     for i in range(len(rpm)):
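
For reference, a minimal sketch of driving the new thread-pooled status check on its own; the module import, the Bearer header, and the model list below are illustrative, not part of the change:

    import api_usage

    headers = {"Authorization": "Bearer sk-..."}  # placeholder key
    models = ["gpt-3.5-turbo", "gpt-4"]           # models the key is expected to reach
    # format_status fans each (session, headers, model) tuple out to send_oai_completions
    # via the ThreadPoolExecutor and collects the per-model rpm/tpm/quota strings.
    api_usage.format_status(models, headers)
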
@@ -177,7 +201,7 @@ def check_key_ant_availability(ant):
         #print(e.__cause__) # an underlying Exception, likely raised within httpx.
         return False, "Error: The server could not be reached", ""
     except anthropic.RateLimitError as e:
+        return True, "Error: 429, rate limited; we should back off a bit(retry 5 times failed)", ""
     except anthropic.APIStatusError as e:
         err_msg = e.response.json().get('error', {}).get('message', '')
         return False, f"Error: {e.status_code}, {err_msg}", ""
@@ -295,7 +319,7 @@ def get_azure_status(endpoint, api_key, deployments_list):
                 has_turbo = True

     if not list_model: #has_32k == False and has_gpt4 == False and has_turbo == False:
+        return "No GPT deployment to check", has_32k, has_gpt4turbo, has_gpt4, has_turbo
     else:
         if has_gpt4:
             has_gpt4turbo = check_gpt4turbo(endpoint, api_key, list_model['gpt-4'])
@@ -335,7 +359,7 @@ def check_key_mistral_availability(key):
             return False
         return True
     except:
+        return "Error while making request"

 def check_mistral_quota(key):
     try:
@@ -353,6 +377,134 @@ def check_mistral_quota(key):
     except:
         return "Error while making request."

+def check_key_replicate_availability(key):
+    try:
+        url = 'https://api.replicate.com/v1/account'
+        headers = {'Authorization': f'Token {key}'}
+
+        rq = requests.get(url, headers=headers)
+        info = rq.json()
+        if rq.status_code == 401:
+            return False, "", ""
+
+        url = 'https://api.replicate.com/v1/hardware'
+        rq = requests.get(url, headers=headers)
+        result = rq.json()
+        hardware = []
+        if result:
+            hardware = [res['name'] for res in result]
+        return True, info, hardware
+    except:
+        return "Unknown", "", "Error while making request"
+
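
A quick usage sketch for the Replicate check, assuming it is called from this module; the token is a placeholder. On success the tuple carries the /v1/account JSON and the hardware names reported by /v1/hardware:

    ok, account_info, hardware = check_key_replicate_availability("r8_...")  # placeholder token
    if ok is True:
        print(account_info, hardware)
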
+def check_key_aws_availability(key):
+    access_id = key.split(':')[0]
+    access_secret = key.split(':')[1]
+
+    root = False
+    admin = False
+    billing = False
+    quarantine = False
+    iam_users_perm = False
+    iam_policies_perm = False
+
+    session = boto3.Session(
+        aws_access_key_id=access_id,
+        aws_secret_access_key=access_secret
+    )
+
+    iam = session.client('iam')
+
+    username = check_username(session)
+    #print(username)
+    if not username[0]:
+        return False, "", "", "", "", username[1], ""
+
+    if username[0] == 'root':
+        root = True
+        admin = True
+
+    if not root:
+        policies = check_policy(iam, username[0])
+        if policies[0]:
+            for policy in policies[1]:
+                if policy['PolicyName'] == 'AdministratorAccess':
+                    admin = True
+                if policy['PolicyName'] == 'AWSCompromisedKeyQuarantineV2':
+                    quarantine = True
+
+    enable_region = check_bedrock_invoke(session)
+    cost = check_aws_billing(session)
+    if enable_region:
+        return True, username[0], root, admin, quarantine, enable_region, cost
+    if root or admin:
+        return True, username[0], root, admin, quarantine, "No region has claude enabled yet", cost
+    return True, username[0], root, admin, quarantine, "Not enough permission to activate claude bedrock", cost
+
+def check_username(session):
+    try:
+        sts = session.client('sts')
+        sts_iden = sts.get_caller_identity()
+        if len(sts_iden['Arn'].split('/')) > 1:
+            return sts_iden['Arn'].split('/')[1], "Valid"
+
+        return sts_iden['Arn'].split(':')[5], "Valid"
+    except botocore.exceptions.ClientError as error:
+        return False, error.response['Error']['Code']
+
+def check_policy(iam, username):
+    try:
+        iam_policies = iam.list_attached_user_policies(UserName=username)
+        return True, iam_policies['AttachedPolicies']
+    except botocore.exceptions.ClientError as error:
+        return False, error.response['Error']['Code']
+
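
Likewise for the AWS check: as the split(':') above implies, the key is passed as a single "access_key_id:secret_access_key" string (the value here is a placeholder):

    result = check_key_aws_availability("AKIA...:wJalr...")  # placeholder credentials
    # success: (True, username, root, admin, quarantine, claude_regions_or_message, cost)
    # failure: (False, "", "", "", "", error_code, "")
    print(result)
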
+def invoke_claude(session, region):
+    try:
+        bedrock_runtime = session.client("bedrock-runtime", region_name=region)
+        body = json.dumps({
+            "prompt": "\n\nHuman:\n\nAssistant:",
+            "max_tokens_to_sample": 0
+        })
+        response = bedrock_runtime.invoke_model(body=body, modelId="anthropic.claude-v2:1")
+    except bedrock_runtime.exceptions.ValidationException as error:
+        #print(error.response['Error'])
+        return region
+    except bedrock_runtime.exceptions.AccessDeniedException as error:
+        #print(error.response['Error'])
+        return
+    except bedrock_runtime.exceptions.ResourceNotFoundException as error:
+        #print(error.response['Error'])
+        return
+    except Exception as e:
+        #print(e)
+        return
+
+def check_bedrock_invoke(session):
+    regions = ['us-east-1', 'us-west-2', 'eu-central-1', 'ap-southeast-1', 'ap-northeast-1']
+    enable_region = []
+    with concurrent.futures.ThreadPoolExecutor() as executer:
+        futures = [executer.submit(invoke_claude, session, region) for region in regions]
+        for future in concurrent.futures.as_completed(futures):
+            if future.result():
+                enable_region.append(future.result())
+    return enable_region
+
+def check_aws_billing(session):
+    try:
+        ce = session.client('ce')
+        now = datetime.now()
+        start_date = now.replace(day=1).strftime('%Y-%m-%d')
+        end_date = (now.replace(day=1, month=now.month % 12 + 1, year=now.year + (now.month // 12)).strftime('%Y-%m-%d'))
+        ce_cost = ce.get_cost_and_usage(
+            TimePeriod={ 'Start': start_date, 'End': end_date },
+            Granularity='MONTHLY',
+            Metrics=['BlendedCost']
+        )
+        return ce_cost['ResultsByTime']
+    except botocore.exceptions.ClientError as error:
+        return error.response['Error']['Message']
+
 if __name__ == "__main__":
     key = os.getenv("OPENAI_API_KEY")
     key_ant = os.getenv("ANTHROPIC_API_KEY")
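
A hedged sketch of how the new checkers could be exercised from the __main__ block above; the environment-variable names are illustrative, not part of this change:

    key_aws = os.getenv("AWS_API_KEY")               # illustrative name, "id:secret" format
    key_replicate = os.getenv("REPLICATE_API_KEY")   # illustrative name
    if key_aws:
        print(check_key_aws_availability(key_aws))
    if key_replicate:
        print(check_key_replicate_availability(key_replicate))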