import base64
import json
import os
import random
import time

import requests
from datasets import load_dataset
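
# Sample OTLP/JSON trace payload used as a template; the scope name suggests it
# was captured from OpenLLMetry's OpenAI instrumentation. The ids, timestamps,
# vendor, model, user prompt, and service.name are overwritten for every
# synthetic message generated below.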
TEMPLATE_JSON = json.loads("""
{
  "resource_spans": [
    {
      "scope_spans": [
        {
          "spans": [
            {
              "trace_id": "NQ01459b3A+aAHE+JwGWNQ==",
              "span_id": "0PCGpTowmVo=",
              "kind": "SPAN_KIND_CLIENT",
              "name": "openai.chat",
              "start_time_unix_nano": "1725721375188928425",
              "end_time_unix_nano": "1725721375827041000",
              "attributes": [
                {"key": "llm.request.type", "value": {"string_value": "chat"}},
                {"key": "llm.vendor", "value": {"string_value": "OpenAI"}},
                {"key": "llm.request.model", "value": {"string_value": "gpt-3.5-turbo"}},
                {"key": "llm.request.max_tokens", "value": {"int_value": "100"}},
                {"key": "llm.temperature", "value": {"double_value": 0.5}},
                {"key": "llm.headers", "value": {"string_value": "None"}},
                {"key": "llm.prompts.0.role", "value": {"string_value": "system"}},
                {"key": "llm.prompts.0.content", "value": {"string_value": "You are Responsible AI assistant to the user. "}},
                {"key": "llm.prompts.1.role", "value": {"string_value": "user"}},
                {"key": "llm.prompts.1.content", "value": {"string_value": "hello this my test message"}},
                {"key": "llm.response.model", "value": {"string_value": "gpt-3.5-turbo-0125"}},
                {"key": "llm.usage.total_tokens", "value": {"int_value": "35"}},
                {"key": "llm.usage.completion_tokens", "value": {"int_value": "9"}},
                {"key": "llm.usage.prompt_tokens", "value": {"int_value": "26"}},
                {"key": "llm.completions.0.finish_reason", "value": {"string_value": "stop"}},
                {"key": "llm.completions.0.role", "value": {"string_value": "assistant"}},
                {"key": "llm.completions.0.content", "value": {"string_value": "Hello! How can I assist you today?"}}
              ],
              "status": {}
            }
          ],
          "scope": {
            "name": "opentelemetry.instrumentation.openai.v1",
            "version": "0.10.4"
          }
        }
      ],
      "resource": {
        "attributes": [
          {"key": "service.name", "value": {"string_value": "llm-chat-app"}}
        ]
      }
    }
  ]
}
""")


def generate_random_id(num_bytes):
    # OTLP/JSON carries trace and span ids as base64-encoded bytes (16 bytes
    # for a trace id, 8 for a span id). Encoding real random bytes guarantees
    # a valid id; sampling base64-alphabet characters at random would not.
    return base64.b64encode(os.urandom(num_bytes)).decode('ascii')


def generate_json_from_template(template, provider, model, service_name, message):
    # Generate random trace_id (16 bytes) and span_id (8 bytes)
    trace_id = generate_random_id(16)
    span_id = generate_random_id(8)
    # Get the current time in nanoseconds
    current_time_ns = int(time.time() * 1e9)
    # Update trace_id, span_id, and timestamps
    span = template['resource_spans'][0]['scope_spans'][0]['spans'][0]
    span['trace_id'] = trace_id
    span['span_id'] = span_id
    span['start_time_unix_nano'] = str(current_time_ns)
    span['end_time_unix_nano'] = str(current_time_ns + random.randint(100_000_000, 1_000_000_000))  # random 0.1-1 s duration
    # Update provider, model, and the user message in the span attributes
    for attr in span['attributes']:
        if attr['key'] == 'llm.vendor':
            attr['value']['string_value'] = provider
        elif attr['key'] == 'llm.request.model':
            attr['value']['string_value'] = model
        elif attr['key'] == 'llm.prompts.1.content':
            attr['value']['string_value'] = message
    # Update service.name in the resource attributes
    for attr in template['resource_spans'][0]['resource']['attributes']:
        if attr['key'] == 'service.name':
            attr['value']['string_value'] = service_name
    # Return the modified JSON as a string
    return json.dumps(template)
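

# A minimal, offline sanity check of the templating step (a sketch; the sample
# inputs "OpenAI", "GPT-4o", "fintechgpt", and "ping" are arbitrary). It is not
# called by main(); run it manually to verify the field rewrites.
def _self_test_template():
    payload = json.loads(generate_json_from_template(
        TEMPLATE_JSON, "OpenAI", "GPT-4o", "fintechgpt", "ping"))
    span = payload['resource_spans'][0]['scope_spans'][0]['spans'][0]
    attrs = {a['key']: a['value'] for a in span['attributes']}
    assert attrs['llm.request.model']['string_value'] == 'GPT-4o'
    assert attrs['llm.prompts.1.content']['string_value'] == 'ping'
    assert int(span['end_time_unix_nano']) > int(span['start_time_unix_nano'])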


def send_json_to_remote(json_data):
    # Read the collector endpoint and API key from the environment
    base_url = os.getenv('TRACELOOP_BASE_URL')
    api_key = os.getenv('TRACELOOP_API_KEY')
    if not base_url or not api_key:
        raise EnvironmentError("TRACELOOP_BASE_URL or TRACELOOP_API_KEY is not set in environment variables.")
    # Standard OTLP/HTTP traces path
    url = f"{base_url}/v1/traces"
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f"Bearer {api_key}"
    }
    # Send the POST request
    response = requests.post(url, headers=headers, data=json_data)
    # Check the response status
    if response.status_code == 200:
        print("Data successfully sent!")
    else:
        print(f"Failed to send data. Status code: {response.status_code}, response: {response.text}")
    return (response.status_code, response.text)
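
# Expected environment before running (values are placeholders; any
# OTLP/HTTP-compatible collector that accepts bearer auth should work):
#   export TRACELOOP_BASE_URL="https://api.traceloop.com"
#   export TRACELOOP_API_KEY="<your-api-key>"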


def send_message(message):
    _apps = ["fintechgpt", "healthgpt", "mydoc", "knowledge-centre", "assistantgpt"]
    # Compute exponentially decreasing weights for the service names
    factor = 2  # controls the steepness of the decrease
    app_weights = [factor ** -i for i in range(len(_apps))]
    # Pick a service_name at random using those weights
    service_name = random.choices(_apps, weights=app_weights, k=1)[0]
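    # With factor=2 the weights are [1, 0.5, 0.25, 0.125, 0.0625], so about
    # half of all synthetic traffic is attributed to "fintechgpt" and each
    # later app receives half the share of the one before it.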
# Define providers and corresponding models with weights | |
providers_models = { | |
"Meta AI": (["LLaMA 65B", "LLaMA 33B"], 10), | |
"Mistral": (["Mistral 7B"], 8), | |
"Anthropic": (["Claude 3.5 Sonnet"], 15), | |
"OpenAI": (["GPT-3", "GPT-3.5", "GPT-4", "GPT-4o"], 25), | |
"Google": (["Gemini Ultra", "Gemini Pro", "Gemini Nano", "Lamda", "Palm"], 20), | |
"Databricks": (["Dolly"], 5), | |
"IBM AI": (["Watson NLP"], 5), | |
"Azure AI": (["Azure OpenAI", "Custom GPT-3.5"], 7), | |
"Snowflake": (["Snowflake GPT"], 3), | |
"Krutrim": (["Krutrim LLM"], 2), | |
"Baidu": (["Ernie 4.0"], 10), | |
"Stability AI": (["StableLM 3B", "StableLM 7B"], 4), | |
"Microsoft": (["Orca", "Phi-1"], 8) | |
} | |
# Extract providers and their weights | |
providers = list(providers_models.keys()) | |
provider_weights = [providers_models[provider][1] for provider in providers] | |
# Randomly select a provider based on weights | |
selected_provider = random.choices(providers, weights=provider_weights, k=1)[0] | |
# Randomly select a model from the selected provider | |
selected_model = random.choice(providers_models[selected_provider][0]) | |
# Generate the JSON with the selected provider, model, and service_name | |
output_json = generate_json_from_template(TEMPLATE_JSON, selected_provider, selected_model, service_name, message) | |
# Send the JSON to the remote server | |
return send_json_to_remote(output_json) | |
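

# Illustrative one-off call (hypothetical message; main() drives the real flow):
#   send_message("hello from a single synthetic conversation")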


def main():
    prompt_injection_dataset = load_dataset("deepset/prompt-injections")
    toxicity_prompts = load_dataset("allenai/real-toxicity-prompts")
    for i in range(2):
        # Sample messages; vary the seed per round so each round draws different rows
        prompt_injection_sample = prompt_injection_dataset["train"].shuffle(seed=42 + i).select(range(10))
        toxicity_sample = toxicity_prompts["train"].shuffle(seed=42 + i).select(range(10))
        plain_messages = ["this is a test conversation" for _ in range(10)]
        # Combine all messages into a single list
        all_messages = [msg["text"] for msg in prompt_injection_sample] + \
                       [msg["prompt"]["text"] for msg in toxicity_sample] + \
                       plain_messages
        # Shuffle the combined list to mix message types
        random.shuffle(all_messages)
        # Send each message with a short random pause between sends
        for message in all_messages:
            print(f"Sending message: {message}")
            send_message(message)
            time.sleep(random.uniform(2, 4))  # random pause of 2-4 seconds


if __name__ == "__main__":
    main()