"""Send synthetic LLM chat traces to a Traceloop-style collector.

Each trace is built from ``TEMPLATE_JSON`` with a randomly chosen service
name, provider and model, plus a user message taken from two Hugging Face
datasets or a fixed plain sentence, and is POSTed to
``{TRACELOOP_BASE_URL}/v1/traces``.
"""

import base64
import copy
import json
import os
import random
import time
from datetime import datetime
from time import sleep

import requests
from datasets import load_dataset

# Single-span payload used as the starting point for every generated trace.
TEMPLATE_JSON = json.loads("""
{
  "resource_spans": [
    {
      "scope_spans": [
        {
          "spans": [
            {
              "trace_id": "NQ01459b3A+aAHE+JwGWNQ==",
              "end_time_unix_nano": "1725721375827041000",
              "span_id": "0PCGpTowmVo=",
              "kind": "SPAN_KIND_CLIENT",
              "name": "openai.chat",
              "start_time_unix_nano": "1725721375188928425",
              "attributes": [
                {"value": {"string_value": "chat"}, "key": "llm.request.type"},
                {"value": {"string_value": "OpenAI"}, "key": "llm.vendor"},
                {"value": {"string_value": "gpt-3.5-turbo"}, "key": "llm.request.model"},
                {"value": {"int_value": "100"}, "key": "llm.request.max_tokens"},
                {"value": {"double_value": 0.5}, "key": "llm.temperature"},
                {"value": {"string_value": "None"}, "key": "llm.headers"},
                {"value": {"string_value": "system"}, "key": "llm.prompts.0.role"},
                {"value": {"string_value": "You are Responsible AI assistant to the user. "}, "key": "llm.prompts.0.content"},
                {"value": {"string_value": "user"}, "key": "llm.prompts.1.role"},
                {"value": {"string_value": "hello this my test message"}, "key": "llm.prompts.1.content"},
                {"value": {"string_value": "gpt-3.5-turbo-0125"}, "key": "llm.response.model"},
                {"value": {"int_value": "35"}, "key": "llm.usage.total_tokens"},
                {"value": {"int_value": "9"}, "key": "llm.usage.completion_tokens"},
                {"value": {"int_value": "26"}, "key": "llm.usage.prompt_tokens"},
                {"value": {"string_value": "stop"}, "key": "llm.completions.0.finish_reason"},
                {"value": {"string_value": "assistant"}, "key": "llm.completions.0.role"},
                {"value": {"string_value": "Hello! How can I assist you today?"}, "key": "llm.completions.0.content"}
              ],
              "status": {}
            }
          ],
          "scope": {
            "name": "opentelemetry.instrumentation.openai.v1",
            "version": "0.10.4"
          }
        }
      ],
      "resource": {
        "attributes": [
          {"value": {"string_value": "llm-chat-app"}, "key": "service.name"}
        ]
      }
    }
  ]
}
""")

# Alphabet used by generate_random_id (base64 characters plus '=').
_ID_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='

# The sample IDs in TEMPLATE_JSON are padded base64 strings of 16 bytes
# (trace_id, 24 characters) and 8 bytes (span_id, 12 characters).
_TRACE_ID_BYTES = 16
_SPAN_ID_BYTES = 8

# Candidate service names; earlier entries are chosen more often because the
# weight of entry i is _APP_WEIGHT_FACTOR ** -i.
_APPS = ["fintechgpt", "healthgpt", "mydoc", "knowledge-centre", "assistantgpt"]
_APP_WEIGHT_FACTOR = 2

# provider -> (models offered by that provider, relative selection weight)
_PROVIDERS_MODELS = {
    "Meta AI": (["LLaMA 65B", "LLaMA 33B"], 10),
    "Mistral": (["Mistral 7B"], 8),
    "Anthropic": (["Claude 3.5 Sonnet"], 15),
    "OpenAI": (["GPT-3", "GPT-3.5", "GPT-4", "GPT-4o"], 25),
    "Google": (["Gemini Ultra", "Gemini Pro", "Gemini Nano", "Lamda", "Palm"], 20),
    "Databricks": (["Dolly"], 5),
    "IBM AI": (["Watson NLP"], 5),
    "Azure AI": (["Azure OpenAI", "Custom GPT-3.5"], 7),
    "Snowflake": (["Snowflake GPT"], 3),
    "Krutrim": (["Krutrim LLM"], 2),
    "Baidu": (["Ernie 4.0"], 10),
    "Stability AI": (["StableLM 3B", "StableLM 7B"], 4),
    "Microsoft": (["Orca", "Phi-1"], 8),
}


def generate_random_id(size):
    """Return ``size`` characters drawn at random from the base64 alphabet plus '='.

    The result is not guaranteed to be decodable base64, because '=' may
    appear at any position. Kept for backward compatibility; trace and span
    IDs are produced by ``_random_base64_id`` instead.
    """
    return ''.join(random.choices(_ID_ALPHABET, k=size))


def _random_base64_id(num_bytes):
    """Return ``num_bytes`` random bytes encoded as padded standard base64."""
    raw = random.getrandbits(num_bytes * 8).to_bytes(num_bytes, "big")
    return base64.b64encode(raw).decode("ascii")


def generate_json_from_template(template, provider, model, service_name, message):
    """Build a serialized trace payload from ``template``.

    Args:
        template: Parsed payload with the same structure as ``TEMPLATE_JSON``.
            It is deep-copied, so the caller's object is left untouched.
        provider: Value written to the ``llm.vendor`` span attribute.
        model: Value written to the ``llm.request.model`` span attribute.
        service_name: Value written to the ``service.name`` resource attribute.
        message: Value written to the ``llm.prompts.1.content`` span attribute.

    Returns:
        The payload as a JSON string, with a fresh trace ID, span ID, start
        time (now) and end time (start plus 0.1 to 1 second).
    """
    # Work on a copy so repeated calls never leak state through the shared template.
    payload = copy.deepcopy(template)
    span = payload['resource_spans'][0]['scope_spans'][0]['spans'][0]

    # IDs have the same shape as the template's samples: padded base64.
    span['trace_id'] = _random_base64_id(_TRACE_ID_BYTES)
    span['span_id'] = _random_base64_id(_SPAN_ID_BYTES)

    start_ns = time.time_ns()
    span['start_time_unix_nano'] = str(start_ns)
    # Random span duration between 0.1 s and 1 s, expressed in nanoseconds.
    span['end_time_unix_nano'] = str(start_ns + random.randint(100000000, 1000000000))

    span_overrides = {
        'llm.vendor': provider,
        'llm.request.model': model,
        'llm.prompts.1.content': message,
    }
    for attr in span['attributes']:
        if attr['key'] in span_overrides:
            attr['value']['string_value'] = span_overrides[attr['key']]

    for attr in payload['resource_spans'][0]['resource']['attributes']:
        if attr['key'] == 'service.name':
            attr['value']['string_value'] = service_name

    return json.dumps(payload)


def send_json_to_remote(json_data, timeout=10):
    """POST ``json_data`` to ``{TRACELOOP_BASE_URL}/v1/traces``.

    Args:
        json_data: Request body, already serialized as a JSON string.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A ``(status_code, response_text)`` tuple.

    Raises:
        EnvironmentError: If ``TRACELOOP_BASE_URL`` or ``TRACELOOP_API_KEY``
            is unset or empty.
        requests.RequestException: On connection failures or timeouts.
    """
    base_url = os.getenv('TRACELOOP_BASE_URL')
    api_key = os.getenv('TRACELOOP_API_KEY')

    if not base_url or not api_key:
        raise EnvironmentError("TRACELOOP_BASE_URL or TRACELOOP_API_KEY is not set in environment variables.")

    # Strip a trailing slash so the URL never contains "//v1/traces".
    url = f"{base_url.rstrip('/')}/v1/traces"
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f"Bearer {api_key}"
    }

    response = requests.post(url, headers=headers, data=json_data, timeout=timeout)

    if response.status_code == 200:
        print("Data successfully sent!")
    else:
        print(f"Failed to send data. Status Code: {response.status_code}, Response: {response.text}")

    return (response.status_code, response.text)


def send_message(message):
    """Send ``message`` as one trace with a randomly chosen service, provider and model.

    The service name is drawn with exponentially decreasing weights, the
    provider according to the weights in ``_PROVIDERS_MODELS``, and the model
    uniformly among that provider's models.

    Returns:
        The ``(status_code, response_text)`` tuple from ``send_json_to_remote``.
    """
    app_weights = [_APP_WEIGHT_FACTOR ** -i for i in range(len(_APPS))]
    service_name = random.choices(_APPS, weights=app_weights, k=1)[0]

    providers = list(_PROVIDERS_MODELS)
    provider_weights = [_PROVIDERS_MODELS[provider][1] for provider in providers]
    selected_provider = random.choices(providers, weights=provider_weights, k=1)[0]
    selected_model = random.choice(_PROVIDERS_MODELS[selected_provider][0])

    output_json = generate_json_from_template(
        TEMPLATE_JSON, selected_provider, selected_model, service_name, message
    )
    return send_json_to_remote(output_json)


def main():
    """Send two rounds of 30 messages: 10 from each dataset plus 10 plain ones."""
    prompt_injection_dataset = load_dataset("deepset/prompt-injections")
    toxicity_prompts = load_dataset("allenai/real-toxicity-prompts")

    # The shuffle seed is fixed, so the samples are the same in every round;
    # compute them once instead of once per round.
    prompt_injection_sample = prompt_injection_dataset["train"].shuffle(seed=42).select(range(10))
    toxicity_sample = toxicity_prompts["train"].shuffle(seed=42).select(range(10))
    base_messages = (
        [msg["text"] for msg in prompt_injection_sample]
        + [msg["prompt"]["text"] for msg in toxicity_sample]
        + ["this is test conversation"] * 10
    )

    for _ in range(2):
        # Fresh copy per round so each round shuffles from the same starting order.
        all_messages = list(base_messages)
        random.shuffle(all_messages)

        for message in all_messages:
            print(f"Sending Message {message}")
            send_message(message)
            sleep(random.uniform(2, 4))  # Pause 2 to 4 seconds between sends


if __name__ == "__main__":
    main()