First_agent_template

Running

Louis Delmas

feat: 🎸 generate random sound agent tool

d206ca1 13 days ago

2.26 kB

	from smolagents import CodeAgent,HfApiModel,tool
	import torch
	import yaml
	from tools.final_answer import FinalAnswerTool
	import numpy as np
	import random
	from scipy.signal import square, sawtooth

	from Gradio_UI import GradioUI

	@tool
	def generate_random_sound(duration: float = 1.0) -> torch.Tensor:
	    """Generates a random sound with varying frequency and waveform and returns a torch tensor.

	    The tone uses a random frequency between 100 and 10000 Hz and one of four
	    waveforms (sine, square, sawtooth, triangle). An exponential fade-out is
	    applied and the result is scaled so its peak absolute amplitude is 1.

	    Args:
	        duration: Length of the sound in seconds (default: 1.0)

	    Returns:
	        A 1-D float32 tensor of samples generated at 44100 Hz.

	    Raises:
	        ValueError: If duration is not a positive number, or is too short to
	            yield at least one sample.
	    """
	    # `not duration > 0` also rejects NaN, which would otherwise surface as an
	    # unrelated conversion error further down.
	    if not duration > 0:
	        raise ValueError(
	            f"duration must be a positive number of seconds, got {duration!r}"
	        )

	    # It seems the playback is about 3x slower, so we'll adjust the duration.
	    # NOTE(review): this is an empirical correction; the generated tensor
	    # therefore holds duration / 3 seconds' worth of samples.
	    adjusted_duration = duration / 3

	    sample_rate = 44100
	    num_samples = int(sample_rate * adjusted_duration)
	    if num_samples < 1:
	        raise ValueError(
	            f"duration {duration!r} is too short to produce a single sample"
	        )
	    t = np.linspace(0, adjusted_duration, num_samples, endpoint=False)

	    # Random frequency between 100 and 10000 Hz. The frequency is drawn before
	    # the waveform so that seeded runs keep producing the same sounds.
	    frequency = random.uniform(100, 10000)
	    waveform = random.choice(['sine', 'square', 'sawtooth', 'triangle'])

	    # Phase argument shared by every waveform generator.
	    phase = 2 * np.pi * frequency * t
	    if waveform == 'sine':
	        signal = np.sin(phase)
	    elif waveform == 'square':
	        signal = square(phase)
	    elif waveform == 'sawtooth':
	        signal = sawtooth(phase)
	    else:
	        # Triangle is a symmetric sawtooth.
	        signal = sawtooth(phase, width=0.5)

	    # Exponential fade-out, normalised over the (adjusted) duration.
	    fade = np.exp(-3 * t / adjusted_duration)
	    signal = signal * fade

	    # Normalise to prevent clipping. A silent signal (e.g. a one-sample sine,
	    # where sin(0) == 0) is left untouched instead of dividing 0 by 0, which
	    # would fill the output with NaN.
	    peak = np.max(np.abs(signal))
	    if peak > 0:
	        signal = signal / peak

	    return torch.from_numpy(signal.astype(np.float32))

	# Instantiate the final-answer tool imported from tools.final_answer.
	final_answer = FinalAnswerTool()

	# URL of the Hugging Face endpoint handed to HfApiModel as its model_id.
	model_id = 'https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'

	model = HfApiModel(
	    max_tokens=2096,
	    temperature=0.5,
	    model_id=model_id,
	    custom_role_conversions=None,
	)

	# Read the prompt templates with an explicit encoding so the result does not
	# depend on the platform's default locale.
	with open("prompts.yaml", 'r', encoding="utf-8") as stream:
	    prompt_templates = yaml.safe_load(stream)

	# Build the agent with the two tools defined in this file.
	agent = CodeAgent(
	    model=model,
	    tools=[final_answer, generate_random_sound],
	    max_steps=6,
	    verbosity_level=1,
	    grammar=None,
	    planning_interval=None,
	    name=None,
	    description=None,
	    prompt_templates=prompt_templates,
	)


	# Start the Gradio front end for the agent.
	GradioUI(agent).launch()