Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions demo/tests/Blog/Command/StreamCommandTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@
use Symfony\AI\Agent\AgentInterface;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Metadata\Metadata;
use Symfony\AI\Platform\Result\DeferredResult;
use Symfony\AI\Platform\Result\InMemoryRawResult;
use Symfony\AI\Platform\Result\RawResultInterface;
use Symfony\AI\Platform\Result\ResultInterface;
use Symfony\AI\Platform\Result\TextResult;
use Symfony\AI\Platform\Speech\Speech;
use Symfony\AI\Platform\Test\PlainConverter;
use Symfony\Component\Console\Input\ArrayInput;
use Symfony\Component\Console\Output\BufferedOutput;
use Symfony\Component\Console\Style\SymfonyStyle;
Expand Down Expand Up @@ -52,6 +57,15 @@ public function getRawResult(): ?RawResultInterface
/**
 * Intentionally empty: this test double ignores the raw result it is given.
 */
public function setRawResult(RawResultInterface $rawResult): void
{
}

/**
 * Intentionally empty: this test double does not store the given speech.
 */
public function addSpeech(Speech $speech): void
{
}

/**
 * Returns a freshly built, canned speech; the requested identifier is not used.
 *
 * The speech wraps a deferred result that converts to the text "foo" and is
 * constructed with an empty array and the string "bar".
 */
public function getSpeech(string $identifier): Speech
{
    $converter = new PlainConverter(new TextResult('foo'));
    $deferredResult = new DeferredResult($converter, new InMemoryRawResult());

    return new Speech([], $deferredResult, 'bar');
}
});

$input = new ArrayInput([]);
Expand Down
58 changes: 58 additions & 0 deletions docs/components/platform.rst
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,64 @@ This allows fast and isolated testing of AI-powered features without relying on

This requires `cURL` and the `ext-curl` extension to be installed.

Speech support
~~~~~~~~~~~~~~

Using speech to send messages / receive answers as audio is a common use case when integrating agents and/or chats.

Speech support can be enabled using ``Symfony\AI\Platform\Speech\SpeechListener``::

    use Symfony\AI\Agent\Agent;
    use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
    use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
    use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
    use Symfony\AI\Platform\Message\Message;
    use Symfony\AI\Platform\Message\MessageBag;
    use Symfony\AI\Platform\Speech\SpeechListener;
    use Symfony\Component\EventDispatcher\EventDispatcher;
    use Symfony\Component\HttpClient\HttpClient;

    // Wrap the ElevenLabs platform so it can be used for speech
    $elevenLabsPlatform = new ElevenLabsSpeechPlatform(
        PlatformFactory::create(
            apiKey: $elevenLabsApiKey,
            httpClient: HttpClient::create(),
        ),
        [
            'ttsModel' => 'eleven_multilingual_v2',
            'ttsVoice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
            'sttModel' => 'eleven_multilingual_v2',
        ],
    );

    // Register the listener on the dispatcher used by the main platform
    $eventDispatcher = new EventDispatcher();
    $eventDispatcher->addSubscriber(new SpeechListener([
        $elevenLabsPlatform,
    ]));

    $platform = OpenAiPlatformFactory::create($openAiApiKey, httpClient: HttpClient::create(), eventDispatcher: $eventDispatcher);

    $agent = new Agent($platform, 'gpt-4o');
    $answer = $agent->call(new MessageBag(
        Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
    ));

    echo $answer->getSpeech('elevenlabs')->asBinary();

When using the bundle, speech models and voices are configured under the platform itself:

.. code-block:: yaml

    ai:
        platform:
            elevenlabs:
                api_key: '%env(ELEVEN_LABS_API_KEY)%'
                speech:
                    tts_model: 'eleven_multilingual_v2'
                    tts_voice: '%env(ELEVEN_LABS_VOICE_IDENTIFIER)%'
                    tts_options:
                        foo: bar

.. note::

Please be aware that enabling speech support requires the corresponding platform to be configured.

Code Examples
~~~~~~~~~~~~~

Expand Down
10 changes: 10 additions & 0 deletions examples/speech/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Speech Examples

Speech is mainly used to transform text to audio and vice versa; it can also be used to create an audio-to-audio pipeline.

To run the examples, you can use additional tools like [mpg123](https://www.mpg123.de/):

```bash
php speech/agent-eleven-labs-speech-tts.php | mpg123 -
php speech/agent-eleven-labs-speech-sts.php | mpg123 -
```
48 changes: 48 additions & 0 deletions examples/speech/agent-eleven-labs-speech-sts.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
use Symfony\AI\Platform\Message\Content\Audio;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Speech\SpeechListener;
use Symfony\Component\EventDispatcher\EventDispatcher;

// Shared example bootstrap; presumably provides the env() and http_client() helpers used below.
require_once dirname(__DIR__).'/bootstrap.php';

// ElevenLabs platform, wrapped below with the text-to-speech and speech-to-text settings.
$innerPlatform = PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
);

$speechOptions = [
    'ttsModel' => 'eleven_multilingual_v2',
    'ttsVoice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
    'sttModel' => 'eleven_multilingual_v2',
];

$speechPlatform = new ElevenLabsSpeechPlatform($innerPlatform, $speechOptions);

// Register the speech listener on the dispatcher handed to the OpenAI platform.
$dispatcher = new EventDispatcher();
$dispatcher->addSubscriber(new SpeechListener([$speechPlatform]));

$openAiPlatform = OpenAiPlatformFactory::create(
    env('OPENAI_API_KEY'),
    httpClient: http_client(),
    eventDispatcher: $dispatcher,
);

// Send an audio file as the user message and print the spoken answer as raw binary.
$agent = new Agent($openAiPlatform, 'gpt-4o');
$audioMessage = Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'));
$answer = $agent->call(new MessageBag($audioMessage));

echo $answer->getSpeech('elevenlabs')->asBinary();
44 changes: 44 additions & 0 deletions examples/speech/agent-eleven-labs-speech-stt.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
use Symfony\AI\Platform\Message\Content\Audio;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Speech\SpeechListener;
use Symfony\Component\EventDispatcher\EventDispatcher;

// Shared example bootstrap; presumably provides the env() and http_client() helpers used below.
require_once dirname(__DIR__).'/bootstrap.php';

$dispatcher = new EventDispatcher();

// ElevenLabs platform configured with a speech-to-text model only.
$speechPlatform = new ElevenLabsSpeechPlatform(
    PlatformFactory::create(
        apiKey: env('ELEVEN_LABS_API_KEY'),
        httpClient: http_client(),
    ),
    ['sttModel' => 'eleven_multilingual_v2'],
);

$dispatcher->addSubscriber(new SpeechListener([$speechPlatform]));

$openAiPlatform = OpenAiPlatformFactory::create(
    env('OPENAI_API_KEY'),
    httpClient: http_client(),
    eventDispatcher: $dispatcher,
);

// Send an audio file as the user message and print the textual answer.
$agent = new Agent($openAiPlatform, 'gpt-4o');
$audioMessage = Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'));
$answer = $agent->call(new MessageBag($audioMessage));

echo $answer->getContent();
46 changes: 46 additions & 0 deletions examples/speech/agent-eleven-labs-speech-tts.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Speech\SpeechListener;
use Symfony\Component\EventDispatcher\EventDispatcher;

// Shared example bootstrap; presumably provides the env() and http_client() helpers used below.
require_once dirname(__DIR__).'/bootstrap.php';

// ElevenLabs platform, wrapped below with the text-to-speech model and voice.
$innerPlatform = PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
);

$speechOptions = [
    'ttsModel' => 'eleven_multilingual_v2',
    'ttsVoice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
];

$speechPlatform = new ElevenLabsSpeechPlatform($innerPlatform, $speechOptions);

// Register the speech listener on the dispatcher handed to the OpenAI platform.
$dispatcher = new EventDispatcher();
$dispatcher->addSubscriber(new SpeechListener([$speechPlatform]));

$openAiPlatform = OpenAiPlatformFactory::create(
    env('OPENAI_API_KEY'),
    httpClient: http_client(),
    eventDispatcher: $dispatcher,
);

// Ask a text question and print the spoken answer as raw binary.
$agent = new Agent($openAiPlatform, 'gpt-4o');
$question = Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?');
$answer = $agent->call(new MessageBag($question));

echo $answer->getSpeech('elevenlabs')->asBinary();
12 changes: 12 additions & 0 deletions src/agent/src/Output.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Result\ResultInterface;
use Symfony\AI\Platform\Speech\Speech;

/**
* @author Christopher Hertel <[email protected]>
Expand All @@ -27,6 +28,7 @@ public function __construct(
private ResultInterface $result,
private readonly MessageBag $messageBag,
private readonly array $options = [],
private ?Speech $speech = null,
) {
}

Expand Down Expand Up @@ -57,4 +59,14 @@ public function getOptions(): array
{
return $this->options;
}

/**
 * Replaces the speech held by this output; passing null clears it.
 */
public function setSpeech(?Speech $speech): void
{
$this->speech = $speech;
}

/**
 * Returns the speech held by this output, or null when none has been set.
 */
public function getSpeech(): ?Speech
{
return $this->speech;
}
}
1 change: 1 addition & 0 deletions src/ai-bundle/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ CHANGELOG
- Token usage metadata in agent results including prompt, completion, total, cached, and thinking tokens
- Rate limit information tracking for supported platforms
* Add support for configuring chats and message stores
* Add support for configuring speeches
17 changes: 17 additions & 0 deletions src/ai-bundle/config/options.php
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,23 @@
->booleanNode('api_catalog')
->info('If set, the ElevenLabs API will be used to build the catalog and retrieve models information, using this option leads to additional HTTP calls')
->end()
->arrayNode('speech')
->children()
->stringNode('tts_model')->end()
->stringNode('tts_voice')->end()
->arrayNode('tts_options')
->scalarPrototype()
->defaultValue([])
->end()
->end()
->stringNode('stt_model')->end()
->arrayNode('stt_options')
->scalarPrototype()
->defaultValue([])
->end()
->end()
->end()
->end()
->end()
->end()
->arrayNode('gemini')
Expand Down
8 changes: 8 additions & 0 deletions src/ai-bundle/config/services.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
use Symfony\AI\Platform\Message\TemplateRenderer\TemplateRendererRegistry;
use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface;
use Symfony\AI\Platform\Serializer\StructuredOutputSerializer;
use Symfony\AI\Platform\Speech\SpeechListener;
use Symfony\AI\Platform\StructuredOutput\PlatformSubscriber;
use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactory;
use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactoryInterface;
Expand Down Expand Up @@ -263,5 +264,12 @@
tagged_locator('ai.message_store', 'name'),
])
->tag('console.command')

// listeners
->set('ai.speech.listener', SpeechListener::class)
->args([
tagged_iterator('ai.platform.speech', 'name'),
])
->tag('kernel.event_subscriber')
;
};
27 changes: 26 additions & 1 deletion src/ai-bundle/src/AiBundle.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
use Symfony\AI\Platform\Bridge\DeepSeek\PlatformFactory as DeepSeekPlatformFactory;
use Symfony\AI\Platform\Bridge\DockerModelRunner\PlatformFactory as DockerModelRunnerPlatformFactory;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsApiCatalog;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
use Symfony\AI\Platform\Bridge\Gemini\PlatformFactory as GeminiPlatformFactory;
use Symfony\AI\Platform\Bridge\Generic\PlatformFactory as GenericPlatformFactory;
Expand All @@ -82,6 +83,7 @@
use Symfony\AI\Platform\Platform;
use Symfony\AI\Platform\PlatformInterface;
use Symfony\AI\Platform\ResultConverterInterface;
use Symfony\AI\Platform\Speech\SpeechPlatformInterface;
use Symfony\AI\Store\Bridge\AzureSearch\SearchStore as AzureSearchStore;
use Symfony\AI\Store\Bridge\Cache\Store as CacheStore;
use Symfony\AI\Store\Bridge\ChromaDb\Store as ChromaDbStore;
Expand Down Expand Up @@ -262,6 +264,12 @@ public function loadExtension(array $config, ContainerConfigurator $container, C
}
}

$speechPlatforms = array_keys($builder->findTaggedServiceIds('ai.platform.speech'));

if ([] === $speechPlatforms) {
$builder->removeDefinition('ai.speech.listener');
}

foreach ($config['vectorizer'] ?? [] as $vectorizerName => $vectorizer) {
$this->processVectorizerConfig($vectorizerName, $vectorizer, $builder);
}
Expand Down Expand Up @@ -491,6 +499,24 @@ private function processPlatformConfig(string $type, array $platform, ContainerB
$container->setDefinition('ai.platform.model_catalog.'.$type, $catalogDefinition);
}

if (\array_key_exists('speech', $platform) && [] !== $platform['speech']) {
$decoratedPlatform = new Definition(ElevenLabsSpeechPlatform::class);
$decoratedPlatform
->setLazy(true)
->setDecoratedService('ai.platform.'.$type)
->setArguments([
new Reference('.inner'),
$platform['speech'],
])
->addTag('proxy', ['interface' => PlatformInterface::class])
->addTag('proxy', ['interface' => SpeechPlatformInterface::class])
->addTag('ai.platform.speech', ['name' => $type])
;

$container->setDefinition('ai.platform.speech.'.$type, $decoratedPlatform);
$container->registerAliasForArgument('ai.platform.speech.'.$type, SpeechPlatformInterface::class, $type);
}

$definition = (new Definition(Platform::class))
->setFactory(ElevenLabsPlatformFactory::class.'::create')
->setLazy(true)
Expand Down Expand Up @@ -551,7 +577,6 @@ private function processPlatformConfig(string $type, array $platform, ContainerB
$config['api_key'],
new Reference($config['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE),
new Reference($config['model_catalog'], ContainerInterface::NULL_ON_INVALID_REFERENCE),
null,
new Reference('event_dispatcher'),
$config['supports_completions'],
$config['supports_embeddings'],
Expand Down
1 change: 0 additions & 1 deletion src/ai-bundle/src/Profiler/TraceablePlatform.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
namespace Symfony\AI\AiBundle\Profiler;

use Symfony\AI\Platform\Message\Content\File;
use Symfony\AI\Platform\Model;
use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface;
use Symfony\AI\Platform\PlatformInterface;
use Symfony\AI\Platform\Result\DeferredResult;
Expand Down
Loading
Loading