Skip to content

Commit 0a8a7b1

Browse files
committed
feat: add logic to synthesize audio
1 parent 3467267 commit 0a8a7b1

File tree

3 files changed

+137
-15
lines changed

3 files changed

+137
-15
lines changed

includes/Classifai/Features/TextToSpeech.php

+24
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use Classifai\Services\LanguageProcessing;
66
use Classifai\Providers\Azure\Speech;
77
use Classifai\Providers\OpenAI\TextToSpeech as OpenAITTS;
8+
use Classifai\Normalizer;
89
use WP_REST_Server;
910
use WP_REST_Request;
1011
use WP_Error;
@@ -44,6 +45,14 @@ class TextToSpeech extends Feature {
4445
*/
4546
const DISPLAY_GENERATED_AUDIO = '_classifai_display_generated_audio';
4647

48+
/**
49+
* Meta key to get/set the audio hash that helps to indicate if there is any need
50+
* for the audio file to be regenerated or not.
51+
*
52+
* @var string
53+
*/
54+
const AUDIO_HASH_KEY = '_classifai_post_audio_hash';
55+
4756
/**
4857
* Constructor.
4958
*/
@@ -802,6 +811,21 @@ public function get_audio_generation_subsequent_state( $post = null ): bool {
802811
return apply_filters( 'classifai_audio_generation_subsequent_state', false, get_post( $post ) );
803812
}
804813

814+
/**
 * Normalizes the post content for text to speech generation.
 *
 * Looks up the post and passes its content, title and ID through
 * Normalizer::normalize_content(). An ID that does not resolve to a
 * post yields an empty string instead of reading properties from null.
 *
 * @param int $post_id The post ID.
 *
 * @return string The normalized post content, or an empty string if the post does not exist.
 */
public function normalize_post_content( int $post_id ): string {
	$post = get_post( $post_id );

	// get_post() returns null when the ID does not match a post.
	if ( ! $post ) {
		return '';
	}

	$normalizer = new Normalizer();

	return $normalizer->normalize_content( $post->post_content, $post->post_title, $post_id );
}
828+
805829
/**
806830
* Generates feature setting data required for migration from
807831
* ClassifAI < 3.0.0 to 3.0.0

includes/Classifai/Providers/Azure/Speech.php

+3-14
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
namespace Classifai\Providers\Azure;
77

88
use Classifai\Providers\Provider;
9-
use Classifai\Normalizer;
109
use Classifai\Features\TextToSpeech;
1110
use stdClass;
1211
use WP_Http;
@@ -30,14 +29,6 @@ class Speech extends Provider {
3029
*/
3130
const API_PATH = 'cognitiveservices/v1';
3231

33-
/**
34-
* Meta key to get/set the audio hash that helps to indicate if there is any need
35-
* for the audio file to be regenerated or not.
36-
*
37-
* @var string
38-
*/
39-
const AUDIO_HASH_KEY = '_classifai_post_audio_hash';
40-
4132
/**
4233
* Azure Text to Speech constructor.
4334
*
@@ -337,12 +328,10 @@ public function synthesize_speech( int $post_id ) {
337328
);
338329
}
339330

340-
$normalizer = new Normalizer();
341331
$feature = new TextToSpeech();
342332
$settings = $feature->get_settings();
343-
$post = get_post( $post_id );
344-
$post_content = $normalizer->normalize_content( $post->post_content, $post->post_title, $post_id );
345-
$content_hash = get_post_meta( $post_id, self::AUDIO_HASH_KEY, true );
333+
$post_content = $feature->normalize_post_content( $post_id );
334+
$content_hash = get_post_meta( $post_id, TextToSpeech::AUDIO_HASH_KEY, true );
346335
$saved_attachment_id = (int) get_post_meta( $post_id, $feature::AUDIO_ID_KEY, true );
347336

348337
// Don't regenerate the audio file if it already exists and the content hasn't changed.
@@ -415,7 +404,7 @@ public function synthesize_speech( int $post_id ) {
415404
);
416405
}
417406

418-
update_post_meta( $post_id, self::AUDIO_HASH_KEY, md5( $post_content ) );
407+
update_post_meta( $post_id, TextToSpeech::AUDIO_HASH_KEY, md5( $post_content ) );
419408

420409
return $response_body;
421410
}

includes/Classifai/Providers/OpenAI/TextToSpeech.php

+110-1
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77

88
use Classifai\Providers\Provider;
99
use Classifai\Features\TextToSpeech as FeatureTextToSpeech;
10+
use WP_Error;
1011

1112
class TextToSpeech extends Provider {
1213
use OpenAI;
1314

14-
const ID = 'openai_text-to-speech';
15+
const ID = 'openai_text_to_speech';
1516

1617
/**
1718
* OpenAI Text to Speech URL.
@@ -209,4 +210,112 @@ public function sanitize_settings( array $new_settings ): array {
209210

210211
return $new_settings;
211212
}
213+
214+
/**
 * Single dispatcher used by every REST route this provider exposes.
 *
 * @param int    $post_id       ID of the post being processed.
 * @param string $route_to_call Route name; it is lower-cased before matching.
 * @param array  $args          Optional arguments handed on to the route handler.
 * @return array|string|WP_Error
 */
public function rest_endpoint_callback( $post_id, string $route_to_call = '', array $args = [] ) {
	$has_valid_post = $post_id && get_post( $post_id );

	if ( ! $has_valid_post ) {
		return new WP_Error( 'post_id_required', esc_html__( 'A valid post ID is required.', 'classifai' ) );
	}

	// 'synthesize' is the only route handled here.
	if ( 'synthesize' === strtolower( $route_to_call ) ) {
		return $this->synthesize_speech( $post_id, $args );
	}

	// Any other route name falls through to an empty string.
	return '';
}
239+
240+
/**
 * Synthesizes speech from a post item.
 *
 * Sends the normalized post content to the endpoint stored in
 * $this->api_url and returns the raw response body. When the post already
 * has a saved audio attachment whose URL resolves and whose stored content
 * hash matches the current content, no request is made and the existing
 * attachment ID is returned instead.
 *
 * @param int $post_id Post ID.
 * @return string|int|WP_Error Response body on success, the saved attachment ID
 *                             when the existing audio is still current, or a
 *                             WP_Error on failure.
 */
public function synthesize_speech( int $post_id ) {
	if ( empty( $post_id ) ) {
		return new WP_Error(
			'openai_text_to_speech_post_id_missing',
			esc_html__( 'Post ID missing.', 'classifai' )
		);
	}

	// We skip the user cap check if running under WP-CLI.
	if ( ! current_user_can( 'edit_post', $post_id ) && ( ! defined( 'WP_CLI' ) || ! WP_CLI ) ) {
		return new WP_Error(
			'openai_text_to_speech_user_not_authorized',
			esc_html__( 'Unauthorized user.', 'classifai' )
		);
	}

	$feature             = new FeatureTextToSpeech();
	$settings            = $feature->get_settings();
	$post_content        = $feature->normalize_post_content( $post_id );
	$content_hash        = get_post_meta( $post_id, FeatureTextToSpeech::AUDIO_HASH_KEY, true );
	$saved_attachment_id = (int) get_post_meta( $post_id, FeatureTextToSpeech::AUDIO_ID_KEY, true );

	// Don't regenerate the audio file if it already exists and the content hasn't changed.
	if ( $saved_attachment_id ) {

		// Check if the audio file exists.
		$audio_attachment_url = wp_get_attachment_url( $saved_attachment_id );

		if ( $audio_attachment_url && ! empty( $content_hash ) && ( md5( $post_content ) === $content_hash ) ) {
			return $saved_attachment_id;
		}
	}

	// Settings are keyed by the provider ID; bail out early when that entry or
	// any value the request depends on is absent, rather than sending a request
	// with a null model, voice or bearer token.
	$provider_settings = $settings[ static::ID ] ?? array();

	if (
		empty( $provider_settings['api_key'] ) ||
		empty( $provider_settings['tts_model'] ) ||
		empty( $provider_settings['voice'] )
	) {
		return new WP_Error(
			'openai_text_to_speech_settings_missing',
			esc_html__( 'OpenAI Text to Speech settings are missing or incomplete.', 'classifai' )
		);
	}

	// Create the request body to synthesize speech from text.
	$request_body = array(
		'model' => $provider_settings['tts_model'],
		'voice' => $provider_settings['voice'],
		'input' => $post_content,
	);

	// Request parameters.
	$request_params = array(
		'method'  => 'POST',
		'body'    => wp_json_encode( $request_body ),
		'timeout' => 60, // phpcs:ignore WordPressVIPMinimum.Performance.RemoteRequestTimeout.timeout_timeout
		'headers' => array(
			'Authorization' => 'Bearer ' . $provider_settings['api_key'],
			'Content-Type'  => 'application/json',
		),
	);

	$response = wp_remote_post( $this->api_url, $request_params );

	if ( is_wp_error( $response ) ) {
		return new WP_Error(
			'openai_text_to_speech_http_error',
			esc_html( $response->get_error_message() )
		);
	}

	$code          = wp_remote_retrieve_response_code( $response );
	$response_body = wp_remote_retrieve_body( $response );

	// Return an error if the HTTP status code is not 200.
	if ( \WP_Http::OK !== $code ) {
		return new WP_Error(
			'openai_text_to_speech_unsuccessful_request',
			esc_html__( 'HTTP request unsuccessful.', 'classifai' )
		);
	}

	// Remember the hash of the content that was just synthesized so an
	// unchanged post can reuse its audio on the next call.
	update_post_meta( $post_id, FeatureTextToSpeech::AUDIO_HASH_KEY, md5( $post_content ) );

	return $response_body;
}
212321
}

0 commit comments

Comments
 (0)