|
| 1 | +/** |
| 2 | + * Copyright (c) 2010-2024 Contributors to the openHAB project |
| 3 | + * |
| 4 | + * See the NOTICE file(s) distributed with this work for additional |
| 5 | + * information. |
| 6 | + * |
| 7 | + * This program and the accompanying materials are made available under the |
| 8 | + * terms of the Eclipse Public License 2.0 which is available at |
| 9 | + * http://www.eclipse.org/legal/epl-2.0 |
| 10 | + * |
| 11 | + * SPDX-License-Identifier: EPL-2.0 |
| 12 | + */ |
| 13 | +package org.openhab.voice.openaitts.internal; |
| 14 | + |
| 15 | +import static org.openhab.voice.openaitts.internal.OpenAITTSConstants.*; |
| 16 | + |
| 17 | +import java.util.Locale; |
| 18 | +import java.util.Map; |
| 19 | +import java.util.Set; |
| 20 | +import java.util.concurrent.ExecutionException; |
| 21 | +import java.util.concurrent.TimeUnit; |
| 22 | +import java.util.concurrent.TimeoutException; |
| 23 | +import java.util.stream.Collectors; |
| 24 | +import java.util.stream.Stream; |
| 25 | + |
| 26 | +import org.eclipse.jdt.annotation.NonNullByDefault; |
| 27 | +import org.eclipse.jdt.annotation.Nullable; |
| 28 | +import org.eclipse.jetty.client.HttpClient; |
| 29 | +import org.eclipse.jetty.client.api.ContentResponse; |
| 30 | +import org.eclipse.jetty.client.util.StringContentProvider; |
| 31 | +import org.eclipse.jetty.http.HttpMethod; |
| 32 | +import org.eclipse.jetty.http.HttpStatus; |
| 33 | +import org.openhab.core.audio.AudioFormat; |
| 34 | +import org.openhab.core.audio.AudioStream; |
| 35 | +import org.openhab.core.audio.ByteArrayAudioStream; |
| 36 | +import org.openhab.core.config.core.ConfigurableService; |
| 37 | +import org.openhab.core.config.core.Configuration; |
| 38 | +import org.openhab.core.io.net.http.HttpClientFactory; |
| 39 | +import org.openhab.core.voice.AbstractCachedTTSService; |
| 40 | +import org.openhab.core.voice.TTSCache; |
| 41 | +import org.openhab.core.voice.TTSException; |
| 42 | +import org.openhab.core.voice.TTSService; |
| 43 | +import org.openhab.core.voice.Voice; |
| 44 | +import org.osgi.framework.Constants; |
| 45 | +import org.osgi.service.component.annotations.Activate; |
| 46 | +import org.osgi.service.component.annotations.Component; |
| 47 | +import org.osgi.service.component.annotations.Modified; |
| 48 | +import org.osgi.service.component.annotations.Reference; |
| 49 | +import org.slf4j.Logger; |
| 50 | +import org.slf4j.LoggerFactory; |
| 51 | + |
| 52 | +import com.google.gson.Gson; |
| 53 | +import com.google.gson.JsonObject; |
| 54 | + |
| 55 | +/** |
| 56 | + * @author Artur Fedjukevits - Initial contribution |
| 57 | + * API documentation: https://platform.openai.com/docs/guides/text-to-speech |
| 58 | + */ |
| 59 | +@Component(configurationPid = TTS_SERVICE_PID, property = Constants.SERVICE_PID + "=" |
| 60 | + + TTS_SERVICE_PID, service = TTSService.class) |
| 61 | +@ConfigurableService(category = "voice", label = "OpenAI TTS Service", description_uri = "voice:" + TTS_SERVICE_ID) |
| 62 | + |
| 63 | +@NonNullByDefault |
| 64 | +public class OpenAITTSService extends AbstractCachedTTSService { |
| 65 | + |
| 66 | + private static final int REQUEST_TIMEOUT_MS = 10_000; |
| 67 | + private final Logger logger = LoggerFactory.getLogger(OpenAITTSService.class); |
| 68 | + private OpenAITTSConfiguration config = new OpenAITTSConfiguration(); |
| 69 | + private final HttpClient httpClient; |
| 70 | + private final Gson gson = new Gson(); |
| 71 | + private static final Set<Voice> VOICES = Stream.of("nova", "alloy", "echo", "fable", "onyx", "shimmer") |
| 72 | + .map(OpenAITTSVoice::new).collect(Collectors.toSet()); |
| 73 | + |
| 74 | + @Activate |
| 75 | + public OpenAITTSService(@Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache, |
| 76 | + Map<String, Object> config) { |
| 77 | + super(ttsCache); |
| 78 | + this.httpClient = httpClientFactory.getCommonHttpClient(); |
| 79 | + } |
| 80 | + |
| 81 | + @Activate |
| 82 | + protected void activate(Map<String, Object> config) { |
| 83 | + this.config = new Configuration(config).as(OpenAITTSConfiguration.class); |
| 84 | + } |
| 85 | + |
| 86 | + @Modified |
| 87 | + protected void modified(Map<String, Object> config) { |
| 88 | + this.config = new Configuration(config).as(OpenAITTSConfiguration.class); |
| 89 | + } |
| 90 | + |
| 91 | + @Override |
| 92 | + public Set<AudioFormat> getSupportedFormats() { |
| 93 | + return Set.of(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, 64000, 44100L)); |
| 94 | + } |
| 95 | + |
| 96 | + @Override |
| 97 | + public String getId() { |
| 98 | + return TTS_SERVICE_ID; |
| 99 | + } |
| 100 | + |
| 101 | + @Override |
| 102 | + public String getLabel(@Nullable Locale locale) { |
| 103 | + return "OpenAI TTS Service"; |
| 104 | + } |
| 105 | + |
| 106 | + @Override |
| 107 | + public Set<Voice> getAvailableVoices() { |
| 108 | + return VOICES; |
| 109 | + } |
| 110 | + |
| 111 | + /** |
| 112 | + * Synthesizes the given text to audio data using the OpenAI API |
| 113 | + * |
| 114 | + * @param text The text to synthesize |
| 115 | + * @param voice The voice to use |
| 116 | + * @param requestedFormat The requested audio format |
| 117 | + * @return The synthesized audio data |
| 118 | + * @throws TTSException If the synthesis fails |
| 119 | + */ |
| 120 | + @Override |
| 121 | + public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException { |
| 122 | + JsonObject content = new JsonObject(); |
| 123 | + content.addProperty("model", config.model); |
| 124 | + content.addProperty("input", text); |
| 125 | + content.addProperty("voice", voice.getLabel().toLowerCase()); |
| 126 | + content.addProperty("speed", config.speed); |
| 127 | + |
| 128 | + String queryJson = gson.toJson(content); |
| 129 | + |
| 130 | + try { |
| 131 | + ContentResponse response = httpClient.newRequest(config.apiUrl).method(HttpMethod.POST) |
| 132 | + .timeout(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS) |
| 133 | + .header("Authorization", "Bearer " + config.apiKey).header("Content-Type", "application/json") |
| 134 | + .content(new StringContentProvider(queryJson)).send(); |
| 135 | + |
| 136 | + if (response.getStatus() == HttpStatus.OK_200) { |
| 137 | + return new ByteArrayAudioStream(response.getContent(), requestedFormat); |
| 138 | + } else { |
| 139 | + logger.error("Request resulted in HTTP {} with message: {}", response.getStatus(), |
| 140 | + response.getReason()); |
| 141 | + throw new TTSException("Failed to generate audio data"); |
| 142 | + } |
| 143 | + } catch (InterruptedException | TimeoutException | ExecutionException e) { |
| 144 | + logger.error("Request to OpenAI failed: {}", e.getMessage(), e); |
| 145 | + throw new TTSException("Failed to generate audio data"); |
| 146 | + } |
| 147 | + } |
| 148 | +} |
0 commit comments