Initial commit
This commit is contained in:
360
backends/text-to-speech/emscripten/emscripten-text-to-speech.cpp
Normal file
360
backends/text-to-speech/emscripten/emscripten-text-to-speech.cpp
Normal file
@@ -0,0 +1,360 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
// Disable symbol overrides so that we can use system headers.
|
||||
#define FORBIDDEN_SYMBOL_EXCEPTION_FILE
|
||||
#define FORBIDDEN_SYMBOL_EXCEPTION_getenv
|
||||
|
||||
#include "backends/text-to-speech/emscripten/emscripten-text-to-speech.h"
|
||||
|
||||
#if defined(USE_TTS) && defined(EMSCRIPTEN)
|
||||
#include <emscripten.h>
|
||||
|
||||
#include "common/config-manager.h"
|
||||
#include "common/system.h"
|
||||
#include "common/translation.h"
|
||||
#include "common/ustr.h"
|
||||
#include "common/debug.h"
|
||||
|
||||
EM_JS(void, ttsInit, (), {
|
||||
/*
|
||||
* Voices can come from the browser, the operating system or cloud services. This means we sometimes get
|
||||
* an incomplete or empty list on first call getVoices().
|
||||
* Best practice is to listen to the 'voiceschanged' event and update the list of voices when it fires.
|
||||
*/
|
||||
globalThis['ttsVoiceMap'] = {};
|
||||
globalThis['ttsUtteranceQueue'] = [];
|
||||
const refreshVoices = () => {
|
||||
globalThis['ttsVoiceMap'] = {};
|
||||
var cnt = 0;
|
||||
voices = window.speechSynthesis.getVoices();
|
||||
Array.from(voices).forEach((voice) => {
|
||||
if (!(voice.lang in globalThis['ttsVoiceMap'])) {
|
||||
globalThis['ttsVoiceMap'][voice.lang] = {};
|
||||
}
|
||||
globalThis['ttsVoiceMap'][voice.lang][voice.name] = voice;
|
||||
cnt++;
|
||||
});
|
||||
console.log("Found %d voices",cnt);
|
||||
};
|
||||
|
||||
if ('onvoiceschanged' in speechSynthesis) {
|
||||
speechSynthesis.onvoiceschanged = refreshVoices;
|
||||
}
|
||||
refreshVoices();
|
||||
});
|
||||
|
||||
/**
 * Initialise the browser-side TTS state and select the start-up language:
 * the current translation language when translations are compiled in,
 * English otherwise.
 */
EmscriptenTextToSpeechManager::EmscriptenTextToSpeechManager() {
	ttsInit();

#ifdef USE_TRANSLATION
	const Common::String startupLanguage = TransMan.getCurrentLanguage();
#else
	const Common::String startupLanguage = "en";
#endif
	setLanguage(startupLanguage);
}
|
||||
|
||||
/** Cancel any speech on destruction; the result of stop() is not used. */
EmscriptenTextToSpeechManager::~EmscriptenTextToSpeechManager() {
	(void)stop();
}
|
||||
|
||||
|
||||
EM_JS(bool, _ttsSay, (const char *text, const char *voice_name, const char *voice_lang, int pitch, int rate, int volume, int action), {
|
||||
voice_name = UTF8ToString(voice_name);
|
||||
voice_lang = UTF8ToString(voice_lang);
|
||||
if (!(voice_lang in globalThis['ttsVoiceMap'] && voice_name in globalThis['ttsVoiceMap'][voice_lang])){
|
||||
console.error("_ttsSay: Voice not found");
|
||||
return false;
|
||||
}
|
||||
text = UTF8ToString(text);
|
||||
if (text === "") {
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
* Possible actions are:
|
||||
* INTERRUPT - interrupts the current speech
|
||||
* INTERRUPT_NO_REPEAT - interrupts the speech (deletes the whole queue),
|
||||
* if the str is the same as the string currently being said,
|
||||
* it lets the current string finish.
|
||||
* QUEUE - queues the speech
|
||||
* QUEUE_NO_REPEAT - queues the speech only if the str is different than
|
||||
* the last string in the queue (or the string, that is currently
|
||||
* being said if the queue is empty)
|
||||
* DROP - does nothing if there is anything being said at the moment
|
||||
*/
|
||||
const Actions = Object.freeze({
|
||||
INTERRUPT: 0,
|
||||
INTERRUPT_NO_REPEAT: 1,
|
||||
QUEUE: 2,
|
||||
QUEUE_NO_REPEAT: 3
|
||||
});
|
||||
console.assert(action <= 3,"_ttsSay: Illegal Action: %d",action);// DROP is handled on the native side so we should only have 0-3.
|
||||
|
||||
if (action == Actions.QUEUE_NO_REPEAT &&
|
||||
globalThis['ttsUtteranceQueue'].length > 0 && globalThis['ttsUtteranceQueue'][globalThis['ttsUtteranceQueue'].length-1].text == text) {
|
||||
console.debug("_ttsSay: Skipping duplicate utterance (QUEUE_NO_REPEAT)");
|
||||
return false;
|
||||
}
|
||||
// INTERRUPT_NO_REPEAT with a matching string - empty queue but let the current string finish
|
||||
if (action == Actions.INTERRUPT_NO_REPEAT && globalThis['ttsUtteranceQueue'].length > 0 && globalThis['ttsUtteranceQueue'][0].text == text){
|
||||
globalThis['ttsUtteranceQueue'] = globalThis['ttsUtteranceQueue'].slice(0,1);
|
||||
return false;
|
||||
}
|
||||
// interrupt or INTERRUPT_NO_REPEAT with a non-matching string (or no string talking) - empty queue, cancel all talking
|
||||
if (action == Actions.INTERRUPT || action == Actions.INTERRUPT_NO_REPEAT ) {
|
||||
globalThis['ttsUtteranceQueue'] = [];//globalThis['ttsUtteranceQueue'].slice(0,1);
|
||||
window.speechSynthesis.cancel();
|
||||
|
||||
}
|
||||
// queue and speak next utterance
|
||||
voice = globalThis['ttsVoiceMap'][voice_lang][voice_name];
|
||||
const utterance = new SpeechSynthesisUtterance(text);
|
||||
utterance.onend = function(event) { // this is triggered when an utterance completes speaking
|
||||
if (globalThis['ttsUtteranceQueue'][0] == event.target){
|
||||
globalThis['ttsUtteranceQueue'].shift(); //remove utterance that was just spoken
|
||||
}
|
||||
if (globalThis['ttsUtteranceQueue'].length > 0 && !window.speechSynthesis.speaking){ // speak next utterance if nothing is being spoken
|
||||
window.speechSynthesis.speak(globalThis['ttsUtteranceQueue'][0]);
|
||||
}
|
||||
};
|
||||
utterance.onerror = function(event) { // this includes canceled utterances (so not just errors)
|
||||
if (globalThis['ttsUtteranceQueue'][0] == event.target){
|
||||
globalThis['ttsUtteranceQueue'].shift(); //remove utterance that was just spoken
|
||||
}
|
||||
if (globalThis['ttsUtteranceQueue'].length > 0 && !window.speechSynthesis.speaking){ // speak next utterance if nothing is being spoken
|
||||
window.speechSynthesis.speak(globalThis['ttsUtteranceQueue'][0]);
|
||||
}
|
||||
};
|
||||
/*
|
||||
* TODO: we could do INTERRUPT_NO_REPEAT and INTERRUPT handling on boundaries, but it's not reliable
|
||||
* remote voices don't have onboundary event: https://issues.chromium.org/issues/41195426
|
||||
*
|
||||
* utterance.onboundary = function(event){
|
||||
* console.log(event);
|
||||
* };
|
||||
*/
|
||||
utterance.voice = voice;
|
||||
utterance.volume = volume / 100; // linearly adjust 0 to 100 -> 0 to 1
|
||||
utterance.pitch = (pitch + 100) / 100; // linearly adjust -100 to 100 (0 default) -> 0 to 2 (1 default)
|
||||
utterance.rate = rate > 0 ? 1 + (rate / (100 - 9)) : 0.1 + (rate + 100) / (100 / 0.9); // linearly adjust -100 to 100 (0 default) -> 0.1 to 10 (1 default)
|
||||
|
||||
console.debug("Pushing to queue: %s",text);
|
||||
globalThis['ttsUtteranceQueue'].push(utterance);
|
||||
if (globalThis['ttsUtteranceQueue'].length == 1){
|
||||
console.debug("Speaking %s",text);
|
||||
window.speechSynthesis.speak(utterance);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
/**
 * Speak (or queue) a string with the active voice and current settings.
 *
 * @param str     Text to speak; it is encoded to UTF-8 before being passed to JavaScript.
 * @param action  Queueing behaviour. DROP is handled here; all other
 *                actions are forwarded to _ttsSay().
 * @return true when action is DROP and something is already being spoken,
 *         false when no voice is selected, otherwise the result of _ttsSay().
 */
bool EmscriptenTextToSpeechManager::say(const Common::U32String &str, Action action) {
	assert(_ttsState->_enabled);

	Common::String strUtf8 = str.encode();
	debug(5, "Saying %s (%d)", strUtf8.c_str(), action);

	if (isSpeaking() && action == DROP) {
		debug(5, "EmscriptenTextToSpeechManager::say - Not saying '%s' as action=DROP and already speaking", strUtf8.c_str());
		return true;
	}

	// updateVoices() may have found no voice for the current language; do not
	// index the voice list in that case. The unsigned cast also rejects a
	// negative index.
	if (static_cast<unsigned>(_ttsState->_activeVoice) >= _ttsState->_availableVoices.size()) {
		warning("EmscriptenTextToSpeechManager::say - No voice available, not saying '%s'", strUtf8.c_str());
		return false;
	}

	// Voice data is the { name, language } pair stored by updateVoices().
	char **voiceData = static_cast<char **>(_ttsState->_availableVoices[_ttsState->_activeVoice].getData());
	char *voice_name = voiceData[0];
	char *voice_lang = voiceData[1];
	return _ttsSay(strUtf8.c_str(), voice_name, voice_lang, _ttsState->_pitch, _ttsState->_rate, _ttsState->_volume, action);
}
|
||||
|
||||
EM_JS(char **, _ttsGetVoices, (), {
|
||||
voices = Array.from(Object.values(globalThis['ttsVoiceMap'])).map(Object.values).flat() // flatten voice map
|
||||
.sort((a,b) => a.default === b.default ? a.name.localeCompare(b.name):a.default?-1:1) // first default, then alphabetically
|
||||
.map(voice=>[voice.name,voice.lang])
|
||||
.flat();
|
||||
voices.push(""); // we need this to find the end of the array on the native side.
|
||||
|
||||
// convert the strings to C strings
|
||||
var c_strings = voices.map((s) => {
|
||||
var size = lengthBytesUTF8(s) + 1;
|
||||
var ret = Module._malloc(size);
|
||||
stringToUTF8Array(s, HEAP8, ret, size);
|
||||
return ret;
|
||||
});
|
||||
|
||||
var ret_arr = Module._malloc(c_strings.length * 4); // 4-bytes per pointer
|
||||
c_strings.forEach((ptr, i) => { Module.setValue(ret_arr + i * 4, ptr, "i32"); }); // populate return array
|
||||
return ret_arr;
|
||||
});
|
||||
|
||||
void EmscriptenTextToSpeechManager::updateVoices() {
|
||||
_ttsState->_availableVoices.clear();
|
||||
char **ttsVoices = _ttsGetVoices();
|
||||
char **iter = ttsVoices;
|
||||
Common::Array<char *> names;
|
||||
while (strcmp(*iter, "") != 0) {
|
||||
char *c_name = *iter++;
|
||||
char *c_lang = *iter++;
|
||||
Common::String language = Common::String(c_lang);
|
||||
if (_ttsState->_language == language.substr(0, 2)) {
|
||||
int idx = -1;
|
||||
for (int i = 0; i < names.size(); i++) {
|
||||
if (strcmp(names[i], c_name) == 0) {
|
||||
idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
names.push_back(c_name);
|
||||
Common::String name;
|
||||
// some systems have the same voice multiple times for the same language (e.g. en-US and en-GB),
|
||||
// in that case we should add the locale to the name
|
||||
if (idx == -1) {
|
||||
name = Common::String(c_name);
|
||||
} else {
|
||||
name = Common::String::format("%s (%s)", c_name, language.substr(3, 2).c_str());
|
||||
// some systems have identical name/language/locale pairs multiple times (seems a bug), we just skip that case (e.g. macOS Safari for "Samantha (en_US)" )
|
||||
char *other_name = ((char **)_ttsState->_availableVoices[idx].getData())[0];
|
||||
char *other_lang = ((char **)_ttsState->_availableVoices[idx].getData())[1];
|
||||
Common::String other_new = Common::String::format("%s (%s)", other_name, Common::String(other_lang).substr(3, 2).c_str());
|
||||
if (other_new == name) {
|
||||
warning("Skipping duplicate voice %s %s", c_name, c_lang);
|
||||
continue;
|
||||
} else {
|
||||
warning("Adding duplicate voice %s %s", _ttsState->_availableVoices[idx].getDescription().c_str(), name.c_str());
|
||||
_ttsState->_availableVoices[idx].setDescription(other_new);
|
||||
}
|
||||
}
|
||||
char **data_p = new char *[] { c_name, c_lang };
|
||||
Common::TTSVoice voice(Common::TTSVoice::UNKNOWN_GENDER, Common::TTSVoice::UNKNOWN_AGE, (void *)data_p, name);
|
||||
_ttsState->_availableVoices.push_back(voice);
|
||||
}
|
||||
}
|
||||
free(ttsVoices);
|
||||
|
||||
if (_ttsState->_availableVoices.empty()) {
|
||||
warning("No voice is available for language: %s", _ttsState->_language.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
EM_JS(void, _ttsStop, (), {
|
||||
window.speechSynthesis.cancel();
|
||||
});
|
||||
|
||||
/**
 * Cancel speech in the browser.
 *
 * @return always true.
 */
bool EmscriptenTextToSpeechManager::stop() {
	_ttsStop();

	const bool handled = true;
	return handled;
}
|
||||
|
||||
EM_ASYNC_JS(void, _ttsPause, (), {
|
||||
if(window.speechSynthesis.paused){
|
||||
} else if(window.speechSynthesis.speaking && globalThis['ttsUtteranceQueue'].length > 0){
|
||||
// browsers don't pause immediately, so we have to wait for the pause event if there's something being spoken
|
||||
await (async () => {
|
||||
return new Promise((resolve, reject) => {
|
||||
setTimeout(() => { resolve(); }, 300);
|
||||
globalThis['ttsUtteranceQueue'][0].onpause = (event) =>{ resolve(event)};
|
||||
window.speechSynthesis.pause();
|
||||
});
|
||||
})();
|
||||
} else {
|
||||
assert(globalThis['ttsUtteranceQueue'].length == 0);
|
||||
window.speechSynthesis.pause();
|
||||
}
|
||||
return;
|
||||
});
|
||||
|
||||
bool EmscriptenTextToSpeechManager::pause() {
|
||||
if (isPaused())
|
||||
return false;
|
||||
_ttsPause();
|
||||
return true;
|
||||
}
|
||||
|
||||
EM_JS(void, _ttsResume, (), {
|
||||
window.speechSynthesis.resume();
|
||||
});
|
||||
|
||||
bool EmscriptenTextToSpeechManager::resume() {
|
||||
if (!isPaused())
|
||||
return false;
|
||||
_ttsResume();
|
||||
return true;
|
||||
}
|
||||
|
||||
EM_JS(bool, _ttsIsSpeaking, (), {
|
||||
return window.speechSynthesis.speaking;
|
||||
});
|
||||
|
||||
bool EmscriptenTextToSpeechManager::isSpeaking() {
|
||||
return _ttsIsSpeaking();
|
||||
}
|
||||
|
||||
EM_JS(bool, _ttsIsPaused, (), {
|
||||
console.debug("_ttsIsPaused: Checking if speech synthesis is paused %s",window.speechSynthesis.paused ? "true" : "false");
|
||||
return window.speechSynthesis.paused;
|
||||
});
|
||||
|
||||
bool EmscriptenTextToSpeechManager::isPaused() {
|
||||
return _ttsIsPaused();
|
||||
}
|
||||
|
||||
bool EmscriptenTextToSpeechManager::isReady() {
|
||||
if (_ttsState->_availableVoices.empty())
|
||||
return false;
|
||||
if (!isPaused() && !isSpeaking())
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Select the active voice by index into _availableVoices.
 *
 * The index is only range-checked (by assertion) while TTS is enabled.
 */
void EmscriptenTextToSpeechManager::setVoice(unsigned index) {
	const bool indexAcceptable = !_ttsState->_enabled || index < _ttsState->_availableVoices.size();
	assert(indexAcceptable);
	_ttsState->_activeVoice = index;
}
|
||||
|
||||
/** Store the speech rate; asserts that it lies within -100..100. */
void EmscriptenTextToSpeechManager::setRate(int rate) {
	assert(-100 <= rate && rate <= 100);

	_ttsState->_rate = rate;
}
|
||||
|
||||
/** Store the speech pitch; asserts that it lies within -100..100. */
void EmscriptenTextToSpeechManager::setPitch(int pitch) {
	assert(-100 <= pitch && pitch <= 100);

	_ttsState->_pitch = pitch;
}
|
||||
|
||||
/** Store the speech volume; asserts that it is at most 100. */
void EmscriptenTextToSpeechManager::setVolume(unsigned volume) {
	assert(!(volume > 100));

	_ttsState->_volume = volume;
}
|
||||
|
||||
/**
 * Change the TTS language.
 *
 * The voice list is refreshed and voice 0 selected when the two-letter
 * language code differs from the stored one or no voices are known yet;
 * the base-class implementation is then called with the full string.
 *
 * NOTE(review): updateVoices() filters on _ttsState->_language, and this
 * function does not change that field before calling it - presumably the
 * base-class call below updates it. Confirm the voice list is not built for
 * the previous language.
 */
void EmscriptenTextToSpeechManager::setLanguage(Common::String language) {
	debug(5, "EmscriptenTextToSpeechManager::setLanguage to %s", language.c_str());

	const bool languageChanged = _ttsState->_language != language.substr(0, 2);
	if (languageChanged || _ttsState->_availableVoices.empty()) {
		debug(5, "EmscriptenTextToSpeechManager::setLanguage - Update voices");
		updateVoices();
		setVoice(0);
	}

	Common::TextToSpeechManager::setLanguage(language);
}
|
||||
|
||||
/**
 * Release the per-voice data created by updateVoices(): a two-element
 * pointer array holding the voice name and the voice language strings.
 */
void EmscriptenTextToSpeechManager::freeVoiceData(void *data) {
	char **nameAndLang = (char **)data;
	free(nameAndLang[0]); // voice name
	free(nameAndLang[1]); // voice language
	free(data);
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,69 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef BACKENDS_TEXT_TO_SPEECH_EMSCRIPTEN_H
|
||||
#define BACKENDS_TEXT_TO_SPEECH_EMSCRIPTEN_H
|
||||
|
||||
#include "common/scummsys.h"
|
||||
|
||||
#if defined(USE_TTS) && defined(EMSCRIPTEN)
|
||||
|
||||
#include "common/list.h"
|
||||
#include "common/str.h"
|
||||
#include "common/text-to-speech.h"
|
||||
#include "common/ustr.h"
|
||||
|
||||
class EmscriptenTextToSpeechManager final : public Common::TextToSpeechManager {
|
||||
public:
|
||||
EmscriptenTextToSpeechManager();
|
||||
~EmscriptenTextToSpeechManager() override;
|
||||
|
||||
bool say(const Common::U32String &str, Action action) override;
|
||||
|
||||
bool stop() override;
|
||||
bool pause() override;
|
||||
bool resume() override;
|
||||
|
||||
bool isSpeaking() override;
|
||||
bool isPaused() override;
|
||||
bool isReady() override;
|
||||
|
||||
void setVoice(unsigned index) override;
|
||||
|
||||
void setRate(int rate) override;
|
||||
|
||||
void setPitch(int pitch) override;
|
||||
|
||||
void setVolume(unsigned volume) override;
|
||||
|
||||
void setLanguage(Common::String language) override;
|
||||
|
||||
void freeVoiceData(void *data) override;
|
||||
|
||||
void updateVoicesPublic() { updateVoices(); };
|
||||
|
||||
private:
|
||||
void updateVoices() override;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // BACKENDS_TEXT_TO_SPEECH_EMSCRIPTEN_H
|
||||
368
backends/text-to-speech/linux/linux-text-to-speech.cpp
Normal file
368
backends/text-to-speech/linux/linux-text-to-speech.cpp
Normal file
@@ -0,0 +1,368 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
// Disable symbol overrides so that we can use system headers.
|
||||
#define FORBIDDEN_SYMBOL_ALLOW_ALL
|
||||
|
||||
#include "backends/text-to-speech/linux/linux-text-to-speech.h"
|
||||
|
||||
#if defined(USE_TTS) && defined(USE_SPEECH_DISPATCHER) && defined(POSIX)
|
||||
#include <speech-dispatcher/libspeechd.h>
|
||||
|
||||
#include "common/translation.h"
|
||||
#include "common/system.h"
|
||||
#include "common/ustr.h"
|
||||
#include "common/config-manager.h"
|
||||
|
||||
// Connection to speech-dispatcher shared by the callbacks and the manager in
// this file. Opened by SpeechDispatcherManager::init(); closed and reset to 0
// by startSpeech() when spd_say() fails; closed by the manager's destructor.
SPDConnection *_connection;
|
||||
|
||||
void speech_begin_callback(size_t msg_id, size_t client_id, SPDNotificationType state){
|
||||
SpeechDispatcherManager *manager =
|
||||
static_cast<SpeechDispatcherManager *> (g_system->getTextToSpeechManager());
|
||||
manager->updateState(SpeechDispatcherManager::SPEECH_BEGUN);
|
||||
}
|
||||
|
||||
void speech_end_callback(size_t msg_id, size_t client_id, SPDNotificationType state){
|
||||
SpeechDispatcherManager *manager =
|
||||
static_cast<SpeechDispatcherManager *> (g_system->getTextToSpeechManager());
|
||||
manager->updateState(SpeechDispatcherManager::SPEECH_ENDED);
|
||||
}
|
||||
|
||||
void speech_cancel_callback(size_t msg_id, size_t client_id, SPDNotificationType state){
|
||||
SpeechDispatcherManager *manager =
|
||||
static_cast<SpeechDispatcherManager *> (g_system->getTextToSpeechManager());
|
||||
manager->updateState(SpeechDispatcherManager::SPEECH_CANCELED);
|
||||
}
|
||||
|
||||
void speech_resume_callback(size_t msg_id, size_t client_id, SPDNotificationType state){
|
||||
SpeechDispatcherManager *manager =
|
||||
static_cast<SpeechDispatcherManager *> (g_system->getTextToSpeechManager());
|
||||
manager->updateState(SpeechDispatcherManager::SPEECH_RESUMED);
|
||||
}
|
||||
|
||||
void speech_pause_callback(size_t msg_id, size_t client_id, SPDNotificationType state){
|
||||
SpeechDispatcherManager *manager =
|
||||
static_cast<SpeechDispatcherManager *> (g_system->getTextToSpeechManager());
|
||||
manager->updateState(SpeechDispatcherManager::SPEECH_PAUSED);
|
||||
}
|
||||
|
||||
|
||||
void *SpeechDispatcherManager::startSpeech(void *p) {
|
||||
StartSpeechParams *params = (StartSpeechParams *) p;
|
||||
pthread_mutex_lock(params->mutex);
|
||||
if (!_connection || g_system->getTextToSpeechManager()->isPaused() ||
|
||||
params->speechQueue->front().empty()) {
|
||||
pthread_mutex_unlock(params->mutex);
|
||||
return NULL;
|
||||
}
|
||||
if (spd_say(_connection, SPD_MESSAGE, params->speechQueue->front().c_str()) == -1) {
|
||||
// close the connection
|
||||
if (_connection != 0) {
|
||||
spd_close(_connection);
|
||||
_connection = 0;
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(params->mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Prepare the mutex and the parameter block handed to startSpeech(), then
 * connect to speech-dispatcher via init().
 */
SpeechDispatcherManager::SpeechDispatcherManager()
	: _speechState(READY) {
	_threadCreated = false;

	pthread_mutex_init(&_speechMutex, NULL);

	// startSpeech() reaches the mutex and the queue through this structure.
	_params.speechQueue = &_speechQueue;
	_params.mutex = &_speechMutex;

	init();
}
|
||||
|
||||
void SpeechDispatcherManager::init() {
|
||||
_connection = spd_open("ScummVM", "main", NULL, SPD_MODE_THREADED);
|
||||
if (_connection == 0) {
|
||||
_speechState = BROKEN;
|
||||
warning("Couldn't initialize text to speech through speech-dispatcher");
|
||||
return;
|
||||
}
|
||||
|
||||
_connection->callback_begin = speech_begin_callback;
|
||||
spd_set_notification_on(_connection, SPD_BEGIN);
|
||||
_connection->callback_end = speech_end_callback;
|
||||
spd_set_notification_on(_connection, SPD_END);
|
||||
_connection->callback_cancel = speech_cancel_callback;
|
||||
spd_set_notification_on(_connection, SPD_CANCEL);
|
||||
_connection->callback_resume = speech_resume_callback;
|
||||
spd_set_notification_on(_connection, SPD_RESUME);
|
||||
_connection->callback_pause = speech_pause_callback;
|
||||
spd_set_notification_on(_connection, SPD_PAUSE);
|
||||
|
||||
updateVoices();
|
||||
_ttsState->_activeVoice = 0;
|
||||
#ifdef USE_TRANSLATION
|
||||
setLanguage(TransMan.getCurrentLanguage());
|
||||
#else
|
||||
setLanguage("en");
|
||||
#endif
|
||||
_speechQueue.clear();
|
||||
}
|
||||
|
||||
/**
 * Stop speech, clear the TTS state, close the connection, wait for the
 * helper thread (if one was created) and destroy the mutex - in that order.
 */
SpeechDispatcherManager::~SpeechDispatcherManager() {
	stop();
	clearState();

	if (_connection != 0) {
		spd_close(_connection);
	}

	if (_threadCreated) {
		pthread_join(_thread, NULL);
	}

	pthread_mutex_destroy(&_speechMutex);
}
|
||||
|
||||
/**
 * Advance the speech state machine; called by the speech-dispatcher
 * notification callbacks in this file.
 *
 * @param event  What happened to the message being spoken. SPEECH_ENDED also
 *               removes the finished string from the queue and, if more
 *               strings are waiting, starts the next one on a helper thread.
 */
void SpeechDispatcherManager::updateState(SpeechDispatcherManager::SpeechEvent event) {
	if (_speechState == BROKEN)
		return;

	switch(event) {
	case SPEECH_ENDED:
		pthread_mutex_lock(&_speechMutex);
		// stop(), init() and say() may clear the queue while a message is
		// still being spoken, so the finished entry may already be gone.
		if (!_speechQueue.empty())
			_speechQueue.pop_front();
		if (_speechQueue.empty())
			_speechState = READY;
		else {
			// reinitialize if needed
			if (!_connection)
				init();
			if (_speechState != BROKEN) {
				// Only one helper thread exists at a time: reap the previous
				// one before starting the next.
				if (_threadCreated)
					pthread_join(_thread, NULL);
				_threadCreated = true;
				if (pthread_create(&_thread, NULL, startSpeech, &_params)) {
					_threadCreated = false;
					warning("TTS: Cannot start new speech");
				}
			}
		}
		pthread_mutex_unlock(&_speechMutex);
		break;
	case SPEECH_PAUSED:
		_speechState = PAUSED;
		break;
	case SPEECH_CANCELED:
		// pause() sets PAUSED before canceling, so a cancel notification must
		// not overwrite that state.
		if (_speechState != PAUSED) {
			_speechState = READY;
		}
		break;
	case SPEECH_RESUMED:
		break;
	case SPEECH_BEGUN:
		_speechState = SPEAKING;
		break;
	}
}
|
||||
|
||||
/**
 * Queue a string for speaking and start speaking if the manager is ready.
 *
 * @param str     Text to speak.
 * @param action  How to combine the text with speech already queued.
 * @return true when the request returned early (manager BROKEN, DROP while
 *         speaking, or a *_NO_REPEAT match); false otherwise, including for
 *         an empty string.
 */
bool SpeechDispatcherManager::say(const Common::U32String &str, Action action) {
	pthread_mutex_lock(&_speechMutex);

	// reinitialize if needed
	if (!_connection)
		init();

	if (_speechState == BROKEN || (action == DROP && isSpeaking())) {
		pthread_mutex_unlock(&_speechMutex);
		return true;
	}

	Common::String strUtf8 = str.encode();

	if (!_speechQueue.empty()) {
		// Same string is being spoken: drop everything queued behind it.
		if (action == INTERRUPT_NO_REPEAT && _speechQueue.front() == strUtf8 && isSpeaking()) {
			_speechQueue.clear();
			_speechQueue.push_back(strUtf8);
			pthread_mutex_unlock(&_speechMutex);
			return true;
		}

		// Same string is already last in the queue: nothing to add.
		if (action == QUEUE_NO_REPEAT && _speechQueue.back() == strUtf8 && isSpeaking()) {
			pthread_mutex_unlock(&_speechMutex);
			return true;
		}
	}

	pthread_mutex_unlock(&_speechMutex);

	const bool interrupting = (action == INTERRUPT || action == INTERRUPT_NO_REPEAT);
	if (isSpeaking() && interrupting)
		stop();

	if (strUtf8.empty())
		return false;

	pthread_mutex_lock(&_speechMutex);
	_speechQueue.push_back(strUtf8);
	pthread_mutex_unlock(&_speechMutex);

	if (isReady()) {
		_speechState = SPEAKING;
		startSpeech((void *)(&_params));
	}

	return false;
}
|
||||
|
||||
bool SpeechDispatcherManager::stop() {
|
||||
if (_speechState == READY || _speechState == BROKEN)
|
||||
return true;
|
||||
_speechState = READY;
|
||||
pthread_mutex_lock(&_speechMutex);
|
||||
_speechQueue.clear();
|
||||
bool result = spd_cancel(_connection) == -1;
|
||||
pthread_mutex_unlock(&_speechMutex);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool SpeechDispatcherManager::pause() {
|
||||
if (_speechState == READY || _speechState == PAUSED || _speechState == BROKEN)
|
||||
return true;
|
||||
pthread_mutex_lock(&_speechMutex);
|
||||
_speechState = PAUSED;
|
||||
bool result = spd_cancel_all(_connection) == -1;
|
||||
pthread_mutex_unlock(&_speechMutex);
|
||||
if (result)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SpeechDispatcherManager::resume() {
|
||||
if (_speechState == READY || _speechState == SPEAKING || _speechState == BROKEN)
|
||||
return true;
|
||||
// If there is a thread from before pause() waiting, let it finish (it shouln't
|
||||
// do anything). There shouldn't be any other threads getting created,
|
||||
// because the speech is paused, so we don't need to synchronize
|
||||
if (_threadCreated) {
|
||||
pthread_join(_thread, NULL);
|
||||
_threadCreated = false;
|
||||
}
|
||||
_speechState = PAUSED;
|
||||
if (!_speechQueue.empty()) {
|
||||
_speechState = SPEAKING;
|
||||
startSpeech((void *) &_params);
|
||||
}
|
||||
else
|
||||
_speechState = READY;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SpeechDispatcherManager::isSpeaking() {
|
||||
return _speechState == SPEAKING;
|
||||
}
|
||||
|
||||
bool SpeechDispatcherManager::isPaused() {
|
||||
return _speechState == PAUSED;
|
||||
}
|
||||
|
||||
bool SpeechDispatcherManager::isReady() {
|
||||
return _speechState == READY;
|
||||
}
|
||||
|
||||
void SpeechDispatcherManager::setVoice(unsigned index) {
|
||||
if (_speechState == BROKEN)
|
||||
return;
|
||||
assert(index < _ttsState->_availableVoices.size());
|
||||
Common::TTSVoice voice = _ttsState->_availableVoices[index];
|
||||
spd_set_voice_type(_connection, *(SPDVoiceType *)(voice.getData()));
|
||||
_ttsState->_activeVoice = index;
|
||||
}
|
||||
|
||||
void SpeechDispatcherManager::setRate(int rate) {
|
||||
if (_speechState == BROKEN)
|
||||
return;
|
||||
assert(rate >= -100 && rate <= 100);
|
||||
spd_set_voice_rate(_connection, rate);
|
||||
_ttsState->_rate = rate;
|
||||
}
|
||||
|
||||
void SpeechDispatcherManager::setPitch(int pitch) {
|
||||
if (_speechState == BROKEN)
|
||||
return;
|
||||
assert(pitch >= -100 && pitch <= 100);
|
||||
spd_set_voice_pitch(_connection, pitch);
|
||||
_ttsState->_pitch = pitch;
|
||||
}
|
||||
|
||||
void SpeechDispatcherManager::setVolume(unsigned volume) {
|
||||
if (_speechState == BROKEN)
|
||||
return;
|
||||
assert(volume <= 100);
|
||||
spd_set_volume(_connection, (volume - 50) * 2);
|
||||
_ttsState->_volume = volume;
|
||||
}
|
||||
|
||||
/**
 * Change the language: update the shared state through the base class, pass
 * the resulting language to speech-dispatcher and re-apply the active voice.
 * Does nothing when BROKEN.
 */
void SpeechDispatcherManager::setLanguage(Common::String language) {
	if (_speechState != BROKEN) {
		Common::TextToSpeechManager::setLanguage(language);
		spd_set_language(_connection, _ttsState->_language.c_str());
		setVoice(_ttsState->_activeVoice);
	}
}
|
||||
|
||||
void SpeechDispatcherManager::createVoice(int typeNumber, Common::TTSVoice::Gender gender, Common::TTSVoice::Age age, char *description) {
|
||||
// This pointer will point to data needed for voice switching. It is stored
|
||||
// in the Common::TTSVoice and it is freed by freeVoiceData() once it
|
||||
// is not needed.
|
||||
SPDVoiceType *type = (SPDVoiceType *) malloc(sizeof(SPDVoiceType));
|
||||
*type = static_cast<SPDVoiceType>(typeNumber);
|
||||
Common::TTSVoice voice(gender, age, (void *) type, description);
|
||||
_ttsState->_availableVoices.push_back(voice);
|
||||
}
|
||||
|
||||
void SpeechDispatcherManager::updateVoices() {
|
||||
if (_speechState == BROKEN)
|
||||
return;
|
||||
/* just use these voices:
|
||||
SPD_MALE1, SPD_MALE2, SPD_MALE3,
|
||||
SPD_FEMALE1, SPD_FEMALE2, SPD_FEMALE3,
|
||||
SPD_CHILD_MALE, SPD_CHILD_FEMALE
|
||||
|
||||
it depends on the user to map them to the right voices in speech-dispatcher
|
||||
configuration
|
||||
*/
|
||||
_ttsState->_availableVoices.clear();
|
||||
|
||||
char **voiceInfo = spd_list_voices(_connection);
|
||||
|
||||
createVoice(SPD_MALE1, Common::TTSVoice::MALE, Common::TTSVoice::ADULT, voiceInfo[0]);
|
||||
createVoice(SPD_MALE2, Common::TTSVoice::MALE, Common::TTSVoice::ADULT, voiceInfo[1]);
|
||||
createVoice(SPD_MALE3, Common::TTSVoice::MALE, Common::TTSVoice::ADULT, voiceInfo[2]);
|
||||
createVoice(SPD_FEMALE1, Common::TTSVoice::FEMALE, Common::TTSVoice::ADULT, voiceInfo[3]);
|
||||
createVoice(SPD_FEMALE2, Common::TTSVoice::FEMALE, Common::TTSVoice::ADULT, voiceInfo[4]);
|
||||
createVoice(SPD_FEMALE3, Common::TTSVoice::FEMALE, Common::TTSVoice::ADULT, voiceInfo[5]);
|
||||
createVoice(SPD_CHILD_MALE, Common::TTSVoice::MALE, Common::TTSVoice::CHILD, voiceInfo[6]);
|
||||
createVoice(SPD_CHILD_FEMALE, Common::TTSVoice::FEMALE, Common::TTSVoice::CHILD, voiceInfo[7]);
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
free(voiceInfo[i]);
|
||||
|
||||
free(voiceInfo);
|
||||
}
|
||||
|
||||
/** Release the SPDVoiceType that createVoice() allocated with malloc() for a voice. */
void SpeechDispatcherManager::freeVoiceData(void *data) {
	void *voiceTypeStorage = data;
	free(voiceTypeStorage);
}
|
||||
|
||||
|
||||
#endif
|
||||
99
backends/text-to-speech/linux/linux-text-to-speech.h
Normal file
99
backends/text-to-speech/linux/linux-text-to-speech.h
Normal file
@@ -0,0 +1,99 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef BACKENDS_TEXT_TO_SPEECH_LINUX_H
|
||||
#define BACKENDS_TEXT_TO_SPEECH_LINUX_H
|
||||
|
||||
#include "common/scummsys.h"
|
||||
|
||||
#if defined(USE_TTS) && defined(USE_SPEECH_DISPATCHER) && defined(POSIX)
|
||||
|
||||
#include "common/text-to-speech.h"
|
||||
#include "common/str.h"
|
||||
#include "common/ustr.h"
|
||||
#include "common/list.h"
|
||||
#include "common/mutex.h"
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
struct StartSpeechParams {
|
||||
pthread_mutex_t *mutex;
|
||||
Common::List<Common::String> *speechQueue;
|
||||
};
|
||||
|
||||
class SpeechDispatcherManager : public Common::TextToSpeechManager {
|
||||
public:
|
||||
enum SpeechState {
|
||||
READY,
|
||||
PAUSED,
|
||||
SPEAKING,
|
||||
BROKEN
|
||||
};
|
||||
|
||||
enum SpeechEvent {
|
||||
SPEECH_ENDED,
|
||||
SPEECH_PAUSED,
|
||||
SPEECH_CANCELED,
|
||||
SPEECH_RESUMED,
|
||||
SPEECH_BEGUN
|
||||
};
|
||||
|
||||
SpeechDispatcherManager();
|
||||
~SpeechDispatcherManager() override;
|
||||
|
||||
bool say(const Common::U32String &str, Action action) override;
|
||||
|
||||
bool stop() override;
|
||||
bool pause() override;
|
||||
bool resume() override;
|
||||
|
||||
bool isSpeaking() override;
|
||||
bool isPaused() override;
|
||||
bool isReady() override;
|
||||
|
||||
void setVoice(unsigned index) override;
|
||||
void setRate(int rate) override;
|
||||
void setPitch(int pitch) override;
|
||||
void setVolume(unsigned volume) override;
|
||||
void setLanguage(Common::String language) override;
|
||||
|
||||
void updateState(SpeechEvent event);
|
||||
|
||||
void freeVoiceData(void *data) override;
|
||||
|
||||
private:
|
||||
void init();
|
||||
void updateVoices() override;
|
||||
void createVoice(int typeNumber, Common::TTSVoice::Gender, Common::TTSVoice::Age, char *description);
|
||||
Common::String strToUtf8(Common::String str, Common::String charset);
|
||||
static void *startSpeech(void *p);
|
||||
|
||||
StartSpeechParams _params;
|
||||
SpeechState _speechState;
|
||||
Common::List<Common::String> _speechQueue;
|
||||
pthread_mutex_t _speechMutex;
|
||||
pthread_t _thread;
|
||||
bool _threadCreated;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // BACKENDS_TEXT_TO_SPEECH_LINUX_H
|
||||
75
backends/text-to-speech/macosx/macosx-text-to-speech.h
Normal file
75
backends/text-to-speech/macosx/macosx-text-to-speech.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef BACKENDS_TEXT_TO_SPEECH_MACOSX_H
|
||||
#define BACKENDS_TEXT_TO_SPEECH_MACOSX_H
|
||||
|
||||
#include "common/scummsys.h"
|
||||
|
||||
#if defined(USE_TTS) && defined(MACOSX)
|
||||
|
||||
#include "common/text-to-speech.h"
|
||||
#include "common/queue.h"
|
||||
#include "common/ustr.h"
|
||||
|
||||
class MacOSXTextToSpeechManager : public Common::TextToSpeechManager {
|
||||
public:
|
||||
MacOSXTextToSpeechManager();
|
||||
~MacOSXTextToSpeechManager() override;
|
||||
|
||||
bool say(const Common::U32String &str, Action action) override;
|
||||
|
||||
bool stop() override;
|
||||
bool pause() override;
|
||||
bool resume() override;
|
||||
|
||||
bool isSpeaking() override;
|
||||
bool isPaused() override;
|
||||
bool isReady() override;
|
||||
|
||||
void setVoice(unsigned index) override;
|
||||
|
||||
void setRate(int rate) override;
|
||||
|
||||
void setPitch(int pitch) override;
|
||||
|
||||
void setVolume(unsigned volume) override;
|
||||
|
||||
void setLanguage(Common::String language) override;
|
||||
|
||||
int getDefaultVoice() override;
|
||||
|
||||
void freeVoiceData(void *data) override;
|
||||
|
||||
bool startNextSpeech();
|
||||
|
||||
private:
|
||||
void updateVoices() override;
|
||||
|
||||
Common::Queue<Common::String> _messageQueue;
|
||||
Common::String _currentSpeech;
|
||||
bool _paused;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // BACKENDS_TEXT_TO_SPEECH_MACOSX_H
|
||||
|
||||
309
backends/text-to-speech/macosx/macosx-text-to-speech.mm
Normal file
309
backends/text-to-speech/macosx/macosx-text-to-speech.mm
Normal file
@@ -0,0 +1,309 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
// Disable symbol overrides so that we can use system headers.
|
||||
#define FORBIDDEN_SYMBOL_ALLOW_ALL
|
||||
|
||||
#include "backends/text-to-speech/macosx/macosx-text-to-speech.h"
|
||||
|
||||
#if defined(USE_TTS) && defined(MACOSX)
|
||||
#include "common/translation.h"
|
||||
#include <AppKit/NSSpeechSynthesizer.h>
|
||||
#include <Foundation/NSString.h>
|
||||
#include <CoreFoundation/CFString.h>
|
||||
|
||||
/**
 * NSSpeechSynthesizer delegate that forwards "finished speaking"
 * notifications to a MacOSXTextToSpeechManager.
 */
@interface MacOSXTextToSpeechManagerDelegate : NSObject<NSSpeechSynthesizerDelegate> {
	// Manager notified when a speech finishes (plain pointer, not owned).
	MacOSXTextToSpeechManager *_ttsManager;
	// When YES, the next didFinishSpeaking notification is swallowed.
	BOOL _ignoreNextFinishedSpeaking;
}

// Stores the manager pointer and clears the ignore flag.
- (id)initWithManager:(MacOSXTextToSpeechManager *)ttsManager;

// NSSpeechSynthesizerDelegate callback.
- (void)speechSynthesizer:(NSSpeechSynthesizer *)sender didFinishSpeaking:(BOOL)finishedSpeaking;

// Sets or clears the flag consulted by the callback above.
- (void)ignoreNextFinishedSpeaking:(BOOL)ignore;
@end
|
||||
|
||||
@implementation MacOSXTextToSpeechManagerDelegate

// Remembers the manager to notify; notifications are not ignored initially.
- (id)initWithManager:(MacOSXTextToSpeechManager *)ttsManager {
	self = [super init];
	_ignoreNextFinishedSpeaking = NO;
	_ttsManager = ttsManager;
	return self;
}

// Asks the manager to start its next speech unless this notification was
// flagged to be ignored. The flag is always cleared afterwards, so it only
// ever suppresses a single notification.
- (void)speechSynthesizer:(NSSpeechSynthesizer *)sender didFinishSpeaking:(BOOL)finishedSpeaking {
	const BOOL swallow = _ignoreNextFinishedSpeaking;
	if (swallow == NO) {
		_ttsManager->startNextSpeech();
	}
	_ignoreNextFinishedSpeaking = NO;
}

// Sets the flag that makes the next didFinishSpeaking notification a no-op.
- (void)ignoreNextFinishedSpeaking:(BOOL)ignore {
	_ignoreNextFinishedSpeaking = ignore;
}

@end
|
||||
|
||||
// File-scope Cocoa objects used by MacOSXTextToSpeechManager. They are
// created in its constructor and released in its destructor.
NSSpeechSynthesizer *synthesizer = nil;
MacOSXTextToSpeechManagerDelegate *synthesizerDelegate = nil;
|
||||
|
||||
/**
 * Creates the speech synthesizer and its delegate, wires them together and
 * selects the initial language (the current translation language when
 * USE_TRANSLATION is defined, "en" otherwise).
 */
MacOSXTextToSpeechManager::MacOSXTextToSpeechManager()
	: Common::TextToSpeechManager(), _paused(false) {
	synthesizer = [[NSSpeechSynthesizer alloc] init];
	synthesizerDelegate = [[MacOSXTextToSpeechManagerDelegate alloc] initWithManager:this];
	[synthesizer setDelegate:synthesizerDelegate];

#ifndef USE_TRANSLATION
	setLanguage("en");
#else
	setLanguage(TransMan.getCurrentLanguage());
#endif
}
|
||||
|
||||
/**
 * Clears the manager state, then drops the references to the synthesizer
 * and to its delegate (in that order).
 */
MacOSXTextToSpeechManager::~MacOSXTextToSpeechManager() {
	clearState();

	// Manual reference counting: balance the alloc/init from the constructor.
	[synthesizer release];
	[synthesizerDelegate release];
}
|
||||
|
||||
/**
 * Queues a text for speaking.
 *
 * While a speech is in progress the action decides what happens:
 *  - DROP: the new text is discarded.
 *  - INTERRUPT: the queue is emptied and the current speech is asked to
 *    stop at the next word boundary.
 *  - INTERRUPT_NO_REPEAT: the queue is emptied; the current speech is only
 *    stopped if it differs from the new text.
 *  - QUEUE_NO_REPEAT: the text is discarded if it equals the last queued
 *    message (or the current speech when the queue is empty).
 * Any other action just appends to the queue.
 *
 * @param text   Text to speak.
 * @param action How to combine the text with an ongoing speech.
 * @return Always true.
 */
bool MacOSXTextToSpeechManager::say(const Common::U32String &text, Action action) {
	Common::String textToSpeak = text.encode();

	if (isSpeaking()) {
		// Interruptions happen on word boundaries for nicer transitions.
		switch (action) {
		case DROP:
			return true;

		case INTERRUPT:
			_messageQueue.clear();
			[synthesizer stopSpeakingAtBoundary:NSSpeechWordBoundary];
			break;

		case INTERRUPT_NO_REPEAT:
			// Pending messages are dropped in both cases; the running speech
			// is left alone when it is the very text being requested.
			_messageQueue.clear();
			if (_currentSpeech == textToSpeak)
				return true;
			[synthesizer stopSpeakingAtBoundary:NSSpeechWordBoundary];
			break;

		case QUEUE_NO_REPEAT: {
			// Compare against whatever would be spoken right before the new text.
			const bool repeated = _messageQueue.empty()
				? (_currentSpeech == textToSpeak)
				: (_messageQueue.back() == textToSpeak);
			if (repeated)
				return true;
			break;
		}

		default:
			break;
		}
	}

	_messageQueue.push(textToSpeak);
	// When something is still speaking, the delegate callback starts the
	// next message once it has finished.
	if (!isSpeaking())
		startNextSpeech();
	return true;
}
|
||||
|
||||
/**
 * Starts speaking the next non-empty message of the queue.
 *
 * The current speech is cleared first, so the manager reports that it is
 * not speaking whenever nothing could be started.
 *
 * @return true if the synthesizer accepted a message, false if the queue
 *         held no non-empty message, the text could not be converted to a
 *         CFString, or the synthesizer refused to start.
 */
bool MacOSXTextToSpeechManager::startNextSpeech() {
	_currentSpeech.clear();
	if (_messageQueue.empty())
		return false;

	// Skip over empty messages.
	Common::String textToSpeak;
	do {
		textToSpeak = _messageQueue.pop();
	} while (textToSpeak.empty() && !_messageQueue.empty());
	if (textToSpeak.empty())
		return false;

	// The queued strings are interpreted as UTF-8.
	CFStringEncoding stringEncoding = kCFStringEncodingUTF8;

	CFStringRef textNSString = CFStringCreateWithCString(NULL, textToSpeak.c_str(), stringEncoding);
	// Creation fails (NULL) when the bytes cannot be decoded with the given
	// encoding; CFRelease() must not be called with NULL.
	if (textNSString == NULL)
		return false;

	bool status = [synthesizer startSpeakingString:(NSString *)textNSString];
	CFRelease(textNSString);
	if (status)
		_currentSpeech = textToSpeak;

	return status;
}
|
||||
|
||||
/**
 * Empties the message queue and, if a speech is in progress, stops it
 * immediately.
 *
 * @return Always true.
 */
bool MacOSXTextToSpeechManager::stop() {
	_messageQueue.clear();
	if (!isSpeaking())
		return true;

	// Clearing the text makes isSpeaking() report false right away.
	_currentSpeech.clear();

	// The didFinishSpeaking notification for this stop has effectively been
	// handled here. It may arrive after another speech was started, so the
	// delegate is told to ignore it rather than act on it.
	[synthesizerDelegate ignoreNextFinishedSpeaking:YES];
	[synthesizer stopSpeakingAtBoundary:NSSpeechImmediateBoundary];
	return true;
}
|
||||
|
||||
/**
 * Asks the synthesizer to pause at the next word boundary (pausing in the
 * middle of a word sounds strange) and records the paused state.
 *
 * @return Always true.
 */
bool MacOSXTextToSpeechManager::pause() {
	[synthesizer pauseSpeakingAtBoundary:NSSpeechWordBoundary];
	_paused = true;
	return true;
}
|
||||
|
||||
/**
 * Clears the paused flag and tells the synthesizer to continue speaking.
 *
 * @return Always true.
 */
bool MacOSXTextToSpeechManager::resume() {
	_paused = false;
	[synthesizer continueSpeaking];
	return true;
}
|
||||
|
||||
/**
 * Reports whether a speech is considered to be in progress.
 *
 * NSSpeechSynthesizer is asynchronous: [synthesizer isSpeaking] is likely
 * to return NO right after [synthesizer startSpeakingString:]. The manager
 * therefore relies on _currentSpeech, which is set when a speech is started
 * and cleared when the next one is started or the speech is stopped.
 */
bool MacOSXTextToSpeechManager::isSpeaking() {
	const bool nothingInProgress = _currentSpeech.empty();
	return !nothingInProgress;
}
|
||||
|
||||
/**
 * Reports whether pause() was requested and not yet undone by resume().
 *
 * The synthesizer status (NSSpeechStatusProperty) is not consulted:
 * pausing is asynchronous and happens at the end of a word, so the status
 * is likely not to say "paused" yet right after the request. The manager
 * keeps its own flag instead.
 */
bool MacOSXTextToSpeechManager::isPaused() {
	return _paused;
}
|
||||
|
||||
/**
 * Reports whether the manager is neither speaking nor paused.
 *
 * As in isSpeaking() and isPaused(), the manager's own bookkeeping is used
 * rather than the synthesizer's NSSpeechStatusProperty.
 */
bool MacOSXTextToSpeechManager::isReady() {
	if (!_currentSpeech.empty())
		return false;
	return !_paused;
}
|
||||
|
||||
/**
 * Makes the voice at the given index the active one.
 *
 * Does nothing when no voice is available; otherwise the index must be
 * valid (asserted).
 *
 * @param index Position in _ttsState->_availableVoices.
 */
void MacOSXTextToSpeechManager::setVoice(unsigned index) {
	if (_ttsState->_availableVoices.empty())
		return;
	assert(index < _ttsState->_availableVoices.size());

	Common::TTSVoice selected = _ttsState->_availableVoices[index];
	_ttsState->_activeVoice = index;

	[synthesizer setVoice:(NSString *)selected.getData()];

	// Changing the voice resets the synthesizer's pitch and rate to the
	// defaults of that voice. Reset the stored modifiers to 0 as well, then
	// apply the previous values again so they act on the new defaults.
	const int savedPitch = getPitch();
	const int savedRate = getRate();
	Common::TextToSpeechManager::setPitch(0);
	Common::TextToSpeechManager::setRate(0);
	setPitch(savedPitch);
	setRate(savedRate);
}
|
||||
|
||||
/**
 * Changes the speech rate.
 *
 * The rate is a value between -100 and +100 with 0 being the default; it
 * is mapped to a multiplier between 0.5 and 1.5. The synthesizer's current
 * rate already includes the previous multiplier, which is divided out
 * before the new one is applied.
 *
 * @param rate New rate modifier.
 */
void MacOSXTextToSpeechManager::setRate(int rate) {
	const int previousRate = getRate();
	Common::TextToSpeechManager::setRate(rate);

	const float previousFactor = 1.0f + previousRate / 200.0f;
	const float newFactor = 1.0f + rate / 200.0f;
	const float unscaledRate = synthesizer.rate / previousFactor;
	synthesizer.rate = unscaledRate * newFactor;
}
|
||||
|
||||
/**
 * Changes the speech pitch.
 *
 * The pitch is a value between -100 and +100 with 0 being the default; it
 * is mapped to a multiplier between 0.5 and 1.5 on the voice's base pitch.
 * The synthesizer's current base pitch already includes the previous
 * multiplier, which is divided out before the new one is applied.
 *
 * @param pitch New pitch modifier.
 */
void MacOSXTextToSpeechManager::setPitch(int pitch) {
	const int previousPitch = getPitch();
	Common::TextToSpeechManager::setPitch(pitch);

	const float previousFactor = 1.0f + previousPitch / 200.0f;
	const float newFactor = 1.0f + pitch / 200.0f;

	NSNumber *currentBase = [synthesizer objectForProperty:NSSpeechPitchBaseProperty error:nil];
	const float unscaledBase = [currentBase floatValue] / previousFactor;
	const float basePitch = unscaledBase * newFactor;
	[synthesizer setObject:[NSNumber numberWithFloat:basePitch] forProperty:NSSpeechPitchBaseProperty error:nil];
}
|
||||
|
||||
/**
 * Stores the volume in the base class and forwards it to the synthesizer,
 * scaled down by a factor of 100.
 *
 * @param volume New volume.
 */
void MacOSXTextToSpeechManager::setVolume(unsigned volume) {
	Common::TextToSpeechManager::setVolume(volume);
	const float scaled = volume / 100.0f;
	synthesizer.volume = scaled;
}
|
||||
|
||||
/**
 * Stores the language in the base class and rebuilds the list of voices.
 *
 * @param language Language to switch to.
 */
void MacOSXTextToSpeechManager::setLanguage(Common::String language) {
	Common::TextToSpeechManager::setLanguage(language);

	// The set of usable voices depends on the language.
	updateVoices();
}
|
||||
|
||||
/**
 * Finds the system default voice among the available voices.
 *
 * @return Index of the first available voice whose identifier equals
 *         [NSSpeechSynthesizer defaultVoice]; 0 when there are fewer than
 *         two voices, no default voice is reported, or it is not listed.
 */
int MacOSXTextToSpeechManager::getDefaultVoice() {
	const unsigned int voiceCount = _ttsState->_availableVoices.size();
	if (voiceCount < 2)
		return 0;

	NSString *systemDefault = [NSSpeechSynthesizer defaultVoice];
	if (systemDefault == nil)
		return 0;

	unsigned int idx = 0;
	while (idx < _ttsState->_availableVoices.size()) {
		NSString *candidate = (NSString *)(_ttsState->_availableVoices[idx].getData());
		if ([systemDefault isEqualToString:candidate])
			return idx;
		++idx;
	}
	return 0;
}
|
||||
|
||||
/**
 * Releases the data attached to a voice. The pointer is treated as an
 * NSString and sent a release message.
 *
 * @param data Voice data pointer.
 */
void MacOSXTextToSpeechManager::freeVoiceData(void *data) {
	[(NSString *)data release];
}
|
||||
|
||||
/**
 * Rebuilds the list of available voices for the current language.
 *
 * Every system voice whose locale identifier starts with the current
 * language is added. Afterwards the active voice is, in order of
 * preference: the voice whose name equals the previously active one, the
 * system default voice if it is in the list, or the first voice.
 */
void MacOSXTextToSpeechManager::updateVoices() {
	// Remember the name of the voice in use so that it can be found again.
	Common::String previousName;
	if (!_ttsState->_availableVoices.empty())
		previousName = _ttsState->_availableVoices[_ttsState->_activeVoice].getDescription();
	_ttsState->_availableVoices.clear();

	int previousIndex = -1;
	int systemDefaultIndex = -1;

	Common::String lang = getLanguage();
	NSArray *systemVoices = [NSSpeechSynthesizer availableVoices];
	NSString *systemDefault = [NSSpeechSynthesizer defaultVoice];

	int nextIndex = 0;
	for (NSString *voiceId in systemVoices) {
		NSDictionary *attrs = [NSSpeechSynthesizer attributesForVoice:voiceId];
		Common::String locale([[attrs objectForKey:NSVoiceLocaleIdentifier] UTF8String]);
		if (!locale.hasPrefix(lang))
			continue;

		// The voice keeps its own copy of the identifier; it is released in
		// freeVoiceData().
		NSString *data = [[NSString alloc] initWithString:voiceId];
		Common::String name([[attrs objectForKey:NSVoiceName] UTF8String]);

		// The gender attribute can be VoiceGenderMale, VoiceGenderFemale or
		// VoiceGenderNeuter; anything but the first two stays unknown.
		Common::TTSVoice::Gender gender = Common::TTSVoice::UNKNOWN_GENDER;
		NSString *genderAttr = [attrs objectForKey:NSVoiceGender];
		if (genderAttr != nil) {
			if ([genderAttr isEqualToString:@"VoiceGenderMale"])
				gender = Common::TTSVoice::MALE;
			else if ([genderAttr isEqualToString:@"VoiceGenderFemale"])
				gender = Common::TTSVoice::FEMALE;
		}

		// Ages below 18 count as child voices.
		Common::TTSVoice::Age age = Common::TTSVoice::UNKNOWN_AGE;
		NSNumber *ageAttr = [attrs objectForKey:NSVoiceAge];
		if (ageAttr != nil)
			age = ([ageAttr integerValue] < 18) ? Common::TTSVoice::CHILD : Common::TTSVoice::ADULT;

		Common::TTSVoice voice(gender, age, data, name);
		_ttsState->_availableVoices.push_back(voice);

		if (name == previousName)
			previousIndex = nextIndex;
		if (systemDefault != nil && [systemDefault isEqualToString:voiceId])
			systemDefaultIndex = nextIndex;
		++nextIndex;
	}

	int chosen = previousIndex;
	if (chosen == -1)
		chosen = (systemDefaultIndex == -1) ? 0 : systemDefaultIndex;
	setVoice(chosen);
}
|
||||
|
||||
|
||||
#endif
|
||||
490
backends/text-to-speech/windows/windows-text-to-speech.cpp
Normal file
490
backends/text-to-speech/windows/windows-text-to-speech.cpp
Normal file
@@ -0,0 +1,490 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
// Disable symbol overrides so that we can use system headers.
|
||||
#define FORBIDDEN_SYMBOL_ALLOW_ALL
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#if defined(USE_TTS) && defined(WIN32)
|
||||
#include <basetyps.h>
|
||||
#include <windows.h>
|
||||
#include <servprov.h>
|
||||
|
||||
#include <sapi.h>
|
||||
#if _SAPI_VER < 0x53
|
||||
#define SPF_PARSE_SAPI 0x80
|
||||
#endif
|
||||
|
||||
#include "backends/platform/sdl/win32/win32_wrapper.h"
|
||||
|
||||
#include "backends/text-to-speech/windows/windows-text-to-speech.h"
|
||||
|
||||
|
||||
#include "common/translation.h"
|
||||
#include "common/system.h"
|
||||
#include "common/ustr.h"
|
||||
#include "common/config-manager.h"
|
||||
|
||||
ISpVoice *_voice;
|
||||
|
||||
// We need this pointer to be able to stop speech immediately.
|
||||
ISpAudio *_audio;
|
||||
|
||||
/**
 * Initializes the speech engine, prepares the parameters handed to the
 * speech thread and creates the mutex guarding the speech queue. The
 * manager is marked BROKEN when the mutex cannot be created.
 */
WindowsTextToSpeechManager::WindowsTextToSpeechManager()
	: _speechState(BROKEN) {
	init();

	// No speech thread exists yet; it will receive pointers to these members.
	_thread = nullptr;
	_threadParams.mutex = &_speechMutex;
	_threadParams.state = &_speechState;
	_threadParams.queue = &_speechQueue;

	_speechMutex = CreateMutex(nullptr, FALSE, nullptr);
	if (_speechMutex != nullptr)
		return;

	_speechState = BROKEN;
	warning("Could not create TTS mutex");
}
|
||||
|
||||
void WindowsTextToSpeechManager::init() {
|
||||
// init COM
|
||||
if (FAILED(::CoInitialize(nullptr)))
|
||||
return;
|
||||
|
||||
// init audio
|
||||
ISpObjectTokenCategory *pTokenCategory;
|
||||
HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL, IID_ISpObjectTokenCategory, (void **)&pTokenCategory);
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = pTokenCategory->SetId(SPCAT_AUDIOOUT, TRUE);
|
||||
if (SUCCEEDED(hr)) {
|
||||
WCHAR *tokenId;
|
||||
hr = pTokenCategory->GetDefaultTokenId(&tokenId);
|
||||
if (SUCCEEDED(hr)) {
|
||||
ISpObjectToken *pToken;
|
||||
hr = CoCreateInstance(CLSID_SpObjectToken, nullptr, CLSCTX_ALL, IID_ISpObjectToken, (void **)&pToken);
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = pToken->SetId(nullptr, tokenId, FALSE);
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = pToken->CreateInstance(nullptr, CLSCTX_ALL, IID_ISpAudio, (void **)&_audio);
|
||||
}
|
||||
}
|
||||
CoTaskMemFree(tokenId);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (FAILED(hr)) {
|
||||
warning("Could not initialize TTS audio");
|
||||
return;
|
||||
}
|
||||
|
||||
// init voice
|
||||
hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&_voice);
|
||||
if (FAILED(hr)) {
|
||||
warning("Could not initialize TTS voice");
|
||||
return;
|
||||
}
|
||||
|
||||
_speechState = NO_VOICE;
|
||||
|
||||
#ifdef USE_TRANSLATION
|
||||
setLanguage(TransMan.getCurrentLanguage());
|
||||
#else
|
||||
setLanguage("en");
|
||||
#endif
|
||||
|
||||
_voice->SetOutput(_audio, FALSE);
|
||||
|
||||
if (!_ttsState->_availableVoices.empty())
|
||||
_speechState = READY;
|
||||
else
|
||||
_speechState = NO_VOICE;
|
||||
_lastSaid = "";
|
||||
while (!_speechQueue.empty()) {
|
||||
free(_speechQueue.front());
|
||||
_speechQueue.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Stops any speech, waits for the speech thread to finish and releases the
 * thread and mutex handles, the SAPI voice and audio objects, and COM.
 */
WindowsTextToSpeechManager::~WindowsTextToSpeechManager() {
	stop();

	clearState();

	if (_thread != nullptr) {
		WaitForSingleObject(_thread, INFINITE);
		CloseHandle(_thread);
	}
	if (_speechMutex != nullptr) {
		CloseHandle(_speechMutex);
	}
	// Release the voice first: it uses the audio object as its output.
	if (_voice) {
		_voice->Release();
		_voice = nullptr;
	}
	// The audio object was created in init() and this reference is ours.
	if (_audio) {
		_audio->Release();
		_audio = nullptr;
	}
	::CoUninitialize();
}
|
||||
|
||||
/**
 * Entry point of the speech thread.
 *
 * Speaks the messages of the queue one after the other until the queue is
 * empty or the manager is paused. A message is removed from the queue (and
 * freed) after it was spoken, provided it is still at the front. Unless
 * the manager is paused, the state is set to READY before returning.
 *
 * @param parameters Pointer to a WindowsTextToSpeechManager::SpeechParameters.
 * @return Always 0.
 */
DWORD WINAPI startSpeech(LPVOID parameters) {
	WindowsTextToSpeechManager::SpeechParameters *ctx =
		(WindowsTextToSpeechManager::SpeechParameters *) parameters;

	// A previous thread may have exited before its speech was over.
	_voice->WaitUntilDone(INFINITE);

	while (!ctx->queue->empty()) {
		WaitForSingleObject(*ctx->mutex, INFINITE);
		// Look again now that the queue cannot change under us.
		const bool nothingToDo = ctx->queue->empty() || *(ctx->state) == WindowsTextToSpeechManager::PAUSED;
		if (nothingToDo) {
			ReleaseMutex(*ctx->mutex);
			break;
		}
		WCHAR *spoken = ctx->queue->front();
		_voice->Speak(spoken, SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_PARSE_SAPI, nullptr);
		ReleaseMutex(*ctx->mutex);

		// Poll in short slices so that a pause request is noticed quickly.
		for (;;) {
			if (*(ctx->state) == WindowsTextToSpeechManager::PAUSED)
				break;
			if (_voice->WaitUntilDone(10) == S_OK)
				break;
		}

		WaitForSingleObject(*ctx->mutex, INFINITE);
		// Only remove the message if it is still the one at the front.
		if (!ctx->queue->empty() && ctx->queue->front() == spoken) {
			// free() accepts a null pointer, so no separate check is needed.
			free(spoken);
			ctx->queue->pop_front();
		}
		ReleaseMutex(*ctx->mutex);
	}

	WaitForSingleObject(*ctx->mutex, INFINITE);
	if (*(ctx->state) != WindowsTextToSpeechManager::PAUSED)
		*(ctx->state) = WindowsTextToSpeechManager::READY;
	ReleaseMutex(*ctx->mutex);
	return 0;
}
|
||||
|
||||
/**
 * Queues a text for speaking and starts the speech thread if needed.
 *
 * Behavior while something is being spoken depends on the action:
 *  - DROP: the text is discarded.
 *  - INTERRUPT_NO_REPEAT: if the text equals the message at the front of
 *    the queue, only the rest of the queue is dropped; otherwise everything
 *    is stopped and the text is spoken.
 *  - QUEUE_NO_REPEAT: the text is discarded if it equals the last queued
 *    message.
 *  - INTERRUPT: everything is stopped and the text is spoken.
 * INTERRUPT and INTERRUPT_NO_REPEAT also stop a paused speech.
 *
 * @param str    Text to speak.
 * @param action How to combine the text with ongoing speech.
 * @return false when the text was queued; true when it was not queued or
 *         the speech thread could not be created.
 */
bool WindowsTextToSpeechManager::say(const Common::U32String &str, Action action) {
	if (_speechState == BROKEN || _speechState == NO_VOICE) {
		if (_ttsState->_enabled)
			warning("The text to speech cannot speak in this state");
		return true;
	}

	if (isSpeaking() && action == DROP)
		return true;

	// We have to set the pitch by prepending xml code at the start of the said string.
	Common::U32String pitch = Common::U32String::format("<pitch absmiddle=\"%d\"/>%S", _ttsState->_pitch / 10, str.c_str());
	WCHAR *strW = (WCHAR *) pitch.encodeUTF16Native();
	if (strW == nullptr) {
		warning("Cannot convert from UTF-32 encoding for text to speech");
		return true;
	}

	WaitForSingleObject(_speechMutex, INFINITE);
	if (isSpeaking() && !_speechQueue.empty() && action == INTERRUPT_NO_REPEAT &&
			_speechQueue.front() != nullptr && !wcscmp(_speechQueue.front(), strW)) {
		// The requested text is already at the front: keep it, drop the rest.
		while (_speechQueue.size() != 1) {
			free(_speechQueue.back());
			_speechQueue.pop_back();
		}
		free(strW);
		ReleaseMutex(_speechMutex);
		return true;
	}

	// The element compared against is the back of the queue, so that is the
	// one that must be checked for null.
	if (isSpeaking() && !_speechQueue.empty() && action == QUEUE_NO_REPEAT &&
			_speechQueue.back() != nullptr && !wcscmp(_speechQueue.back(), strW)) {
		// The text is not queued, so the converted copy must be freed here.
		free(strW);
		ReleaseMutex(_speechMutex);
		return true;
	}

	// stop() takes the mutex itself, so it must be released first.
	ReleaseMutex(_speechMutex);
	if ((isPaused() || isSpeaking()) && (action == INTERRUPT || action == INTERRUPT_NO_REPEAT)) {
		stop();
	}

	WaitForSingleObject(_speechMutex, INFINITE);
	_speechQueue.push_back(strW);
	ReleaseMutex(_speechMutex);

	if (!isSpeaking() && !isPaused()) {
		DWORD threadId;
		// Make sure the previous speech thread is completely gone.
		if (_thread != nullptr) {
			WaitForSingleObject(_thread, INFINITE);
			CloseHandle(_thread);
		}
		_speechState = SPEAKING;
		_thread = CreateThread(nullptr, 0, startSpeech, &_threadParams, 0, &threadId);
		if (_thread == nullptr) {
			warning("Could not create speech thread");
			_speechState = READY;
			return true;
		}
	}
	return false;
}
|
||||
|
||||
bool WindowsTextToSpeechManager::stop() {
|
||||
if (_speechState == BROKEN || _speechState == NO_VOICE)
|
||||
return true;
|
||||
if (isPaused())
|
||||
resume();
|
||||
_audio->SetState(SPAS_STOP, 0);
|
||||
WaitForSingleObject(_speechMutex, INFINITE);
|
||||
// Delete the speech queue
|
||||
while (!_speechQueue.empty()) {
|
||||
if (_speechQueue.front() != NULL)
|
||||
free(_speechQueue.front());
|
||||
_speechQueue.pop_front();
|
||||
}
|
||||
// Stop the current speech
|
||||
_voice->Speak(nullptr, SPF_PURGEBEFORESPEAK | SPF_ASYNC, nullptr);
|
||||
_speechState = READY;
|
||||
ReleaseMutex(_speechMutex);
|
||||
_audio->SetState(SPAS_RUN, 0);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool WindowsTextToSpeechManager::pause() {
|
||||
if (_speechState == BROKEN || _speechState == NO_VOICE)
|
||||
return true;
|
||||
if (isPaused())
|
||||
return false;
|
||||
WaitForSingleObject(_speechMutex, INFINITE);
|
||||
_voice->Pause();
|
||||
_speechState = PAUSED;
|
||||
ReleaseMutex(_speechMutex);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool WindowsTextToSpeechManager::resume() {
|
||||
if (_speechState == BROKEN || _speechState == NO_VOICE)
|
||||
return true;
|
||||
if (!isPaused())
|
||||
return false;
|
||||
_voice->Resume();
|
||||
DWORD threadId;
|
||||
if (_thread != nullptr) {
|
||||
WaitForSingleObject(_thread, INFINITE);
|
||||
CloseHandle(_thread);
|
||||
}
|
||||
_speechState = SPEAKING;
|
||||
_thread = CreateThread(nullptr, 0, startSpeech, &_threadParams, 0, &threadId);
|
||||
if (_thread == nullptr) {
|
||||
warning("Could not create speech thread");
|
||||
_speechState = READY;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool WindowsTextToSpeechManager::isSpeaking() {
|
||||
return _speechState == SPEAKING;
|
||||
}
|
||||
|
||||
bool WindowsTextToSpeechManager::isPaused() {
|
||||
return _speechState == PAUSED;
|
||||
}
|
||||
|
||||
bool WindowsTextToSpeechManager::isReady() {
|
||||
if (_speechState == BROKEN || _speechState == NO_VOICE)
|
||||
return false;
|
||||
if (_speechState != PAUSED && !isSpeaking())
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
void WindowsTextToSpeechManager::setVoice(unsigned index) {
|
||||
if (_speechState == BROKEN || _speechState == NO_VOICE)
|
||||
return;
|
||||
_voice->SetVoice((ISpObjectToken *) _ttsState->_availableVoices[index].getData());
|
||||
_ttsState->_activeVoice = index;
|
||||
}
|
||||
|
||||
void WindowsTextToSpeechManager::setRate(int rate) {
|
||||
if (_speechState == BROKEN || _speechState == NO_VOICE)
|
||||
return;
|
||||
assert(rate >= -100 && rate <= 100);
|
||||
_voice->SetRate(rate / 10);
|
||||
_ttsState->_rate = rate;
|
||||
}
|
||||
|
||||
void WindowsTextToSpeechManager::setPitch(int pitch) {
|
||||
if (_speechState == BROKEN || _speechState == NO_VOICE)
|
||||
return;
|
||||
assert(pitch >= -100 && pitch <= 100);
|
||||
_ttsState->_pitch = pitch;
|
||||
}
|
||||
|
||||
void WindowsTextToSpeechManager::setVolume(unsigned volume) {
|
||||
if (_speechState == BROKEN || _speechState == NO_VOICE)
|
||||
return;
|
||||
assert(volume <= 100);
|
||||
_voice->SetVolume(volume);
|
||||
_ttsState->_volume = volume;
|
||||
}
|
||||
|
||||
/**
 * Switches to the given language and selects its first voice.
 *
 * The voice list is rebuilt when the first two characters of the language
 * differ from the stored language or when no voice is available. If
 * neither is the case and the manager is in the NO_VOICE state, it becomes
 * READY.
 *
 * @param language Language to switch to.
 */
void WindowsTextToSpeechManager::setLanguage(Common::String language) {
	const bool needsRefresh = _ttsState->_language != language.substr(0, 2) || _ttsState->_availableVoices.empty();

	if (needsRefresh) {
		Common::TextToSpeechManager::setLanguage(language);
		updateVoices();
	} else if (_speechState == NO_VOICE) {
		_speechState = READY;
	}

	setVoice(0);
}
|
||||
|
||||
void WindowsTextToSpeechManager::createVoice(void *cpVoiceToken) {
|
||||
ISpObjectToken *voiceToken = (ISpObjectToken *) cpVoiceToken;
|
||||
|
||||
// description
|
||||
WCHAR *descW;
|
||||
char *buffer;
|
||||
Common::String desc;
|
||||
HRESULT hr = voiceToken->GetStringValue(nullptr, &descW);
|
||||
if (SUCCEEDED(hr)) {
|
||||
buffer = Win32::unicodeToAnsi(descW);
|
||||
desc = buffer;
|
||||
free(buffer);
|
||||
CoTaskMemFree(descW);
|
||||
}
|
||||
|
||||
if (desc == "Sample TTS Voice") {
|
||||
// This is a really bad voice, it is basically unusable
|
||||
return;
|
||||
}
|
||||
|
||||
// voice attributes
|
||||
ISpDataKey *key = nullptr;
|
||||
hr = voiceToken->OpenKey(L"Attributes", &key);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
voiceToken->Release();
|
||||
warning("Could not open attribute key for voice: %s", desc.c_str());
|
||||
return;
|
||||
}
|
||||
LPWSTR data;
|
||||
|
||||
// language
|
||||
hr = key->GetStringValue(L"Language", &data);
|
||||
if (FAILED(hr)) {
|
||||
voiceToken->Release();
|
||||
warning("Could not get the language attribute for voice: %s", desc.c_str());
|
||||
return;
|
||||
}
|
||||
Common::String language = lcidToLocale(wcstol(data, nullptr, 16));
|
||||
CoTaskMemFree(data);
|
||||
|
||||
// only get the voices for the current language
|
||||
if (language != _ttsState->_language) {
|
||||
voiceToken->Release();
|
||||
return;
|
||||
}
|
||||
|
||||
// gender
|
||||
hr = key->GetStringValue(L"Gender", &data);
|
||||
if (FAILED(hr)) {
|
||||
voiceToken->Release();
|
||||
warning("Could not get the gender attribute for voice: %s", desc.c_str());
|
||||
return;
|
||||
}
|
||||
Common::TTSVoice::Gender gender = !wcscmp(data, L"Male") ? Common::TTSVoice::MALE : Common::TTSVoice::FEMALE;
|
||||
CoTaskMemFree(data);
|
||||
|
||||
// age
|
||||
hr = key->GetStringValue(L"Age", &data);
|
||||
if (FAILED(hr)) {
|
||||
voiceToken->Release();
|
||||
warning("Could not get the age attribute for voice: %s", desc.c_str());
|
||||
return;
|
||||
}
|
||||
Common::TTSVoice::Age age = !wcscmp(data, L"Adult") ? Common::TTSVoice::ADULT : Common::TTSVoice::UNKNOWN_AGE;
|
||||
CoTaskMemFree(data);
|
||||
|
||||
_ttsState->_availableVoices.push_back(Common::TTSVoice(gender, age, (void *) voiceToken, desc));
|
||||
}
|
||||
|
||||
/**
 * Convert a Windows locale identifier to its ISO 639 language name
 * (LOCALE_SISO639LANGNAME).
 *
 * @param locale  The LCID to look up.
 * @return The language code, or an empty string if the lookup fails.
 */
Common::String WindowsTextToSpeechManager::lcidToLocale(LCID locale) {
	// With a null buffer and size 0 the call reports the required buffer
	// size; a return value of 0 means the lookup failed.
	int nchars = GetLocaleInfo(locale, LOCALE_SISO639LANGNAME, nullptr, 0);
	if (nchars <= 0)
		return Common::String();

	Common::String result;
	TCHAR *languageCode = new TCHAR[nchars];
	// Only convert the buffer if it was actually filled in
	if (GetLocaleInfo(locale, LOCALE_SISO639LANGNAME, languageCode, nchars) > 0)
		result = Win32::tcharToString(languageCode);
	delete[] languageCode;
	return result;
}
|
||||
|
||||
void WindowsTextToSpeechManager::updateVoices() {
|
||||
if (!_ttsState->_enabled) {
|
||||
_speechState = NO_VOICE;
|
||||
return;
|
||||
}
|
||||
|
||||
if (_speechState == BROKEN)
|
||||
return;
|
||||
|
||||
_ttsState->_availableVoices.clear();
|
||||
ISpObjectToken *cpVoiceToken = nullptr;
|
||||
IEnumSpObjectTokens *cpEnum = nullptr;
|
||||
unsigned long ulCount = 0;
|
||||
|
||||
ISpObjectTokenCategory *cpCategory;
|
||||
HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL, IID_ISpObjectTokenCategory, (void**)&cpCategory);
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = cpCategory->SetId(L"HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech_OneCore\\Voices", FALSE);
|
||||
if (!SUCCEEDED(hr)) {
|
||||
hr = cpCategory->SetId(SPCAT_VOICES, FALSE);
|
||||
}
|
||||
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum);
|
||||
}
|
||||
}
|
||||
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = cpEnum->GetCount(&ulCount);
|
||||
}
|
||||
_voice->SetVolume(0);
|
||||
while (SUCCEEDED(hr) && ulCount--) {
|
||||
hr = cpEnum->Next(1, &cpVoiceToken, nullptr);
|
||||
_voice->SetVoice(cpVoiceToken);
|
||||
if (SUCCEEDED(_voice->Speak(L"hi, this is test", SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, nullptr)))
|
||||
createVoice(cpVoiceToken);
|
||||
else
|
||||
cpVoiceToken->Release();
|
||||
}
|
||||
// stop the test speech, we don't use stop(), because we don't wan't it to set state to READY
|
||||
// and we could easily be in NO_VOICE or BROKEN state here, in which the stop() wouldn't work
|
||||
_audio->SetState(SPAS_STOP, 0);
|
||||
_audio->SetState(SPAS_RUN, 0);
|
||||
_voice->Speak(nullptr, SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, nullptr);
|
||||
_voice->SetVolume(_ttsState->_volume);
|
||||
cpEnum->Release();
|
||||
|
||||
if (_ttsState->_availableVoices.empty()) {
|
||||
_speechState = NO_VOICE;
|
||||
warning("No voice is available for language: %s", _ttsState->_language.c_str());
|
||||
} else if (_speechState == NO_VOICE)
|
||||
_speechState = READY;
|
||||
}
|
||||
|
||||
void WindowsTextToSpeechManager::freeVoiceData(void *data) {
|
||||
ISpObjectToken *voiceToken = (ISpObjectToken *) data;
|
||||
voiceToken->Release();
|
||||
}
|
||||
|
||||
#endif
|
||||
92
backends/text-to-speech/windows/windows-text-to-speech.h
Normal file
92
backends/text-to-speech/windows/windows-text-to-speech.h
Normal file
@@ -0,0 +1,92 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef BACKENDS_TEXT_TO_SPEECH_WINDOWS_H
|
||||
#define BACKENDS_TEXT_TO_SPEECH_WINDOWS_H
|
||||
|
||||
#include "common/scummsys.h"
|
||||
|
||||
#if defined(USE_TTS) && defined(WIN32)
|
||||
|
||||
#include "common/text-to-speech.h"
|
||||
#include "common/str.h"
|
||||
#include "common/ustr.h"
|
||||
#include "common/list.h"
|
||||
|
||||
|
||||
class WindowsTextToSpeechManager final : public Common::TextToSpeechManager {
|
||||
public:
|
||||
enum SpeechState {
|
||||
READY,
|
||||
PAUSED,
|
||||
SPEAKING,
|
||||
BROKEN,
|
||||
NO_VOICE
|
||||
};
|
||||
|
||||
struct SpeechParameters {
|
||||
Common::List<WCHAR *> *queue;
|
||||
SpeechState *state;
|
||||
HANDLE *mutex;
|
||||
};
|
||||
|
||||
WindowsTextToSpeechManager();
|
||||
~WindowsTextToSpeechManager() override;
|
||||
|
||||
bool say(const Common::U32String &str, Action action) override;
|
||||
|
||||
bool stop() override;
|
||||
bool pause() override;
|
||||
bool resume() override;
|
||||
|
||||
bool isSpeaking() override;
|
||||
bool isPaused() override;
|
||||
bool isReady() override;
|
||||
|
||||
void setVoice(unsigned index) override;
|
||||
|
||||
void setRate(int rate) override;
|
||||
|
||||
void setPitch(int pitch) override;
|
||||
|
||||
void setVolume(unsigned volume) override;
|
||||
|
||||
void setLanguage(Common::String language) override;
|
||||
|
||||
void freeVoiceData(void *data) override;
|
||||
|
||||
private:
|
||||
void init();
|
||||
void updateVoices() override;
|
||||
void createVoice(void *cpVoiceToken);
|
||||
Common::String lcidToLocale(LCID locale);
|
||||
SpeechState _speechState;
|
||||
Common::String _lastSaid;
|
||||
HANDLE _thread;
|
||||
Common::List<WCHAR *> _speechQueue;
|
||||
SpeechParameters _threadParams;
|
||||
HANDLE _speechMutex;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif // BACKENDS_TEXT_TO_SPEECH_WINDOWS_H
|
||||
Reference in New Issue
Block a user