社本@元ネオニート Blog

社本@元ネオニート (元々ワック) のいたずら書き (for Programmers)

AILight Banner
AILight Blog

プロフィール

社本@元ネオニート Blog
働いたら負けかなと思っていたのですが。。とうとう働き始めました。アメリカのシアトル・エリアにある企業向けのバックアップソフトを開発している会社です。日本人は私だけ。なんとか。。やっていけてるようです。

目次

Blog 利用状況

記事分類

過去の記事

タグ

テキストから音声合成 (Text-to-Speech) - Speech API

Speech API を使って、テキストから音声合成を行うクラスが、倉庫から発掘されましたw
倉庫にしまったままではもったいないので、公開しておきます。


---
#pragma once

#include <initguid.h>
#include "speech.h"        // SAPI4.0

/////////////////////////////////////////////////////////////////////////////
// Smart pointers
//
// Each _COM_SMARTPTR_TYPEDEF(IFoo, IID_IFoo) line below declares a smart
// pointer type named IFooPtr for the corresponding Speech API interface.
// Within this header only ITTSFindPtr, IAudioMultiMediaDevicePtr and
// ITTSCentralPtr are actually used (by CTextToSpeech); the remaining
// typedefs are provided for other code that includes this header.

_COM_SMARTPTR_TYPEDEF(IAttributes, IID_IAttributes);
_COM_SMARTPTR_TYPEDEF(ISpchError, IID_ISpchError);
_COM_SMARTPTR_TYPEDEF(ILexPronounce, IID_ILexPronounce);
_COM_SMARTPTR_TYPEDEF(ILexPronounce2, IID_ILexPronounce2);
_COM_SMARTPTR_TYPEDEF(ISTMicWizard, IID_ISTMicWizard);
_COM_SMARTPTR_TYPEDEF(ISTLexDlg, IID_ISTLexDlg);
_COM_SMARTPTR_TYPEDEF(ISTPhoneConv, IID_ISTPhoneConv);
_COM_SMARTPTR_TYPEDEF(IAudio, IID_IAudio);
_COM_SMARTPTR_TYPEDEF(IAudioDest, IID_IAudioDest);
_COM_SMARTPTR_TYPEDEF(IAudioDestNotifySink, IID_IAudioDestNotifySink);
_COM_SMARTPTR_TYPEDEF(IAudioMultiMediaDevice, IID_IAudioMultiMediaDevice);
_COM_SMARTPTR_TYPEDEF(IAudioSource, IID_IAudioSource);
_COM_SMARTPTR_TYPEDEF(IAudioSourceInstrumented, IID_IAudioSourceInstrumented);
_COM_SMARTPTR_TYPEDEF(IAudioSourceSusp, IID_IAudioSourceSusp);
_COM_SMARTPTR_TYPEDEF(IAudioSourceNotifySink, IID_IAudioSourceNotifySink);
_COM_SMARTPTR_TYPEDEF(IAudioDirect, IID_IAudioDirect);
_COM_SMARTPTR_TYPEDEF(ISRAttributes, IID_ISRAttributes);
_COM_SMARTPTR_TYPEDEF(ISRCentral, IID_ISRCentral);
_COM_SMARTPTR_TYPEDEF(ISRDialogs, IID_ISRDialogs);
_COM_SMARTPTR_TYPEDEF(ISRDialogs2, IID_ISRDialogs2);
_COM_SMARTPTR_TYPEDEF(ISREnum, IID_ISREnum);
_COM_SMARTPTR_TYPEDEF(ISRFind, IID_ISRFind);
_COM_SMARTPTR_TYPEDEF(ISRGramLexPron, IID_ISRGramLexPron);
_COM_SMARTPTR_TYPEDEF(ISRGramCommon, IID_ISRGramCommon);
_COM_SMARTPTR_TYPEDEF(ISRGramCFG, IID_ISRGramCFG);
_COM_SMARTPTR_TYPEDEF(ISRGramDictation, IID_ISRGramDictation);
_COM_SMARTPTR_TYPEDEF(ISRGramInsertionGUI, IID_ISRGramInsertionGUI);
_COM_SMARTPTR_TYPEDEF(ISRGramNotifySink, IID_ISRGramNotifySink);
_COM_SMARTPTR_TYPEDEF(ISRNotifySink, IID_ISRNotifySink);
_COM_SMARTPTR_TYPEDEF(ISRNotifySink2, IID_ISRNotifySink2);
_COM_SMARTPTR_TYPEDEF(ISRResBasic, IID_ISRResBasic);
_COM_SMARTPTR_TYPEDEF(ISRResScores, IID_ISRResScores);
_COM_SMARTPTR_TYPEDEF(ISRResMerge, IID_ISRResMerge);
_COM_SMARTPTR_TYPEDEF(ISRResAudio, IID_ISRResAudio);
_COM_SMARTPTR_TYPEDEF(ISRResAudioEx, IID_ISRResAudioEx);
_COM_SMARTPTR_TYPEDEF(ISRResCorrection, IID_ISRResCorrection);
_COM_SMARTPTR_TYPEDEF(ISRResEval, IID_ISRResEval);
_COM_SMARTPTR_TYPEDEF(ISRResGraph, IID_ISRResGraph);
_COM_SMARTPTR_TYPEDEF(ISRResGraphEx, IID_ISRResGraphEx);
_COM_SMARTPTR_TYPEDEF(ISRResMemory, IID_ISRResMemory);
_COM_SMARTPTR_TYPEDEF(ISRResModifyGUI, IID_ISRResModifyGUI);
_COM_SMARTPTR_TYPEDEF(ISRResSpeaker, IID_ISRResSpeaker);
_COM_SMARTPTR_TYPEDEF(ISRSpeaker, IID_ISRSpeaker);
_COM_SMARTPTR_TYPEDEF(ISRSpeaker2, IID_ISRSpeaker2);
_COM_SMARTPTR_TYPEDEF(ITTSAttributes, IID_ITTSAttributes);
_COM_SMARTPTR_TYPEDEF(ITTSBufNotifySink, IID_ITTSBufNotifySink);
_COM_SMARTPTR_TYPEDEF(ITTSCentral, IID_ITTSCentral);
_COM_SMARTPTR_TYPEDEF(ITTSDialogs, IID_ITTSDialogs);
_COM_SMARTPTR_TYPEDEF(ITTSEnum, IID_ITTSEnum);
_COM_SMARTPTR_TYPEDEF(ITTSFind, IID_ITTSFind);
_COM_SMARTPTR_TYPEDEF(ITTSNotifySink, IID_ITTSNotifySink);
_COM_SMARTPTR_TYPEDEF(ITTSNotifySink2, IID_ITTSNotifySink2);
_COM_SMARTPTR_TYPEDEF(IVCmdNotifySink, IID_IVCmdNotifySink);
_COM_SMARTPTR_TYPEDEF(IVCmdEnum, IID_IVCmdEnum);
_COM_SMARTPTR_TYPEDEF(IEnumSRShare, IID_IEnumSRShare);
_COM_SMARTPTR_TYPEDEF(IVCmdMenu, IID_IVCmdMenu);
_COM_SMARTPTR_TYPEDEF(IVoiceCmd, IID_IVoiceCmd);
_COM_SMARTPTR_TYPEDEF(IVCmdAttributes, IID_IVCmdAttributes);
_COM_SMARTPTR_TYPEDEF(IVCmdDialogs, IID_IVCmdDialogs);
_COM_SMARTPTR_TYPEDEF(IVDctNotifySink, IID_IVDctNotifySink);
_COM_SMARTPTR_TYPEDEF(IVDctNotifySink2, IID_IVDctNotifySink2);
_COM_SMARTPTR_TYPEDEF(IVoiceDictation, IID_IVoiceDictation);
_COM_SMARTPTR_TYPEDEF(IVDctText, IID_IVDctText);
_COM_SMARTPTR_TYPEDEF(IVDctText2, IID_IVDctText2);
_COM_SMARTPTR_TYPEDEF(IVDctTextCache, IID_IVDctTextCache);
_COM_SMARTPTR_TYPEDEF(IVDctInvTextNorm, IID_IVDctInvTextNorm);
_COM_SMARTPTR_TYPEDEF(IVDctAttributes, IID_IVDctAttributes);
_COM_SMARTPTR_TYPEDEF(IVDctCommandsBuiltIn, IID_IVDctCommandsBuiltIn);
_COM_SMARTPTR_TYPEDEF(IVDctCommandsApp, IID_IVDctCommandsApp);
_COM_SMARTPTR_TYPEDEF(IVDctCommands, IID_IVDctCommands);
_COM_SMARTPTR_TYPEDEF(IVDctGlossary, IID_IVDctGlossary);
_COM_SMARTPTR_TYPEDEF(IVDctDialogs, IID_IVDctDialogs);
_COM_SMARTPTR_TYPEDEF(IVDctGUI, IID_IVDctGUI);
_COM_SMARTPTR_TYPEDEF(IVTxtNotifySink, IID_IVTxtNotifySink);
_COM_SMARTPTR_TYPEDEF(IVoiceText, IID_IVoiceText);
_COM_SMARTPTR_TYPEDEF(IVTxtAttributes, IID_IVTxtAttributes);
_COM_SMARTPTR_TYPEDEF(IVTxtDialogs, IID_IVTxtDialogs);
_COM_SMARTPTR_TYPEDEF(ISTRecord, IID_ISTRecord);
_COM_SMARTPTR_TYPEDEF(ISTRecordNotifySink, IID_ISTRecordNotifySink);
_COM_SMARTPTR_TYPEDEF(ISTGramComp, IID_ISTGramComp);
_COM_SMARTPTR_TYPEDEF(ISTTTSQueue, IID_ISTTTSQueue);
_COM_SMARTPTR_TYPEDEF(ISTTTSQueueNotifySink, IID_ISTTTSQueueNotifySink);
_COM_SMARTPTR_TYPEDEF(ISTLog, IID_ISTLog);
_COM_SMARTPTR_TYPEDEF(IAudioSourceLog, IID_IAudioSourceLog);
_COM_SMARTPTR_TYPEDEF(IAudioTel, IID_IAudioTel);
_COM_SMARTPTR_TYPEDEF(ITTSExternalSynthesizer, IID_ITTSExternalSynthesizer);
_COM_SMARTPTR_TYPEDEF(IAudioExternalSynthesizer, IID_IAudioExternalSynthesizer);


/////////////////////////////////////////////////////////////////////////////
// CTTSModeInfo

class CTTSModeInfo : public TTSMODEINFO
{
public:
 CTTSModeInfo()
 {
  ::ZeroMemory(this, sizeof(TTSMODEINFO));
 }
 CTTSModeInfo(LANGUAGE& newLanguage)
 {
  ::ZeroMemory(this, sizeof(TTSMODEINFO));
  language = newLanguage;
 }
};


/////////////////////////////////////////////////////////////////////////////
// CLanguage

class CLanguage : public LANGUAGE
{
public:
 CLanguage()
 {
  ::ZeroMemory(this, sizeof(LANGUAGE));
 }
 CLanguage(LANGID newLanguageID)
 {
  ::ZeroMemory(this, sizeof(LANGUAGE));
  LanguageID = newLanguageID;
 }
};


/////////////////////////////////////////////////////////////////////////////
// CTextToSpeech
// Wrapper around a Speech API 4.0 text-to-speech engine.
//
// Init() looks up an engine through the TTS enumerator, attaches it to a
// multimedia audio destination and keeps the resulting ITTSCentral interface.
// TextToSpeech() then passes text to that engine. Term() (also run by the
// destructor) drops the engine reference.
class CTextToSpeech
{
public:
 CTextToSpeech()
 {
  // Nothing to do here; the engine is acquired by Init().
 }
 virtual ~CTextToSpeech()
 {
  this->Term();
 }

public:
 // Initializes using an all-zero TTSMODEINFO as the search criteria.
 // Returns the HRESULT of Init(CTTSModeInfo&).
 HRESULT Init()
 {
  CTTSModeInfo info;
  return this->Init(info);
 }
 // Initializes using a zeroed TTSMODEINFO whose `language` member is a copy
 // of newLanguage. The argument is only read, so it is taken by const
 // reference (callers may pass temporaries).
 // Returns the HRESULT of Init(CTTSModeInfo&).
 HRESULT Init(const LANGUAGE& newLanguage)
 {
  CTTSModeInfo info;
  info.language = newLanguage;
  return this->Init(info);
 }
 // Finds the engine matching `info`, binds it to the WAVE_MAPPER audio
 // device and stores its ITTSCentral interface in m_spITTSCentral.
 //
 // Returns the first failing HRESULT of the underlying COM calls, the
 // HRESULT carried by a _com_error thrown by the smart pointers, E_FAIL for
 // any other exception, or the result of ITTSFind::Select on success.
 HRESULT Init(CTTSModeInfo& info)
 {
  try
  {
   ITTSFindPtr spFind;
   HRESULT hr = spFind.CreateInstance(CLSID_TTSEnumerator);
   if (FAILED(hr))
   {
    return hr;
   }

   CTTSModeInfo found;
   hr = spFind->Find(&info, NULL, &found);
   if (FAILED(hr))
   {
    // No matching engine was found.
    return hr;
   }

   // Create the audio destination the engine will render to.
   IAudioMultiMediaDevicePtr spAudioDest;
   hr = spAudioDest.CreateInstance(CLSID_MMAudioDest);
   if (FAILED(hr))
   {
    return hr;
   }
   hr = spAudioDest->DeviceNumSet(WAVE_MAPPER);
   if (FAILED(hr))
   {
    return hr;
   }

   // Select the engine. NOTE(review): taking the address of the smart
   // pointer is expected to release any previously held engine first --
   // confirm against the _com_ptr_t documentation.
   return spFind->Select(found.gModeID, &m_spITTSCentral,
    static_cast<LPUNKNOWN>(spAudioDest.GetInterfacePtr()));
  }
  catch (const _com_error& e)
  {
   // Preserve the real failure code instead of collapsing it to E_FAIL.
   return e.Error();
  }
  catch (...)
  {
   return E_FAIL;
  }
 }
 // Drops the engine reference; safe to call when not initialized.
 void Term()
 {
  if (m_spITTSCentral == NULL)
  {
   return;
  }
  m_spITTSCentral = NULL;
 }

public:
 // Returns true when an engine interface is currently held.
 bool IsAvailable()
 {
  return (m_spITTSCentral == NULL) ? false : true;
 }
 // Passes szText to the engine via ITTSCentral::TextData.
 //
 // Returns E_FAIL when no engine is held, E_INVALIDARG when szText is NULL,
 // the HRESULT carried by a _com_error, E_FAIL for any other exception, or
 // otherwise the HRESULT of TextData.
 HRESULT TextToSpeech(LPCTSTR szText)
 {
  if (m_spITTSCentral == NULL)
  {
   return E_FAIL;
  }
  if (szText == NULL)
  {
   // _tcslen(NULL) would dereference a null pointer.
   return E_INVALIDARG;
  }

  try
  {
   SDATA data;
   ::ZeroMemory(&data, sizeof(SDATA));
   // SDATA::pData is not const-qualified; the text is not modified here.
   data.pData = const_cast<LPTSTR>(szText);
   // Size in bytes including the terminating NUL. NOTE(review): the
   // SAPI 4 samples pass the terminator as part of the buffer -- confirm
   // against the ITTSCentral::TextData documentation.
   data.dwSize = static_cast<DWORD>((_tcslen(szText) + 1) * sizeof(TCHAR));

   return m_spITTSCentral->TextData(CHARSET_TEXT, 0, data, NULL, IID_ITTSBufNotifySink);
  }
  catch (const _com_error& e)
  {
   // Preserve the real failure code instead of collapsing it to E_FAIL.
   return e.Error();
  }
  catch (...)
  {
   return E_FAIL;
  }
 }

protected:
 // Engine interface obtained by Init(); NULL while uninitialized.
 ITTSCentralPtr m_spITTSCentral;
};

投稿日時 : 2006年7月5日 15:10


コメントを追加

タイトル
名前
URL
コメント