作者:手机用户2602880745 | 来源:互联网 | 2023-09-25 11:52
以下代码来自:http://mmcheng.net/zh/imagespirit/
本人仅作提取:
SREngine语音识别引擎封装类:
#pragma once/************************************************************************/
/* Notice: this project is used to support speech recognition of my */
/* ImageSpirit project. Please see the corresponding paper for more */
/* details. The CORE part of ImageSpirit system will be made public */
/* available soon. More resource: http://mmcheng.net/imagespirit/ */
/* ImageSpirit: Verbal Guided Image Parsing. M.-M. Cheng, S. Zheng, */
/* W.-Y. Lin, V. Vineet, P. Sturgess, N. Crook, N. Mitra, P. Torr, */
/* ACM TOG, 2014. */
/************************************************************************/#include // Microsoft Speech API
#pragma comment(lib,"SAPI.lib")class SREngine
{
public://speech variableCComPtr m_cpRecognizer;CComPtr m_cpRecoContext;CComPtr m_cpCmdGrammar;//audio variableCComPtr m_cpAudio;// Const valuesstatic const UINT WM_RECOEVENT = WM_USER+100;static const UINT MYGRAMMARID = 101;public:HRESULT SetRuleState(const WCHAR * pszRuleName = NULL, BOOL fActivate = SPRS_ACTIVE);HRESULT LoadCmdFromFile(const WCHAR * xmlFileName);HRESULT InitializeSapi(HWND hWnd, UINT Msg = WM_RECOEVENT, const WCHAR *xmlFileName = NULL);
};
#include "stdafx.h"
#include "SREngine.h"

/// Creates the SAPI objects and registers hWnd for recognition notifications.
///
/// Steps, each aborting with the failing HRESULT: create the in-process
/// recognizer, create the default audio-input object, attach it to the
/// recognizer, create the recognition context, request window-message
/// notification, and set the event interest mask.
///
/// @param hWnd         window that receives message Msg on recognition events.
/// @param Msg          message id passed to SetNotifyWindowMessage.
/// @param xmlFileName  optional grammar file; when non-NULL the result of
///                     LoadCmdFromFile is returned.
/// @return S_OK on success, otherwise the first failing HRESULT.
HRESULT SREngine::InitializeSapi(HWND hWnd, UINT Msg, const WCHAR *xmlFileName)
{
    // V_RETURN is defined elsewhere in the project; presumably it stores the
    // result in `hr` and returns it on failure -- confirm against its definition.
    HRESULT hr = S_OK;

    // Events this context subscribes to.
    const ULONGLONG ullInterest =
        SPFEI(SPEI_SOUND_START) | SPFEI(SPEI_SOUND_END) |
        SPFEI(SPEI_PHRASE_START) | SPFEI(SPEI_RECOGNITION) |
        SPFEI(SPEI_FALSE_RECOGNITION) | SPFEI(SPEI_HYPOTHESIS) |
        SPFEI(SPEI_INTERFERENCE) | SPFEI(SPEI_RECO_OTHER_CONTEXT) |
        SPFEI(SPEI_REQUEST_UI) | SPFEI(SPEI_RECO_STATE_CHANGE) |
        SPFEI(SPEI_PROPERTY_NUM_CHANGE) | SPFEI(SPEI_PROPERTY_STRING_CHANGE);

    V_RETURN(m_cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer));
    V_RETURN(SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &m_cpAudio));
    V_RETURN(m_cpRecognizer->SetInput(m_cpAudio, TRUE));
    V_RETURN(m_cpRecognizer->CreateRecoContext(&m_cpRecoContext));

    V_RETURN(m_cpRecoContext->SetNotifyWindowMessage(hWnd, Msg, 0, 0));
    // The same mask is passed for both arguments of SetInterest.
    V_RETURN(m_cpRecoContext->SetInterest(ullInterest, ullInterest));

    if (xmlFileName != NULL)
        return LoadCmdFromFile(xmlFileName);
    return hr;
}

/// Creates the command-and-control grammar and loads it from xmlFileName.
///
/// @return S_OK if the grammar was loaded or one already exists;
///         E_INVALIDARG for a NULL file name; E_UNEXPECTED when the
///         recognition context has not been created yet; otherwise the
///         HRESULT of the failing SAPI call.
HRESULT SREngine::LoadCmdFromFile(const WCHAR *xmlFileName)
{
    HRESULT hr = S_OK;
    if (m_cpCmdGrammar != NULL)
        return hr; // a grammar is already in place; keep it

    if (xmlFileName == NULL)
        return E_INVALIDARG;
    if (m_cpRecoContext == NULL)
        return E_UNEXPECTED; // InitializeSapi has not (successfully) run

    V_RETURN(m_cpRecoContext->CreateGrammar(MYGRAMMARID, &m_cpCmdGrammar));

    // Command and control---C&C
    hr = m_cpCmdGrammar->LoadCmdFromFile(xmlFileName, SPLO_DYNAMIC);
    if (FAILED(hr))
    {
        // Drop the empty grammar: otherwise the "already exists" check above
        // would make every later load attempt report success without loading.
        m_cpCmdGrammar.Release();
    }
    return hr;
}

/// Activates or deactivates a rule of the loaded command grammar.
///
/// @param pszRuleName  rule name forwarded to ISpRecoGrammar::SetRuleState.
/// @param fActivate    non-zero -> SPRS_ACTIVE, zero -> SPRS_INACTIVE.
/// @return E_UNEXPECTED when no grammar has been loaded, otherwise the
///         HRESULT of ISpRecoGrammar::SetRuleState.
HRESULT SREngine::SetRuleState(const WCHAR *pszRuleName, BOOL fActivate)
{
    // Without a grammar there is nothing to (de)activate; report failure
    // instead of dereferencing a NULL COM pointer.
    if (m_cpCmdGrammar == NULL)
        return E_UNEXPECTED;

    return m_cpCmdGrammar->SetRuleState(pszRuleName, NULL,
                                        fActivate ? SPRS_ACTIVE : SPRS_INACTIVE);
}
界面操作展示类:
#ifndef SPRECOUI_H
#define SPRECOUI_H//#include "ui_SpRecoUI.h"
#include "ui_ImageSpirit.h"class SpRecoUI : public QMainWindow
{Q_OBJECTpublic:SpRecoUI(QWidget *parent = 0, Qt::WindowFlags flags = 0);~SpRecoUI();LRESULT OnRecoEvent();bool nativeEvent(const QByteArray &eventType, void *message, long *result);private slots:void onVoiceStart();void onVoiceStop();private:Ui::SpRecoUIClass ui;// For speech recognitionbool m_bSoundEnd, m_bSoundStart;SREngine m_SREngine;void Recognized(CSpEvent &spEvent);
};#endif // SPRECOUI_H
#include "stdafx.h"
#include "SpRecoUI.h"
// NOTE(review): the target of this include was lost in the paste. QMessageBox
// is restored because the constructor below uses it -- confirm.
#include <QMessageBox>

/// Builds the UI, connects the voice button and starts the speech engine
/// with the grammar file "./SpeechGrammar.xml". Shows a message box when
/// the engine cannot be initialised.
SpRecoUI::SpRecoUI(QWidget *parent, Qt::WindowFlags flags)
    : QMainWindow(parent, flags)
{
    ui.setupUi(this);
    connect(ui.pbVoiceInput, SIGNAL(pressed()), this, SLOT(onVoiceStart()));
    connect(ui.pbVoiceInput, SIGNAL(released()), this, SLOT(onVoiceStop()));

    // SAPI init
    m_bSoundStart = false;
    m_bSoundEnd = false;

    const HRESULT hrInit = m_SREngine.InitializeSapi(
        reinterpret_cast<HWND>(this->winId()),
        SREngine::WM_RECOEVENT,
        L"./SpeechGrammar.xml");
    if (FAILED(hrInit))
    {
        // QMessageBox takes Qt button flags; the Win32 MB_OK constant used
        // previously belongs to ::MessageBox, not to this API.
        QMessageBox::information(NULL, "Error", "Initialize speech engine failed!",
                                 QMessageBox::Ok);
    }
}

SpRecoUI::~SpRecoUI()
{
}

/// Button pressed: activate the grammar rules (NULL rule name is forwarded as-is).
/// VERIFY_RES is a project macro defined elsewhere.
void SpRecoUI::onVoiceStart()
{
    VERIFY_RES(m_SREngine.SetRuleState(NULL, TRUE));
    setWindowTitle("Sound started");
}

/// Button released: deactivate the grammar rules.
void SpRecoUI::onVoiceStop()
{
    VERIFY_RES(m_SREngine.SetRuleState(NULL, FALSE));
    setWindowTitle("Sound stopped");
}

/// Native message hook: runs OnRecoEvent() for SREngine::WM_RECOEVENT and
/// stores its return value in *result. Always returns false, so Qt carries
/// on with its own handling of every message.
bool SpRecoUI::nativeEvent(const QByteArray &eventType, void *message, long *result)
{
    MSG *pMsg = static_cast<MSG *>(message);
    setWindowTitle("Control - Debug: winEvent"); // debug trace, set on every native message
    if (pMsg->message == SREngine::WM_RECOEVENT)
        *result = this->OnRecoEvent();
    return false;
}

// Speech Recognition Event Process
LRESULT SpRecoUI::OnRecoEvent()
{if (m_SREngine.m_cpRecoCOntext== NULL)return FALSE;CSpEvent spEvent;HRESULT hr = S_OK;while(spEvent.GetFrom(m_SREngine.m_cpRecoContext) == S_OK) {setWindowTitle("Control - Debug");switch(spEvent.eEventId){case SPEI_SOUND_START: m_bSoundStart = true; break;case SPEI_SOUND_END: m_bSoundEnd = true; break;case SPEI_RECOGNITION: if (m_bSoundStart && m_bSoundEnd) Recognized(spEvent); break;}}return TRUE;
}void SpRecoUI::Recognized(CSpEvent &spEvent)
{USES_CONVERSION;CComPtr cpResult = spEvent.RecoResult();CSpDynamicString dstrText;cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);QString strResult = dstrText.CopyToChar(); // W2T(dstrText);SPPHRASE* pPhrase = NULL;if (SUCCEEDED(cpResult->GetPhrase(&pPhrase))){strResult += tr(" RuleName:") + QString::fromStdWString(pPhrase->Rule.pszName);strResult += tr(" PropName:") + QString::fromStdWString(pPhrase->pProperties->pszName); if (pPhrase->pProperties->pNextSibling)strResult += tr(" Sibling:") + QString::fromStdWString(pPhrase->pProperties->pNextSibling->pszName);if (pPhrase->pProperties->pFirstChild)strResult += tr(" Child:") + QString::fromStdWString(pPhrase->pProperties->pFirstChild->pszName);}if (pPhrase)::CoTaskMemFree(pPhrase); ui.textEdit->insertPlainText(strResult+"\n");
}
SpeechGrammar XML语音命令字配置:
+Activate -the
+Make -the
+Change -the
-from-to
+Change -the
-from-to
Move
Repeat
-the
along -the
wall
floor
picture
cabinet
chair
table
window
door
ceiling
lamp
bed
desk
monitor
car
blinds
television
sofa
counter
black
blue
brown
gray
green
orange
pink
purple
red
white
yellow
-in
top-left
top-middle
top-right
center-left
center-middle
center-right
bottom-left
bottom-middle
bottom-right
wood
painted
cotton
paper
glass
brick
metal
leather
plastic
up
down
left
right
lower
taller
smaller
larger