FileDocCategorySizeDatePackage
Recognizer.javaAPI DocAndroid 5.1 API27573Thu Mar 12 22:22:10 GMT 2015android.speech.srec

Recognizer

public final class Recognizer extends Object
Simple, synchronous speech recognizer, using the Nuance SREC package. Usage proceeds as follows:
  • Create a Recognizer.
  • Create a Recognizer.Grammar.
  • Setup the Recognizer.Grammar.
  • Reset the Recognizer.Grammar slots, if needed.
  • Fill the Recognizer.Grammar slots, if needed.
  • Compile the Recognizer.Grammar, if needed.
  • Save the filled Recognizer.Grammar, if needed.
  • Start the Recognizer.
  • Loop over advance and putAudio until recognition is complete.
  • Fetch and process results, or notify of failure.
  • Stop the Recognizer.
  • Destroy the Recognizer.

Below is example code


// create and start audio input
InputStream audio = new MicrophoneInputStream(11025, 11025*5);
// create a Recognizer
String cdir = Recognizer.getConfigDir(null);
Recognizer recognizer = new Recognizer(cdir + "/baseline11k.par");
// create and load a Grammar
Recognizer.Grammar grammar = recognizer.new Grammar(cdir + "/grammars/VoiceDialer.g2g");
// setup the Grammar to work with the Recognizer
grammar.setupRecognizer();
// fill the Grammar slots with names and save, if required
grammar.resetAllSlots();
for (String name : names) grammar.addWordToSlot("@Names", name, null, 1, "V=1");
grammar.compile();
grammar.save(".../foo.g2g");
// start the Recognizer
recognizer.start();
// loop over Recognizer events
while (true) {
switch (recognizer.advance()) {
case Recognizer.EVENT_INCOMPLETE:
case Recognizer.EVENT_STARTED:
case Recognizer.EVENT_START_OF_VOICING:
case Recognizer.EVENT_END_OF_VOICING:
// let the Recognizer continue to run
continue;
case Recognizer.EVENT_RECOGNITION_RESULT:
// success, so fetch results here!
for (int i = 0; i < recognizer.getResultCount(); i++) {
String result = recognizer.getResult(i, Recognizer.KEY_LITERAL);
}
break;
case Recognizer.EVENT_NEED_MORE_AUDIO:
// put more audio in the Recognizer
recognizer.putAudio(audio);
continue;
default:
notifyFailure();
break;
}
break;
}
// stop the Recognizer
recognizer.stop();
// destroy the Recognizer
recognizer.destroy();
// stop the audio device
audio.close();

Fields Summary
private static String
TAG
public static final String
KEY_CONFIDENCE
Result key corresponding to confidence score.
public static final String
KEY_LITERAL
Result key corresponding to literal text.
public static final String
KEY_MEANING
Result key corresponding to semantic meaning text.
private long
mVocabulary
private long
mRecognizer
private Grammar
mActiveGrammar
private byte[]
mPutAudioBuffer
public static final int
EVENT_INVALID
Reserved value.
public static final int
EVENT_NO_MATCH
Recognizer could not find a match for the utterance.
public static final int
EVENT_INCOMPLETE
Recognizer processed one frame of audio.
public static final int
EVENT_STARTED
Recognizer has just been started.
public static final int
EVENT_STOPPED
Recognizer is stopped.
public static final int
EVENT_START_OF_VOICING
Beginning of speech detected.
public static final int
EVENT_END_OF_VOICING
End of speech detected.
public static final int
EVENT_SPOKE_TOO_SOON
Beginning of utterance occurred too soon.
public static final int
EVENT_RECOGNITION_RESULT
Recognition match detected.
public static final int
EVENT_START_OF_UTTERANCE_TIMEOUT
Timeout occurred before beginning of utterance.
public static final int
EVENT_RECOGNITION_TIMEOUT
Timeout occurred before speech recognition could complete.
public static final int
EVENT_NEED_MORE_AUDIO
Not enough samples to process one frame.
public static final int
EVENT_MAX_SPEECH
More audio encountered than is allowed by 'swirec_max_speech_duration'.
Constructors Summary
public Recognizer(String configFile)
Create an instance of a SREC speech recognizer.

param
configFile pathname of the baseline*.par configuration file, which in turn contains references to dictionaries, speech models, and other data needed to configure and operate the recognizer. A separate config file is needed for each audio sample rate. Two files, baseline11k.par and baseline8k.par, which correspond to 11025 and 8000 Hz, are present in the directory indicated by getConfigDir.
throws
IOException

        // Native bring-up, in order: PMemInit, then the session created from
        // configFile, then the recognizer, then the vocabulary.
        PMemInit();
        SR_SessionCreate(configFile);
        // Native handles are kept in long fields; destroy() treats 0 as "nothing to free".
        mRecognizer = SR_RecognizerCreate();
        SR_RecognizerSetup(mRecognizer);
        mVocabulary = SR_VocabularyLoad();
    
Methods Summary
private static native voidPMemInit()

private static native voidPMemShutdown()

private static native java.lang.StringSR_AcousticStateGet(long recognizer)

private static native voidSR_AcousticStateReset(long recognizer)

private static native voidSR_AcousticStateSet(long recognizer, java.lang.String state)

private static native voidSR_GrammarAddWordToSlot(long grammar, java.lang.String slot, java.lang.String word, java.lang.String pronunciation, int weight, java.lang.String tag)

private static native voidSR_GrammarAllowAll(long grammar)

private static native voidSR_GrammarAllowOnly(long grammar, java.lang.String transcription)

private static native voidSR_GrammarCompile(long grammar)

private static native longSR_GrammarCreate()

private static native voidSR_GrammarDestroy(long grammar)

private static native longSR_GrammarLoad(java.lang.String filename)

private static native voidSR_GrammarResetAllSlots(long grammar)

private static native voidSR_GrammarSave(long grammar, java.lang.String filename)

private static native voidSR_GrammarSetupRecognizer(long grammar, long recognizer)

private static native voidSR_GrammarSetupVocabulary(long grammar, long vocabulary)

private static native voidSR_GrammarUnsetupRecognizer(long grammar)

private static native voidSR_RecognizerActivateRule(long recognizer, long grammar, java.lang.String ruleName, int weight)

private static native intSR_RecognizerAdvance(long recognizer)

private static native booleanSR_RecognizerCheckGrammarConsistency(long recognizer, long grammar)

private static native longSR_RecognizerCreate()

private static native voidSR_RecognizerDeactivateAllRules(long recognizer)

private static native voidSR_RecognizerDeactivateRule(long recognizer, long grammar, java.lang.String ruleName)

private static native voidSR_RecognizerDestroy(long recognizer)

private static native booleanSR_RecognizerGetBoolParameter(long recognizer, java.lang.String key)

private static native java.lang.StringSR_RecognizerGetParameter(long recognizer, java.lang.String key)

private static native intSR_RecognizerGetSize_tParameter(long recognizer, java.lang.String key)

private static native booleanSR_RecognizerHasSetupRules(long recognizer)

private static native booleanSR_RecognizerIsActiveRule(long recognizer, long grammar, java.lang.String ruleName)

private static native booleanSR_RecognizerIsSetup(long recognizer)

private static native booleanSR_RecognizerIsSignalClipping(long recognizer)

private static native booleanSR_RecognizerIsSignalDCOffset(long recognizer)

private static native booleanSR_RecognizerIsSignalNoisy(long recognizer)

private static native booleanSR_RecognizerIsSignalTooFewSamples(long recognizer)

private static native booleanSR_RecognizerIsSignalTooManySamples(long recognizer)

private static native booleanSR_RecognizerIsSignalTooQuiet(long recognizer)

private static native intSR_RecognizerPutAudio(long recognizer, byte[] buffer, int offset, int length, boolean isLast)

private static native intSR_RecognizerResultGetKeyCount(long recognizer, int nbest)

private static native java.lang.String[]SR_RecognizerResultGetKeyList(long recognizer, int nbest)

private static native intSR_RecognizerResultGetSize(long recognizer)

private static native java.lang.StringSR_RecognizerResultGetValue(long recognizer, int nbest, java.lang.String key)

private static native byte[]SR_RecognizerResultGetWaveform(long recognizer)

private static native voidSR_RecognizerSetBoolParameter(long recognizer, java.lang.String key, boolean value)

private static native voidSR_RecognizerSetParameter(long recognizer, java.lang.String key, java.lang.String value)

private static native voidSR_RecognizerSetSize_tParameter(long recognizer, java.lang.String key, int value)

private static native voidSR_RecognizerSetup(long recognizer)

private static native voidSR_RecognizerSetupRule(long recognizer, long grammar, java.lang.String ruleName)

private static native voidSR_RecognizerStart(long recognizer)

private static native voidSR_RecognizerStop(long recognizer)

private static native voidSR_RecognizerUnsetup(long recognizer)

private static native voidSR_SessionCreate(java.lang.String filename)

private static native voidSR_SessionDestroy()

private static native voidSR_VocabularyDestroy(long vocabulary)

private static native java.lang.StringSR_VocabularyGetPronunciation(long vocabulary, java.lang.String word)

private static native longSR_VocabularyLoad()

public intadvance()
Process some audio and return the current status.

return
recognition event, one of:
  • EVENT_INVALID
  • EVENT_NO_MATCH
  • EVENT_INCOMPLETE
  • EVENT_STARTED
  • EVENT_STOPPED
  • EVENT_START_OF_VOICING
  • EVENT_END_OF_VOICING
  • EVENT_SPOKE_TOO_SOON
  • EVENT_RECOGNITION_RESULT
  • EVENT_START_OF_UTTERANCE_TIMEOUT
  • EVENT_RECOGNITION_TIMEOUT
  • EVENT_NEED_MORE_AUDIO
  • EVENT_MAX_SPEECH

        return SR_RecognizerAdvance(mRecognizer);
    
public voiddestroy()
Clean up resources.

        // Tear down in nested try/finally blocks so that each later cleanup step
        // still runs even if an earlier native call throws.
        try {
            if (mVocabulary != 0) SR_VocabularyDestroy(mVocabulary);
        } finally {
            // Clear the handle so a repeated destroy() skips the vocabulary.
            mVocabulary = 0;
            try {
                if (mRecognizer != 0) SR_RecognizerUnsetup(mRecognizer);
            } finally {
                try {
                    if (mRecognizer != 0) SR_RecognizerDestroy(mRecognizer);
                } finally {
                    // Clear the handle whether or not unsetup/destroy succeeded.
                    mRecognizer = 0;
                    try {
                        // Session and PMem teardown are not guarded by a handle check.
                        SR_SessionDestroy();
                    } finally {
                        PMemShutdown();
                    }
                }
            }
        }
    
public static java.lang.StringeventToString(int event)
Produce a displayable string from an advance event.

param
event
return
String representing the event.


                        
         
        switch (event) {
            case EVENT_INVALID:
                return "EVENT_INVALID";
            case EVENT_NO_MATCH:
                return "EVENT_NO_MATCH";
            case EVENT_INCOMPLETE:
                return "EVENT_INCOMPLETE";
            case EVENT_STARTED:
                return "EVENT_STARTED";
            case EVENT_STOPPED:
                return "EVENT_STOPPED";
            case EVENT_START_OF_VOICING:
                return "EVENT_START_OF_VOICING";
            case EVENT_END_OF_VOICING:
                return "EVENT_END_OF_VOICING";
            case EVENT_SPOKE_TOO_SOON:
                return "EVENT_SPOKE_TOO_SOON";
            case EVENT_RECOGNITION_RESULT:
                return "EVENT_RECOGNITION_RESULT";
            case EVENT_START_OF_UTTERANCE_TIMEOUT:
                return "EVENT_START_OF_UTTERANCE_TIMEOUT";
            case EVENT_RECOGNITION_TIMEOUT:
                return "EVENT_RECOGNITION_TIMEOUT";
            case EVENT_NEED_MORE_AUDIO:
                return "EVENT_NEED_MORE_AUDIO";
            case EVENT_MAX_SPEECH:
                return "EVENT_MAX_SPEECH";
        }
        return "EVENT_" + event;
    
protected voidfinalize()
Clean up resources.

        // A non-zero handle means destroy() has not run to completion
        // (destroy() zeroes both handles).
        if (mVocabulary != 0 || mRecognizer != 0) {
            // Release the native resources before reporting the misuse.
            destroy();
            // NOTE(review): an exception thrown from finalize() is ignored by the
            // runtime, so this is unlikely to reach any caller — confirm intent.
            throw new IllegalStateException("someone forgot to destroy Recognizer");
        }
    
public java.lang.StringgetAcousticState()
Get the acoustic state vector.

return
String containing the acoustic state vector.
hide

        return SR_AcousticStateGet(mRecognizer);
    
public static java.lang.StringgetConfigDir(java.util.Locale locale)
Get the pathname of the SREC configuration directory corresponding to the language indicated by the Locale. This directory contains dictionaries, speech models, configuration files, and other data needed by the Recognizer.

param
locale Locale corresponding to the desired language, or null for default, currently Locale.US.
return
Pathname of the configuration directory.

    
                                                            
         
        // Fall back to US English when the caller does not specify a locale.
        if (locale == null) locale = Locale.US;
        // The directory name is the locale string with '_' replaced by '.',
        // lower-cased with Locale.ROOT so the result does not depend on the
        // default locale (e.g. en_US -> en.us).
        String dir = "/system/usr/srec/config/" +
                locale.toString().replace('_', '.').toLowerCase(Locale.ROOT);
        // Only hand back the path if it exists as a directory; otherwise null.
        if ((new File(dir)).isDirectory()) return dir;
        return null;
    
public java.lang.StringgetResult(int index, java.lang.String key)
Get a result value. Must be called after EVENT_RECOGNITION_RESULT is returned by advance, but before stop.

param
index index of the result.
param
key key of the result. This is typically one of KEY_CONFIDENCE, KEY_LITERAL, or KEY_MEANING, but the user can also define their own keys in a grxml file, or in the tag slot of Grammar.addWordToSlot.
return
the result.

        return SR_RecognizerResultGetValue(mRecognizer, index, key);
    
public intgetResultCount()
Get the number of recognition results. Must be called after EVENT_RECOGNITION_RESULT is returned by advance, but before stop.

return
number of results in nbest list.


                                    
       
        return SR_RecognizerResultGetSize(mRecognizer);
    
public java.lang.String[]getResultKeys(int index)
Get a set of keys for the result. Must be called after EVENT_RECOGNITION_RESULT is returned by advance, but before stop.

param
index index of result.
return
array of keys.

        return SR_RecognizerResultGetKeyList(mRecognizer, index);
    
public intputAudio(byte[] buf, int offset, int length, boolean isLast)
Put audio samples into the Recognizer.

param
buf holds the audio samples.
param
offset offset of the first sample.
param
length number of bytes containing samples.
param
isLast indicates no more audio data, normally false.
return
number of bytes accepted.

        return SR_RecognizerPutAudio(mRecognizer, buf, offset, length, isLast);
    
public voidputAudio(java.io.InputStream audio)
Read audio samples from an InputStream and put them in the Recognizer.

param
audio InputStream containing PCM audio samples.

        // Lazily allocate the 512-byte staging buffer on first use.
        if (mPutAudioBuffer == null) {
            mPutAudioBuffer = new byte[512];
        }

        final int bytesRead = audio.read(mPutAudioBuffer);
        if (bytesRead == -1) {
            // End of stream: hand over an empty range flagged as the last one.
            SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, 0, true);
            return;
        }

        // The recognizer must accept every byte that was read.
        final int bytesAccepted =
                SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, bytesRead, false);
        if (bytesRead != bytesAccepted) {
            throw new IOException("SR_RecognizerPutAudio failed nbytes=" + bytesRead);
        }
    
public voidresetAcousticState()
Reset the acoustic state vector to its default value.

hide

        SR_AcousticStateReset(mRecognizer);
    
public voidsetAcousticState(java.lang.String state)
Set the acoustic state vector.

param
state String containing the acoustic state vector.
hide

        SR_AcousticStateSet(mRecognizer, state);
    
public voidstart()
Start recognition.

        // TODO: shouldn't be here?
        // Activate the "trash" rule of the active grammar with weight 1 before starting.
        // NOTE(review): dereferences mActiveGrammar; presumably it is assigned by
        // Grammar.setupRecognizer() — confirm it cannot be null here.
        SR_RecognizerActivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash", 1);
        SR_RecognizerStart(mRecognizer);
    
public voidstop()
Stop the Recognizer.

        SR_RecognizerStop(mRecognizer);
        SR_RecognizerDeactivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash");