FileDocCategorySizeDatePackage
Recognizer.javaAPI DocAndroid 1.5 API27542Wed May 06 22:41:56 BST 2009android.speech.srec

Recognizer

public final class Recognizer extends Object
Simple, synchronous speech recognizer, using the Nuance SREC package. Usage proceeds as follows:
  • Create a Recognizer.
  • Create a Recognizer.Grammar.
  • Setup the Recognizer.Grammar.
  • Reset the Recognizer.Grammar slots, if needed.
  • Fill the Recognizer.Grammar slots, if needed.
  • Compile the Recognizer.Grammar, if needed.
  • Save the filled Recognizer.Grammar, if needed.
  • Start the Recognizer.
  • Loop over advance and putAudio until recognition is complete.
  • Fetch and process results, or notify of failure.
  • Stop the Recognizer.
  • Destroy the Recognizer.

Below is example code


// create and start audio input
InputStream audio = new MicrophoneInputStream(11025, 11025*5);
// create a Recognizer
String cdir = Recognizer.getConfigDir(null);
Recognizer recognizer = new Recognizer(cdir + "/baseline11k.par");
// create and load a Grammar
Recognizer.Grammar grammar = recognizer.new Grammar(cdir + "/grammars/VoiceDialer.g2g");
// setup the Grammar to work with the Recognizer
grammar.setupRecognizer();
// fill the Grammar slots with names and save, if required
grammar.resetAllSlots();
for (String name : names) grammar.addWordToSlot("@Names", name, null, 1, "V=1");
grammar.compile();
grammar.save(".../foo.g2g");
// start the Recognizer
recognizer.start();
// loop over Recognizer events
while (true) {
switch (recognizer.advance()) {
case Recognizer.EVENT_INCOMPLETE:
case Recognizer.EVENT_STARTED:
case Recognizer.EVENT_START_OF_VOICING:
case Recognizer.EVENT_END_OF_VOICING:
// let the Recognizer continue to run
continue;
case Recognizer.EVENT_RECOGNITION_RESULT:
// success, so fetch results here!
for (int i = 0; i < recognizer.getResultCount(); i++) {
String result = recognizer.getResult(i, Recognizer.KEY_LITERAL);
}
break;
case Recognizer.EVENT_NEED_MORE_AUDIO:
// put more audio in the Recognizer
recognizer.putAudio(audio);
continue;
default:
notifyFailure();
break;
}
break;
}
// stop the Recognizer
recognizer.stop();
// destroy the Recognizer
recognizer.destroy();
// stop the audio device
audio.close();

Fields Summary
private static String
TAG
public static final String
KEY_CONFIDENCE
Result key corresponding to confidence score.
public static final String
KEY_LITERAL
Result key corresponding to literal text.
public static final String
KEY_MEANING
Result key corresponding to semantic meaning text.
private int
mVocabulary
private int
mRecognizer
private Grammar
mActiveGrammar
private byte[]
mPutAudioBuffer
public static final int
EVENT_INVALID
Reserved value.
public static final int
EVENT_NO_MATCH
Recognizer could not find a match for the utterance.
public static final int
EVENT_INCOMPLETE
Recognizer processed one frame of audio.
public static final int
EVENT_STARTED
Recognizer has just been started.
public static final int
EVENT_STOPPED
Recognizer is stopped.
public static final int
EVENT_START_OF_VOICING
Beginning of speech detected.
public static final int
EVENT_END_OF_VOICING
End of speech detected.
public static final int
EVENT_SPOKE_TOO_SOON
Beginning of utterance occurred too soon.
public static final int
EVENT_RECOGNITION_RESULT
Recognition match detected.
public static final int
EVENT_START_OF_UTTERANCE_TIMEOUT
Timeout occurred before beginning of utterance.
public static final int
EVENT_RECOGNITION_TIMEOUT
Timeout occurred before speech recognition could complete.
public static final int
EVENT_NEED_MORE_AUDIO
Not enough samples to process one frame.
public static final int
EVENT_MAX_SPEECH
More audio encountered than is allowed by 'swirec_max_speech_duration'.
Constructors Summary
public Recognizer(String configFile)
Create an instance of a SREC speech recognizer.

param
configFile pathname of the baseline*.par configuration file, which in turn contains references to dictionaries, speech models, and other data needed to configure and operate the recognizer. A separate config file is needed for each audio sample rate. Two files, baseline11k.par and baseline8k.par, which correspond to 11025 and 8000 hz, are present in the directory indicated by {@link #getConfigDir}.
throws
IOException

        // Initialize native state in the order that destroy() later unwinds
        // in reverse: PMem first, then the SREC session built from
        // configFile, then the recognizer and vocabulary handles that are
        // stored on this instance.
        PMemInit();
        SR_SessionCreate(configFile);
        mRecognizer = SR_RecognizerCreate();
        SR_RecognizerSetup(mRecognizer);
        mVocabulary = SR_VocabularyLoad();
    
Methods Summary
private static native voidPMemInit()

private static native voidPMemShutdown()

private static native java.lang.StringSR_AcousticStateGet(int recognizer)

private static native voidSR_AcousticStateReset(int recognizer)

private static native voidSR_AcousticStateSet(int recognizer, java.lang.String state)

private static native voidSR_GrammarAddWordToSlot(int grammar, java.lang.String slot, java.lang.String word, java.lang.String pronunciation, int weight, java.lang.String tag)

private static native voidSR_GrammarAllowAll(int grammar)

private static native voidSR_GrammarAllowOnly(int grammar, java.lang.String transcription)

private static native voidSR_GrammarCompile(int grammar)

private static native intSR_GrammarCreate()

private static native voidSR_GrammarDestroy(int grammar)

private static native intSR_GrammarLoad(java.lang.String filename)

private static native voidSR_GrammarResetAllSlots(int grammar)

private static native voidSR_GrammarSave(int grammar, java.lang.String filename)

private static native voidSR_GrammarSetupRecognizer(int grammar, int recognizer)

private static native voidSR_GrammarSetupVocabulary(int grammar, int vocabulary)

private static native voidSR_GrammarUnsetupRecognizer(int grammar)

private static native voidSR_RecognizerActivateRule(int recognizer, int grammar, java.lang.String ruleName, int weight)

private static native intSR_RecognizerAdvance(int recognizer)

private static native booleanSR_RecognizerCheckGrammarConsistency(int recognizer, int grammar)

private static native intSR_RecognizerCreate()

private static native voidSR_RecognizerDeactivateAllRules(int recognizer)

private static native voidSR_RecognizerDeactivateRule(int recognizer, int grammar, java.lang.String ruleName)

private static native voidSR_RecognizerDestroy(int recognizer)

private static native booleanSR_RecognizerGetBoolParameter(int recognizer, java.lang.String key)

private static native java.lang.StringSR_RecognizerGetParameter(int recognizer, java.lang.String key)

private static native intSR_RecognizerGetSize_tParameter(int recognizer, java.lang.String key)

private static native booleanSR_RecognizerHasSetupRules(int recognizer)

private static native booleanSR_RecognizerIsActiveRule(int recognizer, int grammar, java.lang.String ruleName)

private static native booleanSR_RecognizerIsSetup(int recognizer)

private static native booleanSR_RecognizerIsSignalClipping(int recognizer)

private static native booleanSR_RecognizerIsSignalDCOffset(int recognizer)

private static native booleanSR_RecognizerIsSignalNoisy(int recognizer)

private static native booleanSR_RecognizerIsSignalTooFewSamples(int recognizer)

private static native booleanSR_RecognizerIsSignalTooManySamples(int recognizer)

private static native booleanSR_RecognizerIsSignalTooQuiet(int recognizer)

private static native intSR_RecognizerPutAudio(int recognizer, byte[] buffer, int offset, int length, boolean isLast)

private static native intSR_RecognizerResultGetKeyCount(int recognizer, int nbest)

private static native java.lang.String[]SR_RecognizerResultGetKeyList(int recognizer, int nbest)

private static native intSR_RecognizerResultGetSize(int recognizer)

private static native java.lang.StringSR_RecognizerResultGetValue(int recognizer, int nbest, java.lang.String key)

private static native byte[]SR_RecognizerResultGetWaveform(int recognizer)

private static native voidSR_RecognizerSetBoolParameter(int recognizer, java.lang.String key, boolean value)

private static native voidSR_RecognizerSetParameter(int recognizer, java.lang.String key, java.lang.String value)

private static native voidSR_RecognizerSetSize_tParameter(int recognizer, java.lang.String key, int value)

private static native voidSR_RecognizerSetup(int recognizer)

private static native voidSR_RecognizerSetupRule(int recognizer, int grammar, java.lang.String ruleName)

private static native voidSR_RecognizerStart(int recognizer)

private static native voidSR_RecognizerStop(int recognizer)

private static native voidSR_RecognizerUnsetup(int recognizer)

private static native voidSR_SessionCreate(java.lang.String filename)

private static native voidSR_SessionDestroy()

private static native voidSR_VocabularyDestroy(int vocabulary)

private static native java.lang.StringSR_VocabularyGetPronunciation(int vocabulary, java.lang.String word)

private static native intSR_VocabularyLoad()

public intadvance()
Process some audio and return the current status.

return
recognition event, one of:
  • EVENT_INVALID
  • EVENT_NO_MATCH
  • EVENT_INCOMPLETE
  • EVENT_STARTED
  • EVENT_STOPPED
  • EVENT_START_OF_VOICING
  • EVENT_END_OF_VOICING
  • EVENT_SPOKE_TOO_SOON
  • EVENT_RECOGNITION_RESULT
  • EVENT_START_OF_UTTERANCE_TIMEOUT
  • EVENT_RECOGNITION_TIMEOUT
  • EVENT_NEED_MORE_AUDIO
  • EVENT_MAX_SPEECH

        return SR_RecognizerAdvance(mRecognizer);
    
public voiddestroy()
Clean up resources.

        // Each teardown step sits in its own try/finally so that a failure in
        // one native call does not prevent the remaining steps from running.
        try {
            if (mVocabulary != 0) SR_VocabularyDestroy(mVocabulary);
        } finally {
            // Clear the handle even on failure; finalize() treats a non-zero
            // handle as "destroy() was never called".
            mVocabulary = 0;
            try {
                if (mRecognizer != 0) SR_RecognizerUnsetup(mRecognizer);
            } finally {
                try {
                    if (mRecognizer != 0) SR_RecognizerDestroy(mRecognizer);
                } finally {
                    mRecognizer = 0;
                    // NOTE(review): the session and PMem shutdown below run
                    // unconditionally, so a second destroy() call repeats
                    // them - confirm the native side tolerates that.
                    try {
                        SR_SessionDestroy();
                    } finally {
                        PMemShutdown();
                    }
                }
            }
        }
    
public static java.lang.StringeventToString(int event)
Produce a displayable string from an advance event.

param
event
return
String representing the event.


                        
         
        switch (event) {
            case EVENT_INVALID:
                return "EVENT_INVALID";
            case EVENT_NO_MATCH:
                return "EVENT_NO_MATCH";
            case EVENT_INCOMPLETE:
                return "EVENT_INCOMPLETE";
            case EVENT_STARTED:
                return "EVENT_STARTED";
            case EVENT_STOPPED:
                return "EVENT_STOPPED";
            case EVENT_START_OF_VOICING:
                return "EVENT_START_OF_VOICING";
            case EVENT_END_OF_VOICING:
                return "EVENT_END_OF_VOICING";
            case EVENT_SPOKE_TOO_SOON:
                return "EVENT_SPOKE_TOO_SOON";
            case EVENT_RECOGNITION_RESULT:
                return "EVENT_RECOGNITION_RESULT";
            case EVENT_START_OF_UTTERANCE_TIMEOUT:
                return "EVENT_START_OF_UTTERANCE_TIMEOUT";
            case EVENT_RECOGNITION_TIMEOUT:
                return "EVENT_RECOGNITION_TIMEOUT";
            case EVENT_NEED_MORE_AUDIO:
                return "EVENT_NEED_MORE_AUDIO";
            case EVENT_MAX_SPEECH:
                return "EVENT_MAX_SPEECH";
        }
        return "EVENT_" + event;
    
protected voidfinalize()
Clean up resources.

        // A non-zero handle means destroy() was never called: release the
        // native resources now, then flag the missing call.
        if (mVocabulary != 0 || mRecognizer != 0) {
            destroy();
            // NOTE(review): exceptions thrown from finalize() are typically
            // ignored by the runtime, so this may never surface - confirm.
            throw new IllegalStateException("someone forgot to destroy Recognizer");
        }
    
public java.lang.StringgetAcousticState()
Get the acoustic state vector.

return
String containing the acoustic state vector.
hide

        return SR_AcousticStateGet(mRecognizer);
    
public static java.lang.StringgetConfigDir(java.util.Locale locale)
Get the pathname of the SREC configuration directory corresponding to the language indicated by the Locale. This directory contains dictionaries, speech models, configuration files, and other data needed by the Recognizer.

param
locale Locale corresponding to the desired language, or null for default, currently Locale.US.
return
Pathname of the configuration directory.

    
                                                            
         
        // Fall back to US English when the caller does not specify a locale.
        if (locale == null) locale = Locale.US;
        // The directory name is the locale string with '_' replaced by '.',
        // lower-cased (e.g. "en_US" -> "en.us"). Lower-casing is pinned to
        // Locale.US so the path does not depend on the device's default
        // locale (e.g. Turkish dotless-i rules).
        String dir = "/system/usr/srec/config/" +
                locale.toString().replace('_', '.').toLowerCase(Locale.US);
        // Only report the directory when it actually exists; otherwise the
        // caller gets null.
        if ((new File(dir)).isDirectory()) return dir;
        return null;
    
public java.lang.StringgetResult(int index, java.lang.String key)
Get a result value. Must be called after EVENT_RECOGNITION_RESULT is returned by advance, but before stop.

param
index index of the result.
param
key key of the result. This is typically one of KEY_CONFIDENCE, KEY_LITERAL, or KEY_MEANING, but the user can also define their own keys in a grxml file, or in the tag slot of Grammar.addWordToSlot.
return
the result.

        return SR_RecognizerResultGetValue(mRecognizer, index, key);
    
public intgetResultCount()
Get the number of recognition results. Must be called after EVENT_RECOGNITION_RESULT is returned by advance, but before stop.

return
number of results in nbest list.


                                    
       
        return SR_RecognizerResultGetSize(mRecognizer);
    
public java.lang.String[]getResultKeys(int index)
Get a set of keys for the result. Must be called after EVENT_RECOGNITION_RESULT is returned by advance, but before stop.

param
index index of result.
return
array of keys.

        return SR_RecognizerResultGetKeyList(mRecognizer, index);
    
public intputAudio(byte[] buf, int offset, int length, boolean isLast)
Put audio samples into the Recognizer.

param
buf holds the audio samples.
param
offset offset of the first sample.
param
length number of bytes containing samples.
param
isLast indicates no more audio data, normally false.
return
number of bytes accepted.

        return SR_RecognizerPutAudio(mRecognizer, buf, offset, length, isLast);
    
public voidputAudio(java.io.InputStream audio)
Read audio samples from an InputStream and put them in the Recognizer.

param
audio InputStream containing PCM audio samples.

        // Lazily allocate the reusable 512-byte transfer buffer.
        if (mPutAudioBuffer == null) {
            mPutAudioBuffer = new byte[512];
        }

        // Pull the next chunk of samples from the stream.
        final int count = audio.read(mPutAudioBuffer);

        if (count == -1) {
            // End of stream: hand the Recognizer an empty, final chunk.
            SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, 0, true);
        } else {
            // Forward the chunk; the Recognizer must accept every byte of it.
            final int accepted =
                    SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, count, false);
            if (count != accepted) {
                throw new IOException("SR_RecognizerPutAudio failed nbytes=" + count);
            }
        }
    
public voidresetAcousticState()
Reset the acoustic state vector to its default value.

hide

        SR_AcousticStateReset(mRecognizer);
    
public voidsetAcousticState(java.lang.String state)
Set the acoustic state vector.

param
state String containing the acoustic state vector.
hide

        SR_AcousticStateSet(mRecognizer, state);
    
public voidstart()
Start recognition

        // TODO: shouldn't be here?
        // Activate the "trash" rule of the active grammar (weight 1) before
        // starting the native recognizer.
        // NOTE(review): mActiveGrammar is dereferenced without a null check -
        // presumably it is assigned when a Grammar is set up; confirm.
        SR_RecognizerActivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash", 1);
        SR_RecognizerStart(mRecognizer);
    
public voidstop()
Stop the Recognizer.

        SR_RecognizerStop(mRecognizer);
        SR_RecognizerDeactivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash");