File | Doc | Category | Size | Date | Package
BrazilianStemmer.java | API Doc | Apache Lucene 2.1.0 | 27334 | Wed Feb 14 10:46:28 GMT 2007 | org.apache.lucene.analysis.br

BrazilianStemmer

public class BrazilianStemmer extends Object
A stemmer for Brazilian words.

Fields Summary
private String
TERM
Changed term
private String
CT
private String
R1
private String
R2
private String
RV
Constructors Summary
public BrazilianStemmer()

	
Methods Summary
private java.lang.String changeTerm(java.lang.String value)
1) Turn to lowercase 2) Remove accents 3) ã -> a ; õ -> o 4) ç -> c

return
null or a string transformed

    // be-safe !!! A missing term has no normalised form.
    if (value == null) {
      return null ;
    }

    final String lowered = value.toLowerCase() ;

    // Accumulate in a buffer: appending to a String inside the loop would
    // re-copy the partial result for every character.
    final StringBuffer folded = new StringBuffer(lowered.length()) ;

    for (int j = 0 ; j < lowered.length() ; j++) {
      final char c = lowered.charAt(j) ;

      // Fold each accented letter onto its plain ASCII base letter;
      // every other character is copied through unchanged.
      switch (c) {
        case 'á': case 'â': case 'ã':
          folded.append('a') ; break ;
        case 'é': case 'ê':
          folded.append('e') ; break ;
        case 'í':
          folded.append('i') ; break ;
        case 'ó': case 'ô': case 'õ':
          folded.append('o') ; break ;
        case 'ú': case 'ü':
          folded.append('u') ; break ;
        case 'ç':
          folded.append('c') ; break ;
        case 'ñ':
          folded.append('n') ; break ;
        default:
          folded.append(c) ;
      }
    }

    return folded.toString() ;
  
private void createCT(java.lang.String term)
Creates CT (changed term), substituting 'ã' and 'õ' for 'a~' and 'o~', and removing one leading and one trailing punctuation or quote character.

    CT = changeTerm(term) ;

    if (CT.length() < 2) return ;

    // if the first character is ... , remove it
    if ((CT.charAt(0) == '"")  ||
        (CT.charAt(0) == '\'") ||
        (CT.charAt(0) == '-")  ||
        (CT.charAt(0) == ',")  ||
        (CT.charAt(0) == ';")  ||
        (CT.charAt(0) == '.")  ||
        (CT.charAt(0) == '?")  ||
        (CT.charAt(0) == '!")
        ) {
        CT = CT.substring(1);
    }

    if (CT.length() < 2) return ;

    // if the last character is ... , remove it
    if ((CT.charAt(CT.length()-1) == '-") ||
        (CT.charAt(CT.length()-1) == ',") ||
        (CT.charAt(CT.length()-1) == ';") ||
        (CT.charAt(CT.length()-1) == '.") ||
        (CT.charAt(CT.length()-1) == '?") ||
        (CT.charAt(CT.length()-1) == '!") ||
        (CT.charAt(CT.length()-1) == '\'") ||
        (CT.charAt(CT.length()-1) == '"")
        ) {
        CT = CT.substring(0,CT.length()-1);
    }
  
private java.lang.String getR1(java.lang.String value)
Gets R1. R1 is the region after the first non-vowel following a vowel, or is the null region at the end of the word if there is no such non-vowel.

return
null or a string representing R1

    // be-safe !!!
    if (value == null) {
      return null ;
    }

    // Both scans stop before the final character, so a match is only
    // accepted at an index strictly below 'last'.
    final int last = value.length() - 1 ;
    int pos = 0 ;

    // advance to the first vowel
    while ((pos < last) && !isVowel(value.charAt(pos))) {
      pos++ ;
    }
    if (pos >= last) {
      return null ;
    }

    // advance to the first non-vowel after that vowel
    while ((pos < last) && isVowel(value.charAt(pos))) {
      pos++ ;
    }
    if (pos >= last) {
      return null ;
    }

    // R1 is everything after that non-vowel
    return value.substring(pos + 1) ;
  
private java.lang.String getRV(java.lang.String value)
Gets RV. If the second letter is a consonant, RV is the region after the next following vowel; or if the first two letters are vowels, RV is the region after the next consonant; and otherwise (consonant-vowel case) RV is the region after the third letter. But RV is the end of the word if these positions cannot be found.

return
null or a string representing RV

    // be-safe !!!
    if (value == null) {
      return null ;
    }

    // Index of the final character; the scans below never inspect it.
    final int last = value.length() - 1 ;

    // Case 1: the second letter is a consonant -> RV starts after the
    // next vowel found from the third letter onwards.
    if ((last > 0) && !isVowel(value.charAt(1))) {
      int pos = 2 ;
      while ((pos < last) && !isVowel(value.charAt(pos))) {
        pos++ ;
      }
      if (pos < last) {
        return value.substring(pos + 1) ;
      }
    }

    // Case 2: the first two letters are vowels -> RV starts after the
    // next consonant found from the third letter onwards.
    if ((last > 1) && isVowel(value.charAt(0)) && isVowel(value.charAt(1))) {
      int pos = 2 ;
      while ((pos < last) && isVowel(value.charAt(pos))) {
        pos++ ;
      }
      if (pos < last) {
        return value.substring(pos + 1) ;
      }
    }

    // Fallback (also reached when a scan above found nothing): RV is the
    // region after the third letter, provided the word has more than three.
    return (last > 2) ? value.substring(3) : null ;
  
private boolean isIndexable(java.lang.String term)
Checks whether a term can be indexed.

return
true if it can be indexed

		return (term.length() < 30) && (term.length() > 2) ;
	
private boolean isStemmable(java.lang.String term)
Checks whether a term can be processed correctly.

return
true if, and only if, the given term consists only of letters.

		for ( int c = 0; c < term.length(); c++ ) {
			// Discard terms that contain non-letter characters.
			if ( !Character.isLetter(term.charAt(c))) {
				return false;
			}
		}
		return true;
	
private boolean isVowel(char value)
Checks whether the character is one of 'a', 'e', 'i', 'o', 'u'.

return
true if is vowel

    return (value == 'a") ||
           (value == 'e") ||
           (value == 'i") ||
           (value == 'o") ||
           (value == 'u") ;
  
public java.lang.String log()
For log and debug purpose

return
TERM, CT, RV, R1 and R2

    return " (TERM = " + TERM + ")" +
           " (CT = " + CT +")" +
           " (RV = " + RV +")" +
           " (R1 = " + R1 +")" +
           " (R2 = " + R2 +")" ;
	
private java.lang.String removeSuffix(java.lang.String value, java.lang.String toRemove)
Remove a string suffix

return
the String without the suffix

    // be-safe !!!
    if ((value == null) ||
        (toRemove == null) ||
        !suffix(value,toRemove) ) {
      return value ;
    }

    return value.substring(0,value.length()-toRemove.length()) ;
  
private java.lang.String replaceSuffix(java.lang.String value, java.lang.String toReplace, java.lang.String changeTo)
Replace a string suffix by another

return
the replaced String

    // be-safe !!! With any argument missing the input is returned as is.
    if (value == null || toReplace == null || changeTo == null) {
      return value ;
    }

    final String stripped = removeSuffix(value,toReplace) ;

    // removeSuffix() returning an equal string means nothing was removed,
    // so there is nothing to append the replacement to.
    return value.equals(stripped) ? value : stripped + changeTo ;
  
protected java.lang.String stem(java.lang.String term)
Stems the given term to a unique discriminator.

param
term The term that should be stemmed.
return
Discriminator for term

    // A null term cannot be normalised: createCT() would dereference the
    // null result of changeTerm(). Report it like any other term that
    // cannot be indexed.
    if (term == null) {
      return null ;
    }

    // creates CT
    createCT(term) ;

    // Terms of unsuitable length are rejected outright ...
    if ( !isIndexable( CT ) ) {
      return null ;
    }
    // ... while terms containing non-letters are returned normalised but unstemmed.
    if ( !isStemmable( CT ) ) {
      return CT ;
    }

    // Regions are computed once, from the normalised term, before any step runs.
    R1 = getR1(CT) ;
    R2 = getR1(R1) ;
    RV = getRV(CT) ;
    TERM = term + ";" +CT ;

    // step2 (verb suffixes) is only tried when step1 removed nothing.
    final boolean altered = step1() || step2() ;

    if (altered) {
      step3() ;
    } else {
      step4() ;
    }

    step5() ;

    return CT ;
	
private boolean step1()
Standard suffix removal. Search for the longest among the following suffixes, and perform the following actions:

return
false if no ending was removed

    if (CT == null) return false ;

    // suffix lenght = 7
    if (suffix(CT,"uciones") && suffix(R2,"uciones")) {
        CT = replaceSuffix(CT,"uciones","u") ; return true;
    }

    // suffix lenght = 6
    if (CT.length() >= 6) {
      if (suffix(CT,"imentos") && suffix(R2,"imentos")) {
          CT = removeSuffix(CT,"imentos") ; return true;
      }
      if (suffix(CT,"amentos") && suffix(R2,"amentos")) {
          CT = removeSuffix(CT,"amentos") ; return true;
      }
      if (suffix(CT,"adores") && suffix(R2,"adores")) {
          CT = removeSuffix(CT,"adores") ; return true;
      }
      if (suffix(CT,"adoras") && suffix(R2,"adoras")) {
          CT = removeSuffix(CT,"adoras") ; return true;
      }
      if (suffix(CT,"logias") && suffix(R2,"logias")) {
          replaceSuffix(CT,"logias","log") ; return true;
      }
      if (suffix(CT,"encias") && suffix(R2,"encias")) {
          CT = replaceSuffix(CT,"encias","ente") ; return true;
      }
      if (suffix(CT,"amente") && suffix(R1,"amente")) {
          CT = removeSuffix(CT,"amente") ; return true;
      }
      if (suffix(CT,"idades") && suffix(R2,"idades")) {
          CT = removeSuffix(CT,"idades") ; return true;
      }
    }

    // suffix lenght = 5
    if (CT.length() >= 5) {
      if (suffix(CT,"acoes") && suffix(R2,"acoes")) {
          CT = removeSuffix(CT,"acoes") ; return true;
      }
      if (suffix(CT,"imento") && suffix(R2,"imento")) {
          CT = removeSuffix(CT,"imento") ; return true;
      }
      if (suffix(CT,"amento") && suffix(R2,"amento")) {
          CT = removeSuffix(CT,"amento") ; return true;
      }
      if (suffix(CT,"adora") && suffix(R2,"adora")) {
          CT = removeSuffix(CT,"adora") ; return true;
      }
      if (suffix(CT,"ismos") && suffix(R2,"ismos")) {
          CT = removeSuffix(CT,"ismos") ; return true;
      }
      if (suffix(CT,"istas") && suffix(R2,"istas")) {
          CT = removeSuffix(CT,"istas") ; return true;
      }
      if (suffix(CT,"logia") && suffix(R2,"logia")) {
          CT = replaceSuffix(CT,"logia","log") ; return true;
      }
      if (suffix(CT,"ucion") && suffix(R2,"ucion")) {
          CT = replaceSuffix(CT,"ucion","u") ; return true;
      }
      if (suffix(CT,"encia") && suffix(R2,"encia")) {
          CT = replaceSuffix(CT,"encia","ente") ; return true;
      }
      if (suffix(CT,"mente") && suffix(R2,"mente")) {
          CT = removeSuffix(CT,"mente") ; return true;
      }
      if (suffix(CT,"idade") && suffix(R2,"idade")) {
          CT = removeSuffix(CT,"idade") ; return true;
      }
    }

    // suffix lenght = 4
    if (CT.length() >= 4) {
      if (suffix(CT,"acao") && suffix(R2,"acao")) {
          CT = removeSuffix(CT,"acao") ; return true;
      }
      if (suffix(CT,"ezas") && suffix(R2,"ezas")) {
          CT = removeSuffix(CT,"ezas") ; return true;
      }
      if (suffix(CT,"icos") && suffix(R2,"icos")) {
          CT = removeSuffix(CT,"icos") ; return true ;
      }
      if (suffix(CT,"icas") && suffix(R2,"icas")) {
          CT = removeSuffix(CT,"icas") ; return true ;
      }
      if (suffix(CT,"ismo") && suffix(R2,"ismo")) {
          CT = removeSuffix(CT,"ismo") ; return true ;
      }
      if (suffix(CT,"avel") && suffix(R2,"avel")) {
          CT = removeSuffix(CT,"avel") ; return true ;
      }
      if (suffix(CT,"ivel") && suffix(R2,"ivel")) {
          CT = removeSuffix(CT,"ivel") ; return true ;
      }
      if (suffix(CT,"ista") && suffix(R2,"ista")) {
          CT = removeSuffix(CT,"ista") ; return true ;
      }
      if (suffix(CT,"osos") && suffix(R2,"osos")) {
          CT = removeSuffix(CT,"osos") ; return true ;
      }
      if (suffix(CT,"osas") && suffix(R2,"osas")) {
          CT = removeSuffix(CT,"osas") ; return true ;
      }
      if (suffix(CT,"ador") && suffix(R2,"ador")) {
          CT = removeSuffix(CT,"ador") ; return true ;
      }
      if (suffix(CT,"ivas") && suffix(R2,"ivas")) {
          CT = removeSuffix(CT,"ivas") ; return true ;
      }
      if (suffix(CT,"ivos") && suffix(R2,"ivos")) {
          CT = removeSuffix(CT,"ivos") ; return true ;
      }
      if (suffix(CT,"iras") &&
          suffix(RV,"iras") &&
          suffixPreceded(CT,"iras","e")) {
          CT = replaceSuffix(CT,"iras","ir") ; return true ;
      }
    }

    // suffix lenght = 3
    if (CT.length() >= 3) {
      if (suffix(CT,"eza") && suffix(R2,"eza")) {
          CT = removeSuffix(CT,"eza") ; return true ;
      }
      if (suffix(CT,"ico") && suffix(R2,"ico")) {
          CT = removeSuffix(CT,"ico") ; return true ;
      }
      if (suffix(CT,"ica") && suffix(R2,"ica")) {
          CT = removeSuffix(CT,"ica") ; return true ;
      }
      if (suffix(CT,"oso") && suffix(R2,"oso")) {
          CT = removeSuffix(CT,"oso") ; return true ;
      }
      if (suffix(CT,"osa") && suffix(R2,"osa")) {
          CT = removeSuffix(CT,"osa") ; return true ;
      }
      if (suffix(CT,"iva") && suffix(R2,"iva")) {
          CT = removeSuffix(CT,"iva") ; return true ;
      }
      if (suffix(CT,"ivo") && suffix(R2,"ivo")) {
          CT = removeSuffix(CT,"ivo") ; return true ;
      }
      if (suffix(CT,"ira") &&
          suffix(RV,"ira") &&
          suffixPreceded(CT,"ira","e")) {
          CT = replaceSuffix(CT,"ira","ir") ; return true ;
      }
    }

    // no ending was removed by step1
    return false ;
  
private boolean step2()
Verb suffixes. Search for the longest among the following suffixes in RV, and if found, delete.

return
false if no ending was removed

    if (RV == null) return false ;

    // suffix lenght = 7
    if (RV.length() >= 7) {
      if (suffix(RV,"issemos")) {
        CT = removeSuffix(CT,"issemos") ; return true;
      }
      if (suffix(RV,"essemos")) {
        CT = removeSuffix(CT,"essemos") ; return true;
      }
      if (suffix(RV,"assemos")) {
        CT = removeSuffix(CT,"assemos") ; return true;
      }
      if (suffix(RV,"ariamos")) {
        CT = removeSuffix(CT,"ariamos") ; return true;
      }
      if (suffix(RV,"eriamos")) {
        CT = removeSuffix(CT,"eriamos") ; return true;
      }
      if (suffix(RV,"iriamos")) {
        CT = removeSuffix(CT,"iriamos") ; return true;
      }
    }

    // suffix lenght = 6
    if (RV.length() >= 6) {
      if (suffix(RV,"iremos")) {
        CT = removeSuffix(CT,"iremos") ; return true;
      }
      if (suffix(RV,"eremos")) {
        CT = removeSuffix(CT,"eremos") ; return true;
      }
      if (suffix(RV,"aremos")) {
        CT = removeSuffix(CT,"aremos") ; return true;
      }
      if (suffix(RV,"avamos")) {
        CT = removeSuffix(CT,"avamos") ; return true;
      }
      if (suffix(RV,"iramos")) {
        CT = removeSuffix(CT,"iramos") ; return true;
      }
      if (suffix(RV,"eramos")) {
        CT = removeSuffix(CT,"eramos") ; return true;
      }
      if (suffix(RV,"aramos")) {
        CT = removeSuffix(CT,"aramos") ; return true;
      }
      if (suffix(RV,"asseis")) {
        CT = removeSuffix(CT,"asseis") ; return true;
      }
      if (suffix(RV,"esseis")) {
        CT = removeSuffix(CT,"esseis") ; return true;
      }
      if (suffix(RV,"isseis")) {
        CT = removeSuffix(CT,"isseis") ; return true;
      }
      if (suffix(RV,"arieis")) {
        CT = removeSuffix(CT,"arieis") ; return true;
      }
      if (suffix(RV,"erieis")) {
        CT = removeSuffix(CT,"erieis") ; return true;
      }
      if (suffix(RV,"irieis")) {
        CT = removeSuffix(CT,"irieis") ; return true;
      }
    }


    // suffix lenght = 5
    if (RV.length() >= 5) {
      if (suffix(RV,"irmos")) {
        CT = removeSuffix(CT,"irmos") ; return true;
      }
      if (suffix(RV,"iamos")) {
        CT = removeSuffix(CT,"iamos") ; return true;
      }
      if (suffix(RV,"armos")) {
        CT = removeSuffix(CT,"armos") ; return true;
      }
      if (suffix(RV,"ermos")) {
        CT = removeSuffix(CT,"ermos") ; return true;
      }
      if (suffix(RV,"areis")) {
        CT = removeSuffix(CT,"areis") ; return true;
      }
      if (suffix(RV,"ereis")) {
        CT = removeSuffix(CT,"ereis") ; return true;
      }
      if (suffix(RV,"ireis")) {
        CT = removeSuffix(CT,"ireis") ; return true;
      }
      if (suffix(RV,"asses")) {
        CT = removeSuffix(CT,"asses") ; return true;
      }
      if (suffix(RV,"esses")) {
        CT = removeSuffix(CT,"esses") ; return true;
      }
      if (suffix(RV,"isses")) {
        CT = removeSuffix(CT,"isses") ; return true;
      }
      if (suffix(RV,"astes")) {
        CT = removeSuffix(CT,"astes") ; return true;
      }
      if (suffix(RV,"assem")) {
        CT = removeSuffix(CT,"assem") ; return true;
      }
      if (suffix(RV,"essem")) {
        CT = removeSuffix(CT,"essem") ; return true;
      }
      if (suffix(RV,"issem")) {
        CT = removeSuffix(CT,"issem") ; return true;
      }
      if (suffix(RV,"ardes")) {
        CT = removeSuffix(CT,"ardes") ; return true;
      }
      if (suffix(RV,"erdes")) {
        CT = removeSuffix(CT,"erdes") ; return true;
      }
      if (suffix(RV,"irdes")) {
        CT = removeSuffix(CT,"irdes") ; return true;
      }
      if (suffix(RV,"ariam")) {
        CT = removeSuffix(CT,"ariam") ; return true;
      }
      if (suffix(RV,"eriam")) {
        CT = removeSuffix(CT,"eriam") ; return true;
      }
      if (suffix(RV,"iriam")) {
        CT = removeSuffix(CT,"iriam") ; return true;
      }
      if (suffix(RV,"arias")) {
        CT = removeSuffix(CT,"arias") ; return true;
      }
      if (suffix(RV,"erias")) {
        CT = removeSuffix(CT,"erias") ; return true;
      }
      if (suffix(RV,"irias")) {
        CT = removeSuffix(CT,"irias") ; return true;
      }
      if (suffix(RV,"estes")) {
        CT = removeSuffix(CT,"estes") ; return true;
      }
      if (suffix(RV,"istes")) {
        CT = removeSuffix(CT,"istes") ; return true;
      }
      if (suffix(RV,"areis")) {
        CT = removeSuffix(CT,"areis") ; return true;
      }
      if (suffix(RV,"aveis")) {
        CT = removeSuffix(CT,"aveis") ; return true;
      }
    }

    // suffix lenght = 4
    if (RV.length() >= 4) {
      if (suffix(RV,"aria")) {
        CT = removeSuffix(CT,"aria") ; return true;
      }
      if (suffix(RV,"eria")) {
        CT = removeSuffix(CT,"eria") ; return true;
      }
      if (suffix(RV,"iria")) {
        CT = removeSuffix(CT,"iria") ; return true;
      }
      if (suffix(RV,"asse")) {
        CT = removeSuffix(CT,"asse") ; return true;
      }
      if (suffix(RV,"esse")) {
        CT = removeSuffix(CT,"esse") ; return true;
      }
      if (suffix(RV,"isse")) {
        CT = removeSuffix(CT,"isse") ; return true;
      }
      if (suffix(RV,"aste")) {
        CT = removeSuffix(CT,"aste") ; return true;
      }
      if (suffix(RV,"este")) {
        CT = removeSuffix(CT,"este") ; return true;
      }
      if (suffix(RV,"iste")) {
        CT = removeSuffix(CT,"iste") ; return true;
      }
      if (suffix(RV,"arei")) {
        CT = removeSuffix(CT,"arei") ; return true;
      }
      if (suffix(RV,"erei")) {
        CT = removeSuffix(CT,"erei") ; return true;
      }
      if (suffix(RV,"irei")) {
        CT = removeSuffix(CT,"irei") ; return true;
      }
      if (suffix(RV,"aram")) {
        CT = removeSuffix(CT,"aram") ; return true;
      }
      if (suffix(RV,"eram")) {
        CT = removeSuffix(CT,"eram") ; return true;
      }
      if (suffix(RV,"iram")) {
        CT = removeSuffix(CT,"iram") ; return true;
      }
      if (suffix(RV,"avam")) {
        CT = removeSuffix(CT,"avam") ; return true;
      }
      if (suffix(RV,"arem")) {
        CT = removeSuffix(CT,"arem") ; return true;
      }
      if (suffix(RV,"erem")) {
        CT = removeSuffix(CT,"erem") ; return true;
      }
      if (suffix(RV,"irem")) {
        CT = removeSuffix(CT,"irem") ; return true;
      }
      if (suffix(RV,"ando")) {
        CT = removeSuffix(CT,"ando") ; return true;
      }
      if (suffix(RV,"endo")) {
        CT = removeSuffix(CT,"endo") ; return true;
      }
      if (suffix(RV,"indo")) {
        CT = removeSuffix(CT,"indo") ; return true;
      }
      if (suffix(RV,"arao")) {
        CT = removeSuffix(CT,"arao") ; return true;
      }
      if (suffix(RV,"erao")) {
        CT = removeSuffix(CT,"erao") ; return true;
      }
      if (suffix(RV,"irao")) {
        CT = removeSuffix(CT,"irao") ; return true;
      }
      if (suffix(RV,"adas")) {
        CT = removeSuffix(CT,"adas") ; return true;
      }
      if (suffix(RV,"idas")) {
        CT = removeSuffix(CT,"idas") ; return true;
      }
      if (suffix(RV,"aras")) {
        CT = removeSuffix(CT,"aras") ; return true;
      }
      if (suffix(RV,"eras")) {
        CT = removeSuffix(CT,"eras") ; return true;
      }
      if (suffix(RV,"iras")) {
        CT = removeSuffix(CT,"iras") ; return true;
      }
      if (suffix(RV,"avas")) {
        CT = removeSuffix(CT,"avas") ; return true;
      }
      if (suffix(RV,"ares")) {
        CT = removeSuffix(CT,"ares") ; return true;
      }
      if (suffix(RV,"eres")) {
        CT = removeSuffix(CT,"eres") ; return true;
      }
      if (suffix(RV,"ires")) {
        CT = removeSuffix(CT,"ires") ; return true;
      }
      if (suffix(RV,"ados")) {
        CT = removeSuffix(CT,"ados") ; return true;
      }
      if (suffix(RV,"idos")) {
        CT = removeSuffix(CT,"idos") ; return true;
      }
      if (suffix(RV,"amos")) {
        CT = removeSuffix(CT,"amos") ; return true;
      }
      if (suffix(RV,"emos")) {
        CT = removeSuffix(CT,"emos") ; return true;
      }
      if (suffix(RV,"imos")) {
        CT = removeSuffix(CT,"imos") ; return true;
      }
      if (suffix(RV,"iras")) {
        CT = removeSuffix(CT,"iras") ; return true;
      }
      if (suffix(RV,"ieis")) {
        CT = removeSuffix(CT,"ieis") ; return true;
      }
    }

    // suffix lenght = 3
    if (RV.length() >= 3) {
      if (suffix(RV,"ada")) {
        CT = removeSuffix(CT,"ada") ; return true;
      }
      if (suffix(RV,"ida")) {
        CT = removeSuffix(CT,"ida") ; return true;
      }
      if (suffix(RV,"ara")) {
        CT = removeSuffix(CT,"ara") ; return true;
      }
      if (suffix(RV,"era")) {
        CT = removeSuffix(CT,"era") ; return true;
      }
      if (suffix(RV,"ira")) {
        CT = removeSuffix(CT,"ava") ; return true;
      }
      if (suffix(RV,"iam")) {
        CT = removeSuffix(CT,"iam") ; return true;
      }
      if (suffix(RV,"ado")) {
        CT = removeSuffix(CT,"ado") ; return true;
      }
      if (suffix(RV,"ido")) {
        CT = removeSuffix(CT,"ido") ; return true;
      }
      if (suffix(RV,"ias")) {
        CT = removeSuffix(CT,"ias") ; return true;
      }
      if (suffix(RV,"ais")) {
        CT = removeSuffix(CT,"ais") ; return true;
      }
      if (suffix(RV,"eis")) {
        CT = removeSuffix(CT,"eis") ; return true;
      }
      if (suffix(RV,"ira")) {
        CT = removeSuffix(CT,"ira") ; return true;
      }
      if (suffix(RV,"ear")) {
        CT = removeSuffix(CT,"ear") ; return true;
      }
    }

    // suffix lenght = 2
    if (RV.length() >= 2) {
      if (suffix(RV,"ia")) {
        CT = removeSuffix(CT,"ia") ; return true;
      }
      if (suffix(RV,"ei")) {
        CT = removeSuffix(CT,"ei") ; return true;
      }
      if (suffix(RV,"am")) {
        CT = removeSuffix(CT,"am") ; return true;
      }
      if (suffix(RV,"em")) {
        CT = removeSuffix(CT,"em") ; return true;
      }
      if (suffix(RV,"ar")) {
        CT = removeSuffix(CT,"ar") ; return true;
      }
      if (suffix(RV,"er")) {
        CT = removeSuffix(CT,"er") ; return true;
      }
      if (suffix(RV,"ir")) {
        CT = removeSuffix(CT,"ir") ; return true;
      }
      if (suffix(RV,"as")) {
        CT = removeSuffix(CT,"as") ; return true;
      }
      if (suffix(RV,"es")) {
        CT = removeSuffix(CT,"es") ; return true;
      }
      if (suffix(RV,"is")) {
        CT = removeSuffix(CT,"is") ; return true;
      }
      if (suffix(RV,"eu")) {
        CT = removeSuffix(CT,"eu") ; return true;
      }
      if (suffix(RV,"iu")) {
        CT = removeSuffix(CT,"iu") ; return true;
      }
      if (suffix(RV,"iu")) {
        CT = removeSuffix(CT,"iu") ; return true;
      }
      if (suffix(RV,"ou")) {
        CT = removeSuffix(CT,"ou") ; return true;
      }
    }

    // no ending was removed by step2
    return false ;
  
private void step3()
Delete suffix 'i' if in RV and preceded by 'c'

    if (RV == null) return ;

    if (suffix(RV,"i") && suffixPreceded(RV,"i","c")) {
      CT = removeSuffix(CT,"i") ;
    }

  
private void step4()
Residual suffix. If the word ends with one of the suffixes (os a i o á í ó) in RV, delete it.

    if (RV == null) return  ;

    if (suffix(RV,"os")) {
      CT = removeSuffix(CT,"os") ; return ;
    }
    if (suffix(RV,"a")) {
      CT = removeSuffix(CT,"a") ; return ;
    }
    if (suffix(RV,"i")) {
      CT = removeSuffix(CT,"i") ; return ;
    }
    if (suffix(RV,"o")) {
      CT = removeSuffix(CT,"o") ; return ;
    }

  
private void step5()
If the word ends with one of (e é ê) in RV, delete it, and if it is preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV, delete the 'u' (or 'i'). Or if the word ends with ç, remove the cedilla.

    if (RV == null) return  ;

    if (suffix(RV,"e")) {
      if (suffixPreceded(RV,"e","gu")) {
        CT = removeSuffix(CT,"e") ;
        CT = removeSuffix(CT,"u") ;
        return ;
      }

      if (suffixPreceded(RV,"e","ci")) {
        CT = removeSuffix(CT,"e") ;
        CT = removeSuffix(CT,"i") ;
        return ;
      }

      CT = removeSuffix(CT,"e") ; return ;
    }
  
private boolean suffix(java.lang.String value, java.lang.String suffix)
Check if a string ends with a suffix

return
true if the string ends with the specified suffix


    // be-safe !!!
    if ((value == null) || (suffix == null)) {
      return false ;
    }

    if (suffix.length() > value.length()) {
      return false ;
    }

    return value.substring(value.length()-suffix.length()).equals(suffix);
  
private boolean suffixPreceded(java.lang.String value, java.lang.String suffix, java.lang.String preceded)
See if a suffix is preceded by a String

return
true if the suffix is preceded

    // be-safe !!!
    if ((value == null) ||
        (suffix == null) ||
        (preceded == null) ||
        !suffix(value,suffix) ) {
      return false ;
    }

    return suffix(removeSuffix(value,suffix),preceded) ;