File | Doc Category | Size | Date | Package
SweetSpotSimilarity.java | API Doc | Apache Lucene 2.2.0 | 6405 | Sat Jun 16 22:21:02 BST 2007 | org.apache.lucene.misc

SweetSpotSimilarity

public class SweetSpotSimilarity extends DefaultSimilarity
A similarity with a lengthNorm that provides for a "plateau" of equally good lengths, and tf helper functions.

For lengthNorm, a global min/max can be specified to define the plateau of lengths that should all have a norm of 1.0. Below the min and above the max, the lengthNorm drops off in a sqrt function.

A per field min/max can be specified if different fields have different sweet spots.

For tf, baselineTf and hyperbolicTf functions are provided, which subclasses can choose between.

Fields Summary
private int
ln_min
private int
ln_max
private float
ln_steep
private Map
ln_mins
private Map
ln_maxs
private Map
ln_steeps
private float
tf_base
private float
tf_min
private float
tf_hyper_min
private float
tf_hyper_max
private double
tf_hyper_base
private float
tf_hyper_xoffset
Constructors Summary
public SweetSpotSimilarity()

    
    
    // No extra setup happens here: the only statement is the superclass constructor call.
    super();
  
Methods Summary
public float baselineTf(float freq)
Implemented as: (x <= min) ? base : sqrt(x+(base**2)-min) ...but with a special case check for 0.

This degrades to sqrt(x) when min and base are both 0

see
#setBaselineTfFactors


    if (0.0f == freq) return 0.0f;
  
    return (freq <= tf_min)
      ? tf_base
      : (float)Math.sqrt(freq + (tf_base * tf_base) - tf_min);
  
public float hyperbolicTf(float freq)
Uses a hyperbolic tangent function that allows for a hard max... tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1)

This code is provided as a convenience for subclasses that want to use a hyperbolic tf function.

see
#setHyperbolicTfFactors

    // Special case: a frequency of exactly zero always scores zero.
    if (0.0f == freq) return 0.0f;

    final float min = tf_hyper_min;
    final float max = tf_hyper_max;
    final double base = tf_hyper_base;
    final float xoffset = tf_hyper_xoffset;
    final double x = (double)(freq - xoffset);

    // Each power is needed twice below, so evaluate it only once.
    final double rising = Math.pow(base, x);
    final double falling = Math.pow(base, -x);

    // (rising - falling) / (rising + falling) is the tanh-style ratio; adding 1
    // and scaling by (max - min) / 2 maps it onto the span starting at min.
    final float result = min +
      (float)(
              (max-min) / 2.0f
              *
              ( ( (rising - falling) / (rising + falling) ) + 1.0d )
              );

    if (!Float.isNaN(result)) return result;

    // NaN usually means one of the powers overflowed to infinity, turning the
    // ratio into Infinity/Infinity. When only the base**-x term overflowed the
    // ratio's limit is -1, so the curve sits at its floor: return min rather
    // than max (the previous code returned max for every NaN).
    if (Double.isInfinite(falling) && !Double.isInfinite(rising)) return min;

    // Every other NaN (base**x overflowed, or the inputs themselves were NaN)
    // keeps the historical fallback of max.
    return max;
    
  
public float lengthNorm(java.lang.String fieldName, int numTerms)
Implemented as: 1/sqrt( steepness * (abs(x-min) + abs(x-max) - (max-min)) + 1 ) .

This degrades to 1/sqrt(x) when min and max are both 1 and steepness is 0.5

:TODO: potential optimization is to just flat out return 1.0f if numTerms is between min and max.

see
#setLengthNormFactors

    // Start from the global settings; a per-field entry, when present, wins.
    int lower = ln_min;
    if (ln_mins.containsKey(fieldName)) {
      lower = ((Number) ln_mins.get(fieldName)).intValue();
    }

    int upper = ln_max;
    if (ln_maxs.containsKey(fieldName)) {
      upper = ((Number) ln_maxs.get(fieldName)).intValue();
    }

    float steepness = ln_steep;
    if (ln_steeps.containsKey(fieldName)) {
      steepness = ((Number) ln_steeps.get(fieldName)).floatValue();
    }

    // For lower <= upper this is zero when numTerms lies inside the range and
    // twice the distance to the nearer bound when it lies outside.
    final int outside =
      Math.abs(numTerms - lower) + Math.abs(numTerms - upper) - (upper - lower);

    // norm = 1 / sqrt(steepness * outside + 1), so it is 1.0 inside the range.
    final float radicand = (steepness * (float) outside) + 1.0f;
    final double norm = 1.0f / Math.sqrt(radicand);
    return (float) norm;
  
public void setBaselineTfFactors(float base, float min)
Sets the baseline and minimum function variables for baselineTf

see
#baselineTf

    // Store the two values that baselineTf reads: the flat base score and the
    // frequency threshold at or below which that base score is returned.
    this.tf_base = base;
    this.tf_min = min;
  
public void setHyperbolicTfFactors(float min, float max, double base, float xoffset)
Sets the function variables for the hyperbolicTf functions

param
min the minimum tf value to ever be returned (default: 0.0)
param
max the maximum tf value to ever be returned (default: 2.0)
param
base the base value to be used in the exponential for the hyperbolic function (default: e)
param
xoffset the midpoint of the hyperbolic function (default: 10.0)
see
#hyperbolicTf

    // Record the four parameters that hyperbolicTf reads on every call:
    // the exponent base, the x offset, and the min/max bounds.
    this.tf_hyper_base = base;
    this.tf_hyper_xoffset = xoffset;
    this.tf_hyper_min = min;
    this.tf_hyper_max = max;
  
public void setLengthNormFactors(int min, int max, float steepness)
Sets the default function variables used by lengthNorm when no field specific variables have been set.

see
#lengthNorm

    // Update the global defaults that lengthNorm starts from before it looks
    // for a per-field entry.
    ln_steep = steepness;
    ln_min = min;
    ln_max = max;
  
public void setLengthNormFactors(java.lang.String field, int min, int max, float steepness)
Sets the function variables used by lengthNorm for a specific named field

see
#lengthNorm

    ln_mins.put(field, new Integer(min));
    ln_maxs.put(field, new Integer(max));
    ln_steeps.put(field, new Float(steepness));
  
public float tf(int freq)
Delegates to baselineTf

see
#baselineTf

    return baselineTf(freq);