HistogramPredictor public class HistogramPredictor extends Object A histogram-based predictor which records co-occurrences of applications with a specific
feature, for example, location, time of day, etc. The histogram is kept in a two-level
hash table. The first-level key is the feature value and the second-level key is the app
id. |
Fields Summary |
---|
static final String | TAG | private HashMap | mPredictor | private HashMap | mClassCounts | private HashSet | mBlacklist | private static final int | MINIMAL_FEATURE_VALUE_COUNTS | private static final int | MINIMAL_APP_APPEARANCE_COUNTS | private static final int | PRIOR_K_VALUE | private static final String[] | APP_BLACKLIST |
Constructors Summary |
---|
public HistogramPredictor(String[] blackList)
for (String appName : blackList) {
mBlacklist.add(appName);
}
|
Methods Summary |
---|
public void | addSample(java.lang.String sampleId, java.util.Map features)
// Record the sample under each feature it carries; the per-feature
// histogram is created on first use by useFeature().
for (Map.Entry<String, String> pair : features.entrySet()) {
    String name = pair.getKey();
    String value = pair.getValue();
    useFeature(name);
    mPredictor.get(name).addSample(sampleId, value);
}

// Bump the overall occurrence count kept for this sample id.
int seenSoFar = 0;
if (mClassCounts.containsKey(sampleId)) {
    seenSoFar = mClassCounts.get(sampleId);
}
mClassCounts.put(sampleId, seenSoFar + 1);
| public java.util.List | findTopClasses(java.util.Map features, int topK)
// Most sophisticated function in this class.
//
// Ranks classes (apps) for the given feature values and returns the best ones:
//   1. For every feature known to the model that also appears in `features`,
//      the per-class scores returned by HistogramCounter.getClassScores() are
//      summed per class.
//   2. Blacklisted classes and classes seen fewer than
//      MINIMAL_APP_APPEARANCE_COUNTS times are dropped.
//   3. Each remaining score is adjusted by the log of the class count, scaled
//      by (validFeatureCount - PRIOR_K_VALUE).
//   4. Candidates are sorted by adjusted score, highest first, and the first
//      topK entries are returned (topK == 0 means "return all").
//
// Throws RuntimeException if a scored class has no entry in mClassCounts.
// The returned list is a subList view over a locally built ArrayList.
// NOTE(review): getClassScores() presumably returns log-scale scores, which
// would explain the additive combination and log prior - confirm in
// HistogramCounter.
HashMap<String, Double> appScores = new HashMap<String, Double>();
// Number of features that actually contributed a non-empty score map.
int validFeatureCount = 0;
// Accumulate, per class, the sum of its scores over all usable features.
for (Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) {
String featureName = entry.getKey();
HistogramCounter counter = entry.getValue();
// Features the caller did not supply are skipped entirely.
if (features.containsKey(featureName)) {
String featureValue = features.get(featureName);
HashMap<String, Double> scoreMap = counter.getClassScores(featureValue);
// An empty score map contributes nothing and is not counted as valid.
if (scoreMap.isEmpty()) {
continue;
}
validFeatureCount++;
for (Map.Entry<String, Double> item : scoreMap.entrySet()) {
String appName = item.getKey();
double appScore = item.getValue();
// Add to the running total if this class was already scored.
if (appScores.containsKey(appName)) {
appScore += appScores.get(appName);
}
appScores.put(appName, appScore);
}
}
}
// Filter the scored classes and apply the count-based prior adjustment.
HashMap<String, Double> appCandidates = new HashMap<String, Double>();
for (Map.Entry<String, Double> entry : appScores.entrySet()) {
String appName = entry.getKey();
if (mBlacklist.contains(appName)) {
Log.i(TAG, appName + " is in blacklist");
continue;
}
// Every scored class is expected to have a class count; a miss means the
// histogram and mClassCounts are out of sync.
if (!mClassCounts.containsKey(appName)) {
throw new RuntimeException("class count error!");
}
int appCount = mClassCounts.get(appName);
if (appCount < MINIMAL_APP_APPEARANCE_COUNTS) {
Log.i(TAG, appName + " doesn't have enough counts");
continue;
}
double appScore = entry.getValue();
double appPrior = Math.log((double) appCount);
// Subtract the log-count prior once per valid feature, less PRIOR_K_VALUE.
appCandidates.put(appName,
appScore - appPrior * (validFeatureCount - PRIOR_K_VALUE));
}
// Sort candidates by adjusted score in descending order.
List<Map.Entry<String, Double> > appList =
new ArrayList<Map.Entry<String, Double> >(appCandidates.size());
appList.addAll(appCandidates.entrySet());
Collections.sort(appList, new Comparator<Map.Entry<String, Double> >() {
public int compare(Map.Entry<String, Double> o1,
Map.Entry<String, Double> o2) {
// Reversed operands give highest-score-first ordering.
return o2.getValue().compareTo(o1.getValue());
}
});
// topK == 0 is the "no limit" sentinel.
if (topK == 0) {
topK = appList.size();
}
// NOTE(review): a negative topK reaches subList() as a negative end index and
// is rejected there with an exception - confirm callers never pass one.
return appList.subList(0, Math.min(topK, appList.size()));
| public byte[] | getModel()
// Serializes the current histogram model to a byte array.
//
// The model is a three-level map: feature name -> (map obtained from
// HistogramCounter.getCounter(), declared as HashMap<String, HashMap<String, Integer>>).
// It is written with Java object serialization.
//
// Returns the serialized bytes.
// Throws RuntimeException (with the IOException as its cause) if
// serialization fails.
//
// TODO: convert model to a more memory efficient data structure.
HashMap<String, HashMap<String, HashMap<String, Integer > > > model =
        new HashMap<String, HashMap<String, HashMap<String, Integer > > >();
for (Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) {
    model.put(entry.getKey(), entry.getValue().getCounter());
}
try {
    ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
    ObjectOutputStream objStream = new ObjectOutputStream(byteStream);
    try {
        objStream.writeObject(model);
    } finally {
        // Closing flushes any data still buffered inside the object stream,
        // so the byte array read below is guaranteed to be complete.
        objStream.close();
    }
    return byteStream.toByteArray();
} catch (IOException e) {
    // Keep the original failure as the cause so it is not lost to callers.
    throw new RuntimeException("Can't get model", e);
}
| private void | loadClassCounter()
// Rebuilds mClassCounts from the histogram stored under the "Time of Week"
// feature: each class's counts are summed over all values of that feature.
// Throws RuntimeException when the model has no such feature.
final String timeOfWeekKey = "Time of Week";
boolean hasTimeOfWeek = mPredictor.containsKey(timeOfWeekKey);
if (!hasTimeOfWeek) {
    throw new RuntimeException("Precition model error: missing Time of Week!");
}

HistogramCounter timeOfWeekCounter = mPredictor.get(timeOfWeekKey);
HashMap<String, HashMap<String, Integer> > histogram = timeOfWeekCounter.getCounter();

// Start from a clean slate, then fold every per-value count into the totals.
mClassCounts.clear();
for (HashMap<String, Integer> countsForValue : histogram.values()) {
    for (Map.Entry<String, Integer> classEntry : countsForValue.entrySet()) {
        int occurrences = classEntry.getValue();
        String name = classEntry.getKey();
        int total = mClassCounts.containsKey(name)
                ? occurrences + mClassCounts.get(name)
                : occurrences;
        mClassCounts.put(name, total);
    }
}
Log.i(TAG, "class counts: " + mClassCounts);
| public void | resetPredictor()
// Returns the predictor to its empty state: every per-feature counter is
// reset, then both the feature table and the class counts are emptied.
// TODO: unclear whether resetting each counter first actually reduces
// memory waste compared with simply clearing the maps.
for (Map.Entry<String, HistogramCounter> slot : mPredictor.entrySet()) {
    slot.getValue().resetCounter();
}
mClassCounts.clear();
mPredictor.clear();
| public boolean | setModel(byte[] modelData)
// Replaces the current predictor state with a model read from modelData.
//
// modelData must be a Java-serialized
// HashMap<String, HashMap<String, HashMap<String, Integer>>>, which is the
// same shape getModel() writes. The existing state is discarded only after
// the bytes have been read successfully.
//
// Always returns true; failures are reported by exception.
// Throws RuntimeException (with the underlying exception as its cause) if
// the bytes cannot be read or a serialized class cannot be resolved, and
// also, via loadClassCounter(), if the loaded model lacks a required feature.
//
// NOTE(review): this uses Java native deserialization. It must only be fed
// bytes from a trusted source; deserializing untrusted data is unsafe.
HashMap<String, HashMap<String, HashMap<String, Integer > > > model;
try {
    ByteArrayInputStream input = new ByteArrayInputStream(modelData);
    ObjectInputStream objStream = new ObjectInputStream(input);
    try {
        model = (HashMap<String, HashMap<String, HashMap<String, Integer > > >)
                objStream.readObject();
    } finally {
        objStream.close();
    }
} catch (IOException e) {
    // Keep the original failure as the cause so it is not lost to callers.
    throw new RuntimeException("Can't load model", e);
} catch (ClassNotFoundException e) {
    throw new RuntimeException("Learning class not found", e);
}
resetPredictor();
for (Map.Entry<String, HashMap<String, HashMap<String, Integer> > > entry :
        model.entrySet()) {
    useFeature(entry.getKey());
    mPredictor.get(entry.getKey()).setCounter(entry.getValue());
}
// TODO: this is a temporary fix for now
// Class counts are not part of the serialized model, so they are rebuilt
// from the loaded histogram.
loadClassCounter();
return true;
| private void | useFeature(java.lang.String featureName)
// Ensures a histogram counter exists for featureName; an existing counter
// is left untouched.
if (mPredictor.containsKey(featureName)) {
    return;
}
mPredictor.put(featureName, new HistogramCounter());
|
|