//
//  NaiveBayesLearner.m
//  Do Not Disturb
//
//  Created by Chris Karr on 9/15/07.
//  Copyright 2007 __MyCompanyName__. All rights reserved.
//

#import "NaiveBayesLearner.h"

// NOTE(review): the target of this #include was not visible in the reviewed
// source. MD5 () as called below matches the OpenSSL declaration
// (unsigned char *MD5 (const unsigned char *, size_t, unsigned char *)) -- confirm.
#include <openssl/md5.h>

#define LEARNER_PATH [NSString stringWithFormat:@"%@/Library/Application Support/Do Not Disturb/Naive Bayes.learner", NSHomeDirectory ()]

// Reserved dictionary key holding an example count. It is used both at the top
// level of `probabilities` (total examples) and inside each per-label
// dictionary (examples for that label), so it can never be used as a label.
static NSString * const NBLCountKey = @"NBLCount";

// Size in bytes of an MD5 digest.
enum { NBLDigestLength = 16 };

/*
 * Naive Bayes classifier whose model is persisted at LEARNER_PATH.
 *
 * Model layout (as built by -addExample:forClass:):
 *   probabilities : { "NBLCount" -> total examples,
 *                     <label>    -> { "NBLCount"     -> examples for label,
 *                                     <feature hash> -> occurrences } }
 *   featureList   : set of every feature hash ever added.
 *
 * This file uses manual retain/release.
 *
 * NOTE(review): `probabilities` and `featureList` are declared outside this
 * file and no -dealloc is visible here. If neither this class nor its
 * superclass releases them, they leak when an instance is deallocated --
 * confirm ownership before adding a -dealloc.
 */
@implementation NaiveBayesLearner

/*
 * Creates the storage directory if needed and loads the saved model from
 * LEARNER_PATH. Falls back to an empty model when the file is missing,
 * unreadable, corrupt, or lacks either archived object, so the learner is
 * always usable after init.
 */
- (Learner *) init
{
    if ((self = [super init]))
    {
        NSFileManager * fm = [NSFileManager defaultManager];
        NSString * path = LEARNER_PATH;
        NSString * directory = [path stringByDeletingLastPathComponent];

        // isDir is only meaningful when the path exists, so test the return
        // value as well.
        BOOL isDir = NO;
        BOOL exists = [fm fileExistsAtPath:directory isDirectory:&isDir];

        if (!(exists && isDir))
            [fm createDirectoryAtPath:directory attributes:nil];

        if ([fm fileExistsAtPath:path isDirectory:&isDir])
        {
            NSData * data = [NSData dataWithContentsOfFile:path];

            if (data != nil)
            {
                NSKeyedUnarchiver * unarchiver = nil;

                // NSKeyedUnarchiver raises when handed data that is not a
                // valid keyed archive; a damaged file must not prevent the
                // application from starting.
                @try
                {
                    unarchiver = [[NSKeyedUnarchiver alloc] initForReadingWithData:data];

                    id storedProbabilities = [unarchiver decodeObjectForKey:@"Probabilities"];
                    id storedFeatures = [unarchiver decodeObjectForKey:@"Feature List"];

                    if ([storedProbabilities isKindOfClass:[NSDictionary class]])
                        probabilities = [storedProbabilities retain];

                    if ([storedFeatures isKindOfClass:[NSSet class]])
                        featureList = [storedFeatures retain];

                    [unarchiver finishDecoding];
                }
                @catch (NSException * exception)
                {
                    NSLog (@"NaiveBayesLearner: unable to read %@: %@", path, exception);
                }

                [unarchiver release];
            }
        }

        // Anything that could not be restored starts out empty.
        if (probabilities == nil)
            probabilities = [[NSMutableDictionary alloc] init];

        if (featureList == nil)
            featureList = [[NSMutableSet alloc] init];
    }

    return self;
}

/*
 * Discards the in-memory model and replaces it with an empty one.
 * The file at LEARNER_PATH is not touched until the next
 * -addExample:forClass: call rewrites it.
 */
- (void) reset
{
    [probabilities release];
    probabilities = [[NSMutableDictionary alloc] init];

    [featureList release];
    featureList = [[NSMutableSet alloc] init];
}

/*
 * Returns the total number of examples added so far (zero for an empty model).
 */
- (NSNumber *) exampleCount
{
    NSNumber * count = [probabilities objectForKey:NBLCountKey];

    if (count == nil)
        count = [NSNumber numberWithInt:0];

    return count;
}

/*
 * Returns the MD5 digest of the UTF-8 encoding of `plaintext` as a
 * 32-character lowercase hexadecimal string.
 */
- (NSString *) hash:(NSString *) plaintext
{
    NSData * data = [plaintext dataUsingEncoding:NSUTF8StringEncoding];

    // A caller-supplied buffer is used instead of passing NULL: with NULL,
    // MD5 () writes into a single static array shared by every caller.
    unsigned char digest[NBLDigestLength];
    MD5 ([data bytes], [data length], digest);

    NSMutableString * hex = [NSMutableString stringWithCapacity:(NBLDigestLength * 2)];

    int i;
    for (i = 0; i < NBLDigestLength; i++)
        [hex appendFormat:@"%02x", digest[i]];

    // Hand back an immutable string; the result is stored in collections.
    return [NSString stringWithString:hex];
}

/*
 * Returns the label with the highest score for `features`, or nil when
 * `features` is empty or no label can be chosen.
 *
 * `features` holds objects that answer -valueForKey:@"name". For each label
 * the score is (label count / total count) multiplied by
 * (feature count / label count) for every feature; a feature the label has
 * never recorded forces that label's score to zero.
 *
 * If every label scores zero, the method retries with only the features that
 * were reached while some label's running score was still positive, and
 * gives up (returns nil) when that set is no smaller than the input.
 *
 * NOTE(review): because a feature is only remembered while the running score
 * is positive, the retry set depends on the order of `features`.
 */
- (NSString *) getLabelForExample:(NSArray *) features
{
    if ([features count] == 0)
        return nil;

    NSMutableSet * seenFeatures = [NSMutableSet set];

    double maxProb = 0.0;
    NSString * maxLabel = nil;

    double totalCount = [[probabilities objectForKey:NBLCountKey] doubleValue];

    NSString * label;
    NSEnumerator * iter = [[probabilities allKeys] objectEnumerator];

    while ((label = [iter nextObject]))
    {
        // The total-count entry lives beside the labels; it is not a label.
        if ([label isEqual:NBLCountKey])
            continue;

        NSMutableDictionary * labelDict = [probabilities objectForKey:label];

        double labelCount = [[labelDict objectForKey:NBLCountKey] doubleValue];
        double labelProb = labelCount / totalCount;

        id feature;
        NSEnumerator * featureIter = [features objectEnumerator];

        while ((feature = [featureIter nextObject]))
        {
            NSString * name = [feature valueForKey:@"name"];
            NSNumber * count = [labelDict objectForKey:[self hash:name]];

            if (count != nil)
                labelProb *= ([count doubleValue] / labelCount);
            else
                labelProb = 0.0;

            if (labelProb > 0.0)
                [seenFeatures addObject:feature];
        }

        if (labelProb > maxProb)
        {
            maxProb = labelProb;
            maxLabel = label;
        }
    }

    if (maxProb > 0.0 || [features count] == [seenFeatures count])
        return maxLabel;

    return [self getLabelForExample:[seenFeatures allObjects]];
}

/*
 * Records one training example for `label` and writes the updated model to
 * LEARNER_PATH.
 *
 * `features` holds objects that answer -valueForKey:@"name"; each distinct
 * feature name is counted at most once per example. A nil label, or a label
 * equal to the reserved "NBLCount" key, is rejected without changing the
 * model.
 */
- (void) addExample:(NSArray *) features forClass:(NSString *) label
{
    if (label == nil || [label isEqual:NBLCountKey])
    {
        NSLog (@"NaiveBayesLearner: ignoring example with invalid label '%@'", label);
        return;
    }

    // objectForKey:/setObject:forKey: are used for labels because
    // -valueForKey: on a dictionary gives keys starting with "@" a special
    // meaning.
    NSMutableDictionary * labelDict = [probabilities objectForKey:label];

    if (labelDict == nil)
    {
        labelDict = [NSMutableDictionary dictionary];
        [labelDict setObject:[NSNumber numberWithInt:0] forKey:NBLCountKey];

        [probabilities setObject:labelDict forKey:label];
    }

    int totalExamples = [[probabilities objectForKey:NBLCountKey] intValue];
    [probabilities setObject:[NSNumber numberWithInt:(totalExamples + 1)] forKey:NBLCountKey];

    int labelExamples = [[labelDict objectForKey:NBLCountKey] intValue];
    [labelDict setObject:[NSNumber numberWithInt:(labelExamples + 1)] forKey:NBLCountKey];

    NSMutableSet * addedFeatures = [NSMutableSet set];

    NSDictionary * feature;
    NSEnumerator * iter = [features objectEnumerator];

    while ((feature = [iter nextObject]))
    {
        NSString * name = [feature valueForKey:@"name"];
        NSString * hash = [self hash:name];

        // A feature repeated within one example only counts once.
        if (![addedFeatures containsObject:hash])
        {
            double featureCount = [[labelDict objectForKey:hash] doubleValue];

            [labelDict setObject:[NSNumber numberWithDouble:(featureCount + 1.0)] forKey:hash];
            [addedFeatures addObject:hash];
        }

        [featureList addObject:hash];
    }

    NSMutableData * data = [NSMutableData data];
    NSKeyedArchiver * archiver = [[NSKeyedArchiver alloc] initForWritingWithMutableData:data];

    [archiver encodeObject:probabilities forKey:@"Probabilities"];
    [archiver encodeObject:featureList forKey:@"Feature List"];
    [archiver finishEncoding];

    // Balance the alloc above.
    [archiver release];

    if (![data writeToFile:LEARNER_PATH atomically:YES])
        NSLog (@"NaiveBayesLearner: unable to write %@", LEARNER_PATH);
}

@end