//
//  NaiveBayesLearner.m
//  Pennyworth
//
//  Created by Chris Karr on 12/24/07.
//  Copyright 2007 Chris J. Karr. All rights reserved.
//

#import "NaiveBayesLearner.h"

// CommonCrypto supplies the digest that -hash: uses to derive dictionary keys.
#import <CommonCrypto/CommonDigest.h>

// Key under which both the global and the per-label example counters are stored.
static NSString * const NBLCountKey = @"NBLCount";

// Keys of the two objects stored in the keyed archive on disk.
static NSString * const NBLProbabilitiesArchiveKey = @"Probabilities";
static NSString * const NBLFeatureListArchiveKey = @"Feature List";

// Returns the path of the archive that backs the learner named learnerKey:
// ~/Library/Application Support/Pennyworth/Naive Bayes (<learnerKey>).learner
//
// The key is formatted straight into the file name. The previous approach
// replaced the literal text "KEY" in the complete path, which also rewrote any
// "KEY" that happened to appear in the user's home directory.
static NSString * NBLLearnerPath (NSString * learnerKey)
{
	NSString * directory = [NSHomeDirectory () stringByAppendingPathComponent:@"Library/Application Support/Pennyworth"];
	NSString * fileName = [NSString stringWithFormat:@"Naive Bayes (%@).learner", learnerKey];

	return [directory stringByAppendingPathComponent:fileName];
}

@implementation NaiveBayesLearner

// Releases the two containers this class allocates or retains in -load and -reset.
- (void) dealloc
{
	[probabilities release];
	[featureList release];

	[super dealloc];
}

// Replaces the in-memory model with the one archived for the current key.
//
// The support directory is created when missing. If there is no key, no
// readable archive, or the archive lacks an entry, the corresponding container
// starts out empty, so both ivars are always non-nil when this method returns.
// NOTE(review): `key` is not declared in this file; it is presumably an ivar
// inherited from the superclass and set by -setKey: - confirm.
- (void) load
{
	// Let go of whatever an earlier key loaded; -setKey: may run more than once.
	[probabilities release];
	probabilities = nil;

	[featureList release];
	featureList = nil;

	if (key != nil)
	{
		NSFileManager * fm = [NSFileManager defaultManager];
		NSString * path = NBLLearnerPath (key);
		NSString * directory = [path stringByDeletingLastPathComponent];

		// isDir is only meaningful when the path exists, so test the return value too.
		BOOL isDir = NO;

		if (![fm fileExistsAtPath:directory isDirectory:&isDir] || !isDir)
			[fm createDirectoryAtPath:directory withIntermediateDirectories:YES attributes:nil error:NULL];

		NSData * data = [NSData dataWithContentsOfFile:path];

		if (data != nil)
		{
			NSKeyedUnarchiver * unarchiver = [[NSKeyedUnarchiver alloc] initForReadingWithData:data];

			probabilities = [[unarchiver decodeObjectForKey:NBLProbabilitiesArchiveKey] retain];
			featureList = [[unarchiver decodeObjectForKey:NBLFeatureListArchiveKey] retain];

			[unarchiver finishDecoding];
			[unarchiver release];
		}
	}

	if (probabilities == nil)
		probabilities = [[NSMutableDictionary alloc] init];

	if (featureList == nil)
		featureList = [[NSMutableSet alloc] init];
}

// Stores the new key through the superclass, then loads the model saved for it.
- (void) setKey:(NSString *) newKey
{
	[super setKey:newKey];

	[self load];
}

// Discards everything learned so far (in memory only; the archive on disk is
// left untouched until the next -addExample:forClass:).
- (void) reset
{
	[probabilities release];
	[featureList release];

	probabilities = [[NSMutableDictionary alloc] init];
	featureList = [[NSMutableSet alloc] init];
}

// Returns the number of examples added across all labels, or 0 when none.
- (NSNumber *) exampleCount
{
	NSNumber * count = [probabilities objectForKey:NBLCountKey];

	if (count == nil)
		count = [NSNumber numberWithInt:0];

	return count;
}

// Returns the dictionary key used for a feature name: the lowercase hex
// SHA-256 digest of its UTF-8 bytes, or nil when plaintext is nil or cannot be
// encoded.
//
// This used to return the constant @"foo" (a disabled MD5 implementation was
// left in a comment), which made every feature share one counter. The digest
// is only a key, so any stable digest works; hex output can never collide with
// the "NBLCount" counter key. Entries archived under @"foo" are simply never
// matched again.
- (NSString *) hash:(NSString *) plaintext
{
	if (plaintext == nil)
		return nil;

	NSData * data = [plaintext dataUsingEncoding:NSUTF8StringEncoding];

	if (data == nil)
		return nil;

	unsigned char digest[CC_SHA256_DIGEST_LENGTH];
	CC_SHA256 ([data bytes], (CC_LONG) [data length], digest);

	NSMutableString * hex = [NSMutableString stringWithCapacity:(CC_SHA256_DIGEST_LENGTH * 2)];

	int i;
	for (i = 0; i < CC_SHA256_DIGEST_LENGTH; i++)
		[hex appendFormat:@"%02x", digest[i]];

	return hex;
}

// Returns the most probable label for an example, or nil when nothing matches.
//
// features: objects answering valueForKey:FEATURE_STRING with the feature name.
//
// Each label is scored as P(label) * product of P(feature | label), using raw
// counts without smoothing, so one feature a label has never seen zeroes that
// label. When every label scores zero, the method retries with only the
// features that were reached while some label's running score was still
// positive, and gives up (nil) once that no longer shrinks the feature list.
- (NSString *) getLabelForExample:(NSArray *) features
{
	if ([features count] == 0)
		return nil;

	// Digest every feature once instead of once per label.
	NSMutableArray * featureKeys = [NSMutableArray arrayWithCapacity:[features count]];

	for (id feature in features)
	{
		NSString * featureKey = [self hash:[feature valueForKey:FEATURE_STRING]];

		// NSNull never matches a stored key, so a nameless feature counts as unseen.
		if (featureKey != nil)
			[featureKeys addObject:featureKey];
		else
			[featureKeys addObject:[NSNull null]];
	}

	NSMutableSet * seenFeatures = [NSMutableSet set];

	double maxProb = 0.0;
	NSString * maxLabel = nil;

	double totalCount = [[probabilities objectForKey:NBLCountKey] doubleValue];

	for (NSString * label in [probabilities allKeys])
	{
		// The global counter lives next to the labels; it is not a label.
		if ([label isEqual:NBLCountKey])
			continue;

		NSDictionary * labelDict = [probabilities objectForKey:label];
		double labelCount = [[labelDict objectForKey:NBLCountKey] doubleValue];

		// Avoid dividing by zero when a counter is missing from the archive.
		if (totalCount <= 0.0 || labelCount <= 0.0)
			continue;

		double labelProb = labelCount / totalCount;

		NSUInteger index;
		for (index = 0; index < [features count]; index++)
		{
			NSNumber * count = [labelDict objectForKey:[featureKeys objectAtIndex:index]];

			if (count != nil)
				labelProb *= ([count doubleValue] / labelCount);
			else
				labelProb = 0.0;

			if (labelProb > 0.0)
				[seenFeatures addObject:[features objectAtIndex:index]];
		}

		if (labelProb > maxProb)
		{
			maxProb = labelProb;
			maxLabel = label;
		}
	}

	if (maxProb > 0.0 || [features count] == [seenFeatures count])
		return maxLabel;

	// Back off to the features that matched something and try again.
	return [self getLabelForExample:[seenFeatures allObjects]];
}

// Records one training example for label and saves the model to disk.
//
// features: objects answering valueForKey:FEATURE_STRING with the feature name.
// label:    the class of the example. nil and the reserved counter name
//           "NBLCount" are ignored, because the former cannot be stored and the
//           latter would overwrite the global example counter.
//
// A feature that occurs several times in one example is counted once.
// Nothing is written to disk while no key is set.
- (void) addExample:(NSArray *) features forClass:(NSString *) label
{
	if (label == nil || [label isEqual:NBLCountKey])
		return;

	// Make sure the containers exist even if -setKey: has not been called yet.
	if (probabilities == nil || featureList == nil)
		[self load];

	NSMutableDictionary * labelDict = [probabilities objectForKey:label];

	if (labelDict == nil)
	{
		labelDict = [NSMutableDictionary dictionary];
		[probabilities setObject:labelDict forKey:label];
	}

	int totalExamples = [[self exampleCount] intValue] + 1;
	[probabilities setObject:[NSNumber numberWithInt:totalExamples] forKey:NBLCountKey];

	int labelExamples = [[labelDict objectForKey:NBLCountKey] intValue] + 1;
	[labelDict setObject:[NSNumber numberWithInt:labelExamples] forKey:NBLCountKey];

	NSMutableSet * addedFeatures = [NSMutableSet set];

	for (id feature in features)
	{
		NSString * featureKey = [self hash:[feature valueForKey:FEATURE_STRING]];

		// Skip nameless features and repeats within this example.
		if (featureKey == nil || [addedFeatures containsObject:featureKey])
			continue;

		double featureCount = [[labelDict objectForKey:featureKey] doubleValue];
		[labelDict setObject:[NSNumber numberWithDouble:(featureCount + 1.0)] forKey:featureKey];

		[addedFeatures addObject:featureKey];
		[featureList addObject:featureKey];
	}

	// Without a key there is no file name to save under.
	if (key == nil)
		return;

	NSMutableData * data = [NSMutableData data];
	NSKeyedArchiver * archiver = [[NSKeyedArchiver alloc] initForWritingWithMutableData:data];

	[archiver encodeObject:probabilities forKey:NBLProbabilitiesArchiveKey];
	[archiver encodeObject:featureList forKey:NBLFeatureListArchiveKey];
	[archiver finishEncoding];
	[archiver release];

	NSString * path = NBLLearnerPath (key);

	if (![data writeToFile:path atomically:YES])
		NSLog (@"NaiveBayesLearner: unable to save learner state to %@", path);
}

@end