//
//  ID3DecisionTreeLearner.m
//  Pennyworth
//
//  Created by Chris Karr on 1/23/08.
//  Copyright 2008 Chris J. Karr. All rights reserved.
//

#import "ID3DecisionTreeLearner.h"

@implementation ID3DecisionTreeLearner

/**
 * Computes the Shannon entropy, in bits, of the label distribution of a set
 * of examples.
 *
 * Each example's label is read from LABEL_KEY; an example with no entry for
 * that key is counted under UNKNOWN_VALUE.
 *
 * @param exampleArray Array of NSDictionary examples.
 * @return The entropy of the labels; 0 when the array is empty or when every
 *         example carries the same label.
 */
- (float) entropyForExamples:(NSArray *) exampleArray
{
	NSCountedSet * labelCounts = [NSCountedSet set];

	for (NSDictionary * example in exampleArray)
	{
		// Use -objectForKey: rather than KVC's -valueForKey:. On NSDictionary,
		// -valueForKey: gives keys that begin with "@" a special meaning, which
		// returns the wrong value or raises for such keys.
		id label = [example objectForKey:LABEL_KEY];

		if (label == nil)
			label = UNKNOWN_VALUE;

		[labelCounts addObject:label];
	}

	float total = (float) [exampleArray count];
	float entropy = 0;

	// An empty input leaves labelCounts empty, so total is never used as a
	// zero divisor here.
	for (id label in labelCounts)
	{
		float count = (float) [labelCounts countForObject:label];

		entropy -= (count / total) * log2 (count / total);
	}

	return entropy;
}

/**
 * Computes the negated, size-weighted label entropy that remains after
 * partitioning the examples by their value for a feature.
 *
 * Despite the method's name, the result is zero or negative: callers add it to
 * the entropy of the unpartitioned examples to obtain the information gain of
 * the feature (see -bestFeatureForExamples:).
 *
 * Examples lacking the feature are grouped under UNKNOWN_VALUE. When the
 * feature's value is an NSArray, the example is placed in the partition of
 * every element of that array (once per occurrence). Partitions are keyed by
 * the -description of the value, so distinct values that share a description
 * land in the same partition.
 *
 * @param feature      The example key to partition on.
 * @param exampleArray Array of NSDictionary examples.
 * @return The negated weighted entropy of the partitions; 0 when the array is
 *         empty.
 */
- (float) entropyForFeature:(NSString *) feature examples:(NSArray *) exampleArray
{
	NSMutableDictionary * partitions = [NSMutableDictionary dictionary];

	for (NSDictionary * example in exampleArray)
	{
		// Direct dictionary lookup: a feature named e.g. "@count" must return
		// the stored value, not the result of a KVC operator.
		id value = [example objectForKey:feature];

		if (value == nil)
			value = UNKNOWN_VALUE;

		NSArray * values = nil;

		if ([value isKindOfClass:[NSArray class]])
			values = (NSArray *) value;
		else
			values = [NSArray arrayWithObject:value];

		for (id v in values)
		{
			NSString * partitionKey = [v description];

			// Value descriptions are arbitrary data and may start with "@", so
			// they must not go through KVC's -valueForKey: either.
			NSMutableArray * partition = [partitions objectForKey:partitionKey];

			if (partition == nil)
			{
				partition = [NSMutableArray array];

				[partitions setObject:partition forKey:partitionKey];
			}

			[partition addObject:example];
		}
	}

	float total = (float) [exampleArray count];
	float gain = 0;

	// An empty input yields no partitions, so total is never a zero divisor.
	for (NSArray * partition in [partitions allValues])
		gain -= ((float) [partition count] / total) * [self entropyForExamples:partition];

	return gain;
}

/**
 * Selects the feature whose partitioning of the examples yields the largest
 * information gain.
 *
 * The candidate features are the union of all keys found in the examples,
 * excluding LABEL_KEY.
 *
 * @param exampleArray Array of NSDictionary examples.
 * @return The feature with the highest strictly positive gain, or nil when no
 *         feature has a gain above zero (which includes an empty array). When
 *         several features tie, which one is returned depends on the set's
 *         enumeration order.
 */
- (NSString *) bestFeatureForExamples:(NSArray *) exampleArray
{
	NSMutableSet * features = [NSMutableSet set];

	for (NSDictionary * example in exampleArray)
		[features addObjectsFromArray:[example allKeys]];

	[features removeObject:LABEL_KEY];

	float entropy = [self entropyForExamples:exampleArray];

	float maxGain = 0;
	NSString * maxFeature = nil;

	for (NSString * feature in features)
	{
		// -entropyForFeature:examples: returns a negated value, so adding it
		// to the base entropy gives the information gain.
		float gain = entropy + [self entropyForFeature:feature examples:exampleArray];

		if (gain > maxGain)
		{
			maxGain = gain;
			maxFeature = feature;
		}
	}

	return maxFeature;
}

@end