// // HTML5LibTest.m // HTMLKit // // Created by Iska on 25/10/14. // Copyright (c) 2014 BrainCookie. All rights reserved. // #import "HTML5LibTest.h" #import "HTMLTokenizerStates.h" #import "HTMLTokens.h" @implementation HTML5LibTest - (instancetype)initWithFixture:(NSDictionary *)fixture { self = [super init]; if (self) { [self loadFixture:fixture]; } return self; } - (void)loadFixture:(NSDictionary *)fixture { BOOL doubleEscaped = [fixture[@"doubleEscaped"] boolValue]; // Test Description self.description = fixture[@"description"]; // Test Input self.input = fixture[@"input"]; if (doubleEscaped) { self.input = [self processDoubleEscaped:self.input]; } // Test Output NSMutableArray *tokens = [NSMutableArray array]; NSArray *outputs = fixture[@"output"]; for (NSArray *output in outputs) { HTMLToken *token = [self processOutputToken:output doubleEscaped:doubleEscaped]; [tokens addObject:token]; } self.output = tokens; // Test Initial States NSMutableArray *initialStates = [NSMutableArray array]; NSArray *states = fixture[@"initialStates"]; for (NSString *name in states) { HTMLTokenizerState state = HTMLTokenizerStateData; if ([name isEqualToString:@"PLAINTEXT state"]) { state = HTMLTokenizerStatePLAINTEXT; } else if ([name isEqualToString:@"RCDATA state"]) { state = HTMLTokenizerStateRCDATA; } else if ([name isEqualToString:@"RAWTEXT state"]) { state = HTMLTokenizerStateRAWTEXT; } [initialStates addObject:@(state)]; } if (initialStates.count == 0) { [initialStates addObject:@(HTMLTokenizerStateData)]; } self.initialStates = initialStates; // Test Last Start Tag self.lastStartTag = fixture[@"lastStartTag"]; // Ignore Error Order self.ignoreErrorOrder = [fixture[@"ignoreErrorOrder"] boolValue]; } - (HTMLToken *)processOutputToken:(NSArray *)output doubleEscaped:(BOOL)doubleEscaped { NSString *type = [output firstObject]; NSString *data = nil; if (output.count > 1) { NSString *data = [output lastObject]; if (doubleEscaped) { data = [self processDoubleEscaped:data]; } } if ([type isEqualToString:@"Character"]) { return [[HTMLCharacterToken alloc] initWithString:data]; } else if ([type isEqualToString:@"Comment"]) { return [[HTMLCommentToken alloc] initWithData:data]; } else if ([type isEqualToString:@"DOCTYPE"]) { HTMLDOCTYPEToken *token = [[HTMLDOCTYPEToken alloc] initWithName:data]; token.publicIdentifier = output[2]; token.systemIdentifier = output[3]; token.forceQuirks = ([output[4] boolValue] == NO); return token; } else if ([type isEqualToString:@"EndTag"]) { return [[HTMLEndTagToken alloc] initWithTagName:data]; } else if ([type isEqualToString:@"ParseError"]) { return [HTMLParseErrorToken new]; } else if ([type isEqualToString:@"StartTag"]) { HTMLStartTagToken *token = [[HTMLStartTagToken alloc] initWithTagName:data]; NSDictionary *attributes = output[2]; for (NSString *name in attributes) { NSString *value = [attributes objectForKey:name]; [token.attributes setObject:value forKey:name]; } token.selfClosing = (output.count == 4); return token; } return nil; } - (NSString *)processDoubleEscaped:(NSString *)string { NSError *error = nil; NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:@"\\\\u([0-9a-f]{4})" options:NSRegularExpressionCaseInsensitive error:&error]; NSArray *matches = [regex matchesInString:string options:0 range:NSMakeRange(0, string.length)]; for(NSTextCheckingResult *match in [matches reverseObjectEnumerator]) { NSRange hexRange = [match rangeAtIndex:2]; NSString *hexString = [string substringWithRange:hexRange]; NSScanner *scanner = [NSScanner scannerWithString:hexString]; unsigned int codepint; [scanner scanHexInt:&codepint]; NSString *replacement = [NSString stringWithFormat:@"%C", (unichar)codepint]; NSRange matchRange = [match rangeAtIndex:1]; string = [string stringByReplacingCharactersInRange:matchRange withString:replacement]; } return nil; } @end