Files
HTMLKit/HTMLKitTests/HTML5LibTokenizerTest.m
T
iska 7fb04f3394 Refactor Tokenizer tests into separate group and generate the test cases dynamically
Instead of loading and running the HTML5Lib test files manually, the Xcode test cases are generated
on start for all the HTML5Lib tokenizer tests. Also, the performance test is moved into its own class.

This setup shouldn't require any extra maintenance when HTML5Lib adds new tokenizer test cases.
2015-03-24 23:50:19 +01:00

153 lines
4.4 KiB
Objective-C

//
// HTML5LibTest.m
// HTMLKit
//
// Created by Iska on 25/10/14.
// Copyright (c) 2014 BrainCookie. All rights reserved.
//
#import "HTML5LibTokenizerTest.h"
#import "HTMLTokenizerStates.h"
#import "HTMLTokens.h"
@implementation HTML5LibTokenizerTest
- (instancetype)initWithTestDictionary:(NSDictionary *)dictionary
{
self = [super init];
if (self) {
[self loadTest:dictionary];
}
return self;
}
- (void)loadTest:(NSDictionary *)test
{
BOOL doubleEscaped = [test[@"doubleEscaped"] boolValue];
// Test Title
self.title = test[@"description"];
// Test Input
self.input = test[@"input"];
if (doubleEscaped) {
self.input = [self processDoubleEscaped:self.input];
}
// Test Output
NSMutableArray *tokens = [NSMutableArray array];
NSArray *outputs = test[@"output"];
for (NSArray *output in outputs) {
HTMLToken *token = [self processOutputToken:output doubleEscaped:doubleEscaped];
[tokens addObject:token];
}
[tokens addObject:[HTMLEOFToken token]];
self.output = tokens;
// Test Initial States
NSMutableArray *initialStates = [NSMutableArray array];
NSArray *states = test[@"initialStates"];
for (NSString *name in states) {
HTMLTokenizerState state = HTMLTokenizerStateData;
if ([name isEqualToString:@"PLAINTEXT state"]) {
state = HTMLTokenizerStatePLAINTEXT;
} else if ([name isEqualToString:@"RCDATA state"]) {
state = HTMLTokenizerStateRCDATA;
} else if ([name isEqualToString:@"RAWTEXT state"]) {
state = HTMLTokenizerStateRAWTEXT;
}
[initialStates addObject:@(state)];
}
if (initialStates.count == 0) {
[initialStates addObject:@(HTMLTokenizerStateData)];
}
self.initialStates = initialStates;
// Test Last Start Tag
self.lastStartTag = test[@"lastStartTag"];
// Ignore Error Order
self.ignoreErrorOrder = [test[@"ignoreErrorOrder"] boolValue];
}
- (HTMLToken *)processOutputToken:(id)output doubleEscaped:(BOOL)doubleEscaped
{
if ([output isKindOfClass:[NSString class]] && [output isEqualToString:@"ParseError"]) {
return [HTMLParseErrorToken new];
}
NSString *type = [output firstObject];
NSString *data = nil;
if ([output count] > 1) {
data = output[1];
if (doubleEscaped) {
data = [self processDoubleEscaped:data];
}
}
if ([type isEqualToString:@"Character"]) {
return [[HTMLCharacterToken alloc] initWithString:data];
} else if ([type isEqualToString:@"Comment"]) {
return [[HTMLCommentToken alloc] initWithData:data];
} else if ([type isEqualToString:@"DOCTYPE"]) {
data = [[NSNull null] isEqual:data] ? nil : data;
HTMLDOCTYPEToken *token = [[HTMLDOCTYPEToken alloc] initWithName:data];
token.publicIdentifier = [[NSNull null] isEqual:output[2]] ? nil : output[2];
token.systemIdentifier = [[NSNull null] isEqual:output[3]] ? nil : output[3];
token.forceQuirks = ([output[4] boolValue] == NO);
return token;
} else if ([type isEqualToString:@"EndTag"]) {
return [[HTMLEndTagToken alloc] initWithTagName:data];
} else if ([type isEqualToString:@"ParseError"]) {
return [HTMLParseErrorToken new];
} else if ([type isEqualToString:@"StartTag"]) {
HTMLStartTagToken *token = [[HTMLStartTagToken alloc] initWithTagName:data];
NSDictionary *attributes = output[2];
if (attributes && attributes.allKeys.count > 0) {
token.attributes = [HTMLOrderedDictionary new];
}
for (NSString *name in attributes) {
NSString *value = [attributes objectForKey:name];
[token.attributes setObject:value forKey:name];
}
token.selfClosing = ([output count] == 4);
return token;
}
return nil;
}
- (NSString *)processDoubleEscaped:(NSString *)string
{
NSError *error = nil;
NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:@"\\\\u([0-9a-f]{4})"
options:NSRegularExpressionCaseInsensitive
error:&error];
NSArray *matches = [regex matchesInString:string options:0 range:NSMakeRange(0, string.length)];
for(NSTextCheckingResult *match in [matches reverseObjectEnumerator]) {
NSRange hexRange = [match rangeAtIndex:1];
NSString *hexString = [string substringWithRange:hexRange];
NSScanner *scanner = [NSScanner scannerWithString:hexString];
unsigned int codepint;
[scanner scanHexInt:&codepint];
NSString *replacement = [NSString stringWithFormat:@"%C", (unichar)codepint];
NSRange matchRange = [match rangeAtIndex:0];
string = [string stringByReplacingCharactersInRange:matchRange withString:replacement];
}
return string;
}
- (NSString *)description
{
return self.title;
}
@end