9dd3c70fc3
Fixes #30 Foreign attributes were handled incorrectly because attribute names were implemented as pure strings without namespace prefix. This change keeps the attributes as strings and eliminates any handling in regards to namespaces. Spec note about attributes: "If designed today they would just have a name and value." https://dom.spec.whatwg.org/#attr
302 lines
10 KiB
Objective-C
302 lines
10 KiB
Objective-C
//
|
|
// HTML5LibTreeConstructionTest.m
|
|
// HTMLKit
|
|
//
|
|
// Created by Iska on 25/03/15.
|
|
// Copyright (c) 2015 BrainCookie. All rights reserved.
|
|
//
|
|
|
|
#import "HTML5LibTreeConstructionTest.h"
|
|
#import <XCTest/XCTest.h>
|
|
|
|
#import "HTMLDOM.h"
|
|
#import "HTMLDocumentType.h"
|
|
#import "HTMLElement.h"
|
|
#import "HTMLText.h"
|
|
#import "HTMLComment.h"
|
|
#import "HTMLKitTestUtil.h"
|
|
|
|
static NSString * const HTML5LibTests = @"html5lib-tests";
|
|
static NSString * const TreeConstruction = @"tree-construction";
|
|
|
|
@implementation HTML5LibTreeConstructionTest
|
|
|
|
+ (NSDictionary *)loadHTML5LibTreeConstructionTests
|
|
{
|
|
NSString *path = [HTMLKitTestUtil pathForFixture:TreeConstruction ofType:nil inDirectory:HTML5LibTests];
|
|
NSArray *testFiles = [[NSFileManager defaultManager] contentsOfDirectoryAtPath:path error:nil];
|
|
|
|
NSMutableDictionary *testsMap = [NSMutableDictionary dictionary];
|
|
for (NSString *testFile in testFiles) {
|
|
if (![testFile.pathExtension isEqualToString:@"dat"]) {
|
|
continue;
|
|
}
|
|
|
|
NSString *testFilePath = [path stringByAppendingPathComponent:testFile];
|
|
NSArray *tests = [HTML5LibTreeConstructionTest loadTestsWithFileAtPath:testFilePath];
|
|
[testsMap setObject:tests forKey:testFile];
|
|
}
|
|
|
|
return testsMap;
|
|
}
|
|
|
|
+ (NSArray *)loadTestsWithFileAtPath:(NSString *)filePath
|
|
{
|
|
NSString *contents = [NSString stringWithContentsOfFile:filePath encoding:NSUTF8StringEncoding error:nil];
|
|
|
|
NSMutableArray *tests = [NSMutableArray array];
|
|
|
|
NSScanner *scanner = [NSScanner scannerWithString:contents];
|
|
NSString * (^ nextTest)(void) = ^ NSString * () {
|
|
NSString *str;
|
|
[scanner scanUpToString:@"\n\n#data" intoString:&str];
|
|
return str;
|
|
};
|
|
|
|
NSRegularExpressionOptions options = NSRegularExpressionDotMatchesLineSeparators|
|
|
NSRegularExpressionUseUnixLineSeparators|
|
|
NSRegularExpressionAnchorsMatchLines;
|
|
|
|
NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:@"(^#(.(?!^#))+)"
|
|
options:options
|
|
error:nil];
|
|
|
|
NSString *rawTest = nil;
|
|
while ((rawTest = nextTest()) != nil) {
|
|
|
|
HTML5LibTreeConstructionTest *test = [HTML5LibTreeConstructionTest new];
|
|
test.testFile = filePath.lastPathComponent;
|
|
|
|
if ([rawTest rangeOfString:@"#script-off"].location != NSNotFound) {
|
|
// Ignore tests for "scripting flag disabled" case
|
|
continue;
|
|
}
|
|
|
|
NSArray *matches = [regex matchesInString:rawTest options:0 range:NSMakeRange(0, rawTest.length)];
|
|
|
|
for (NSTextCheckingResult *result in matches) {
|
|
NSString *match = [rawTest substringWithRange:result.range];
|
|
|
|
if ([match hasPrefix:@"#data\n"]) {
|
|
NSString *data = [match substringFromIndex:@"#data\n".length];
|
|
if (data.length > 0) {
|
|
data = [data substringToIndex:data.length];
|
|
}
|
|
test.data = data;
|
|
} else if ([match hasPrefix:@"#errors\n"]) {
|
|
NSArray *errors = [[match substringFromIndex:@"#errors\n".length] componentsSeparatedByString:@"\n"];
|
|
test.errors = [errors subarrayWithRange:NSMakeRange(0, errors.count)];
|
|
} else if ([match hasPrefix:@"#document-fragment\n"]) {
|
|
NSString *fragment = [match substringFromIndex:@"#document-fragment\n".length];
|
|
fragment = [fragment substringToIndex:fragment.length];
|
|
HTMLNamespace namespace = HTMLNamespaceHTML;
|
|
if ([fragment hasPrefix:@"math "]) {
|
|
fragment = [fragment substringFromIndex:@"math ".length];
|
|
namespace = HTMLNamespaceMathML;
|
|
} else if ([fragment hasPrefix:@"svg "]) {
|
|
fragment = [fragment substringFromIndex:@"svg ".length];
|
|
namespace = HTMLNamespaceSVG;
|
|
}
|
|
test.documentFragment = [[HTMLElement alloc] initWithTagName:fragment namespace:namespace attributes:@{}];
|
|
} else if ([match hasPrefix:@"#document\n"]) {
|
|
NSArray *parts = [[match substringFromIndex:@"#document\n".length] componentsSeparatedByString:@"| "];
|
|
NSArray *nodes = [HTML5LibTreeConstructionTest parseDocument:parts];
|
|
test.nodes = nodes;
|
|
}
|
|
}
|
|
[tests addObject:test];
|
|
}
|
|
|
|
return tests;
|
|
}
|
|
|
|
+ (NSArray *)parseDocument:(NSArray *)parts
|
|
{
|
|
NSMutableArray *nodes = [NSMutableArray array];
|
|
NSMutableArray *levels = [NSMutableArray array];
|
|
NSMutableArray *stack = [NSMutableArray array];
|
|
|
|
for (NSString *part in parts) {
|
|
if (part.length == 0) {
|
|
continue;
|
|
}
|
|
|
|
NSUInteger level = 0;
|
|
NSString *str = parseLevel(part, &level);
|
|
|
|
HTMLElement *currentParent = ^ HTMLElement * (NSUInteger childLevel) {
|
|
for (NSNumber *level in levels.reverseObjectEnumerator.allObjects) {
|
|
if (level.unsignedIntegerValue >= childLevel) {
|
|
[levels removeLastObject];
|
|
[stack removeLastObject];
|
|
}
|
|
}
|
|
return stack.lastObject;
|
|
}(level);
|
|
|
|
void (^ append)(id ) = ^ (id parsedResult){
|
|
if (currentParent) {
|
|
[currentParent appendNode:parsedResult];
|
|
} else {
|
|
[nodes addObject:parsedResult];
|
|
}
|
|
};
|
|
|
|
id parsedResult = nil;
|
|
|
|
if ((parsedResult = parseComment(str))) {
|
|
append(parsedResult);
|
|
} else if ((parsedResult = parseDocumentType(str))) {
|
|
append(parsedResult);
|
|
} else if ((parsedResult = parseTag(str))) {
|
|
append(parsedResult);
|
|
[levels addObject:@(level)];
|
|
[stack addObject:parsedResult];
|
|
} else if ((parsedResult = parseAttribute(str))) {
|
|
HTMLElement *element = stack.lastObject;
|
|
element[parsedResult[0]] = parsedResult[1];
|
|
} else if ((parsedResult = parseText(str))) {
|
|
append(parsedResult);
|
|
}
|
|
}
|
|
|
|
return nodes;
|
|
}
|
|
|
|
NS_INLINE NSString * parseLevel(NSString *str, NSUInteger *level)
|
|
{
|
|
const char *cstr = str.UTF8String;
|
|
NSUInteger idx = 0;
|
|
while ((*cstr) == ' ') { cstr++; idx++; }
|
|
*level = (idx / 2);
|
|
return [str substringFromIndex:idx];
|
|
}
|
|
|
|
NS_INLINE HTMLDocumentType * parseDocumentType(NSString *str)
|
|
{
|
|
if (![str hasPrefix:@"<!DOCTYPE "]) {
|
|
return nil;
|
|
}
|
|
|
|
NSString *rest = [str substringWithRange:NSMakeRange(@"<!DOCTYPE ".length, str.length - @"<!DOCTYPE ".length - 2)];
|
|
|
|
NSString *name = nil;
|
|
NSString *publicIdentifier = nil;
|
|
NSString *systemIdentifier = nil;
|
|
|
|
NSRange nameRange = [rest rangeOfString:@" "];
|
|
if (nameRange.location != NSNotFound) {
|
|
name = [rest substringToIndex:nameRange.location];
|
|
rest = [rest substringFromIndex:nameRange.location + 1];
|
|
} else {
|
|
name = (rest.length == 0) ? nil : [rest substringToIndex:rest.length];
|
|
}
|
|
|
|
NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:@"(\".*\")? (\".*\")"
|
|
options:NSRegularExpressionCaseInsensitive
|
|
error:nil];
|
|
|
|
NSTextCheckingResult *match = [regex firstMatchInString:rest options:0 range:NSMakeRange(0, rest.length)];
|
|
if (match.numberOfRanges > 0) {
|
|
NSRange pubRange = [match rangeAtIndex:1];
|
|
if (pubRange.location != NSNotFound) {
|
|
publicIdentifier = [rest substringWithRange:NSMakeRange(pubRange.location + 1, pubRange.length - 2)];
|
|
}
|
|
|
|
NSRange sysRange = [match rangeAtIndex:2];
|
|
if (sysRange.location != NSNotFound) {
|
|
systemIdentifier = [rest substringWithRange:NSMakeRange(sysRange.location + 1, sysRange.length - 2)];
|
|
}
|
|
}
|
|
|
|
HTMLDocumentType *doctype = [[HTMLDocumentType alloc] initWithName:name
|
|
publicIdentifier:publicIdentifier
|
|
systemIdentifier:systemIdentifier];
|
|
return doctype;
|
|
}
|
|
|
|
NS_INLINE HTMLElement * parseTag(NSString *str)
|
|
{
|
|
NSRegularExpression *tagRegex = [NSRegularExpression regularExpressionWithPattern:@"^(<.*>)$"
|
|
options:NSRegularExpressionAnchorsMatchLines
|
|
error:nil];
|
|
if ([tagRegex numberOfMatchesInString:str options:0 range:NSMakeRange(0, str.length)] != 1) {
|
|
return nil;
|
|
}
|
|
|
|
NSTextCheckingResult *match = [tagRegex firstMatchInString:str options:0 range:NSMakeRange(0, str.length)];
|
|
NSRange range = NSMakeRange(match.range.location + 1, match.range.length - 2);
|
|
|
|
NSArray *parts = [[str substringWithRange:range] componentsSeparatedByString:@" "];
|
|
NSString *tagName = parts.count == 2 ? parts[1] : parts[0];
|
|
HTMLNamespace namespace = parts.count == 1 ? HTMLNamespaceHTML : ([parts[0] isEqualToString:@"math"] ? HTMLNamespaceMathML : HTMLNamespaceSVG);
|
|
|
|
HTMLElement *element = [[HTMLElement alloc] initWithTagName:tagName namespace:namespace attributes:@{}];
|
|
return element;
|
|
}
|
|
|
|
NS_INLINE HTMLText * parseText(NSString *str)
|
|
{
|
|
NSRegularExpressionOptions options = NSRegularExpressionDotMatchesLineSeparators|
|
|
NSRegularExpressionUseUnixLineSeparators|
|
|
NSRegularExpressionAnchorsMatchLines;
|
|
|
|
NSRegularExpression *textRegex = [NSRegularExpression regularExpressionWithPattern:@"^(\".*\")"
|
|
options:options
|
|
error:nil];
|
|
if ([textRegex numberOfMatchesInString:str options:0 range:NSMakeRange(0, str.length)] != 1) {
|
|
return nil;
|
|
}
|
|
|
|
NSTextCheckingResult *match = [textRegex firstMatchInString:str options:0 range:NSMakeRange(0, str.length)];
|
|
NSRange range = NSMakeRange(match.range.location + 1, match.range.length - 2);
|
|
HTMLText *text = [[HTMLText alloc] initWithData:[str substringWithRange:range]];
|
|
return text;
|
|
}
|
|
|
|
NS_INLINE HTMLComment * parseComment(NSString *str)
|
|
{
|
|
NSRegularExpressionOptions options = NSRegularExpressionDotMatchesLineSeparators|
|
|
NSRegularExpressionUseUnixLineSeparators|
|
|
NSRegularExpressionAnchorsMatchLines;
|
|
|
|
NSRegularExpression *commentRegex = [NSRegularExpression regularExpressionWithPattern:@"^(<!--.*-->)$"
|
|
options:options
|
|
error:nil];
|
|
if ([commentRegex numberOfMatchesInString:str options:0 range:NSMakeRange(0, str.length)] != 1) {
|
|
return nil;
|
|
}
|
|
|
|
NSTextCheckingResult *match = [commentRegex firstMatchInString:str options:0 range:NSMakeRange(0, str.length)];
|
|
NSString *data = [str substringWithRange:match.range];
|
|
data = [data substringWithRange:NSMakeRange(@"<!-- ".length, data.length - @"<!-- ".length - @" -->".length)];
|
|
HTMLComment *comment = [[HTMLComment alloc] initWithData:data];
|
|
return comment;
|
|
}
|
|
|
|
NS_INLINE NSArray * parseAttribute(NSString *str)
|
|
{
|
|
NSRegularExpressionOptions options = NSRegularExpressionDotMatchesLineSeparators | NSRegularExpressionUseUnixLineSeparators;
|
|
|
|
NSRegularExpression *attributeRegex = [NSRegularExpression regularExpressionWithPattern:@"^[^\"](.*=\".*\")$"
|
|
options:options
|
|
error:nil];
|
|
if ([attributeRegex numberOfMatchesInString:str options:0 range:NSMakeRange(0, str.length)] != 1) {
|
|
return nil;
|
|
}
|
|
|
|
NSTextCheckingResult *match = [attributeRegex firstMatchInString:str options:0 range:NSMakeRange(0, str.length)];
|
|
str = [str substringWithRange:match.range];
|
|
NSRange range = [str rangeOfString:@"=" options:0];
|
|
|
|
NSString *key = [str substringToIndex:range.location];
|
|
key = [key stringByReplacingOccurrencesOfString:@" " withString:@":"];
|
|
|
|
NSString *value = [str substringFromIndex:range.location + 2];
|
|
value = [value substringToIndex:value.length - 1];
|
|
|
|
return @[key, value];
|
|
}
|
|
|
|
@end
|