//
//  HTMLParser.m
//  HTMLKit
//
//  Created by Iska on 04/10/14.
//  Copyright (c) 2014 BrainCookie. All rights reserved.
//

#import "HTMLParser.h"
#import "HTMLTokenizer.h"
#import "HTMLTokens.h"
#import "HTMLStackOfOpenElements.h"
#import "HTMLListOfActiveFormattingElements.h"
#import "HTMLParserInsertionModes.h"
#import "HTMLDOM.h"
#import "HTMLElementTypes.h"
#import "HTMLElementAdjustment.h"
#import "HTMLMarker.h"
#import "NSString+HTMLKit.h"
#import "CSSSelectors.h"
#import "HTMLDocument+Private.h"

/**
 Private parser state. Everything below is internal to the tree-construction
 stage and is manipulated directly through the instance variables.
 */
@interface HTMLParser ()
{
    // Token source; the parser installs itself as the tokenizer's `parser`.
    HTMLTokenizer *_tokenizer;
    // Parse-error messages collected by -emitParseError:.
    NSMutableArray *_errors;

    // Mode used to dispatch the next token, and the mode remembered when
    // switching into the text / in-table-text modes.
    HTMLInsertionMode _insertionMode;
    HTMLInsertionMode _originalInsertionMode;

    // Boxed insertion modes; pushed with -addObject: and popped with -removeLastObject.
    NSMutableArray *_stackOfTemplateInsertionModes;
    HTMLStackOfOpenElements *_stackOfOpenElements;
    HTMLListOfActiveFormattingElements *_listOfActiveFormattingElements;

    // Document being built, plus the context element used for fragment parsing.
    HTMLDocument *_document;
    HTMLElement *_contextElement;
    HTMLElement *_currentElement;

    HTMLElement *_headElementPointer;
    HTMLElement *_formElementPointer;

    HTMLCharacterToken *_pendingTableCharacterTokens;

    BOOL _framesetOkFlag;
    BOOL _fragmentParsingAlgorithm;
    BOOL _fosterParenting;
    BOOL _ignoreNextLineFeedCharacterToken;
}
@end

@implementation HTMLParser

#pragma mark - Lifecycle

/**
 Creates a parser for the given markup.

 @param string The HTML source to parse. `nil` is treated as the empty string.
 @return A parser in the "initial" insertion mode; nothing is parsed until a
         document or fragment is requested.
 */
- (instancetype)initWithString:(NSString *)string
{
    self = [super init];
    if (self == nil) {
        return nil;
    }

    // Collections backing the tree-construction state.
    _errors = [NSMutableArray new];
    _stackOfTemplateInsertionModes = [NSMutableArray new];
    _stackOfOpenElements = [HTMLStackOfOpenElements new];
    _listOfActiveFormattingElements = [HTMLListOfActiveFormattingElements new];
    _pendingTableCharacterTokens = [[HTMLCharacterToken alloc] initWithString:@""];

    // Flags and pointers in their starting configuration.
    _insertionMode = HTMLInsertionModeInitial;
    _framesetOkFlag = YES;
    _fragmentParsingAlgorithm = NO;
    _fosterParenting = NO;
    _ignoreNextLineFeedCharacterToken = NO;
    _headElementPointer = nil;
    _formElementPointer = nil;

    // The tokenizer never receives nil input.
    NSString *input = (string != nil) ? string : @"";
    _tokenizer = [[HTMLTokenizer alloc] initWithString:input];
    _tokenizer.parser = self;

    return self;
}

#pragma mark - Properties

/// The parse-error messages recorded so far (the parser's internal array).
- (NSArray *)parseErrors
{
    return _errors;
}

-
(HTMLDocument *)document
{
    // Parse on first access; afterwards hand back the already-built document.
    if (_document != nil) {
        return _document;
    }
    return [self parseDocument];
}

#pragma mark - Parse

/// Creates the document if needed and resets it to an empty, loading,
/// no-quirks state. Also clears the fragment-parsing flag.
- (void)initializeDocument
{
    if (!_document) {
        _document = [HTMLDocument new];
    }

    _document.readyState = HTMLDocumentLoading;
    _document.quirksMode = HTMLQuirksModeNoQuirks;
    _document.documentType = nil;
    [_document removeAllChildNodes];

    _fragmentParsingAlgorithm = NO;
}

/// Resets the document, runs the parser over the tokenizer and returns the document.
- (HTMLDocument *)parseDocument
{
    [self initializeDocument];
    [self runParser];
    return _document;
}

/**
 Parses the input as a fragment in the context of the given element.

 @param contextElement The element providing the parsing context.
 @return The child nodes of the synthetic `html` root created for the fragment,
         or an empty array when `contextElement` is nil. When called again with
         an equal context element, the children of the existing root are
         returned without re-parsing.
 */
- (NSArray *)parseFragmentWithContextElement:(HTMLElement *)contextElement
{
    if (contextElement == nil) {
        return @[];
    }

    // Same context as the previous fragment parse: reuse that result.
    if ([_contextElement isEqual:contextElement]) {
        HTMLElement *existingRoot = [_document firstElementMatchingSelector:rootSelector()];
        if (!existingRoot) {
            return @[];
        }
        return existingRoot.childNodes.objectEnumerator.allObjects;
    }

    [self initializeDocument];

    // Start over with a fresh tokenizer on the same input.
    _tokenizer = [[HTMLTokenizer alloc] initWithString:_tokenizer.string];
    _tokenizer.parser = self;

    _contextElement = contextElement;
    _fragmentParsingAlgorithm = YES;

    // Inherit the quirks mode of the context element's document, if it has one.
    if (_contextElement.ownerDocument) {
        _document.quirksMode = _contextElement.ownerDocument.quirksMode;
    } else {
        _document.quirksMode = HTMLQuirksModeNoQuirks;
    }

    // The tokenizer's initial state depends on the (HTML) context element.
    if (_contextElement.htmlNamespace == HTMLNamespaceHTML) {
        NSString *contextTagName = _contextElement.tagName;
        if ([contextTagName isEqualToAny:@"title", @"textarea", nil]) {
            _tokenizer.state = HTMLTokenizerStateRCDATA;
        } else if ([contextTagName isEqualToAny:@"style", @"xmp", @"iframe", @"noembed", @"noframes", nil]) {
            _tokenizer.state = HTMLTokenizerStateRAWTEXT;
        } else if ([contextTagName isEqualToString:@"script"]) {
            _tokenizer.state = HTMLTokenizerStateScriptData;
        } else if ([contextTagName isEqualToString:@"noscript"]) {
            _tokenizer.state = HTMLTokenizerStateRAWTEXT;
        } else if ([contextTagName isEqualToString:@"plaintext"]) {
            _tokenizer.state = HTMLTokenizerStatePLAINTEXT;
        } else {
            _tokenizer.state = HTMLTokenizerStateData;
        }
    }

    // Synthetic root that collects the parsed fragment.
    HTMLElement *root = [[HTMLElement alloc] initWithTagName:@"html"];
    [_document appendNode:root];
    [_stackOfOpenElements pushElement:root];

    if ([_contextElement.tagName isEqualToString:@"template"]) {
        [_stackOfTemplateInsertionModes addObject:@(HTMLInsertionModeInTemplate)];
    }

    [self resetInsertionModeAppropriately];

    // The form pointer is the nearest `form`, starting at the context element
    // itself and walking up through its parent elements.
    HTMLElement *formCandidate = _contextElement;
    while (formCandidate != nil && ![formCandidate.tagName isEqualToString:@"form"]) {
        formCandidate = formCandidate.parentElement;
    }
    _formElementPointer = formCandidate;

    [self runParser];

    return root.childNodes.objectEnumerator.allObjects;
}

/// Feeds tokens from the tokenizer to -processToken: until the tokenizer is
/// exhausted or the document has been marked complete.
- (void)runParser
{
    for (HTMLToken *token in _tokenizer) {
        BOOL finished = (_document.readyState == HTMLDocumentComplete);
        if (finished) {
            break;
        }
        [self processToken:token];
    }
}

/// Pops every open element and marks the document complete, which makes
/// -runParser stop at its next iteration.
- (void)stopParsing
{
    [_stackOfOpenElements popAll];
    _document.readyState = HTMLDocumentComplete;
}

#pragma mark - Processing

- (void)processToken:(HTMLToken *)token
{
    // Returns YES when the token must be handled by the current insertion mode,
    // NO when it must be handled by the foreign-content rules.
    BOOL (^ treeConstructionDispatcher)(HTMLElement *node) = ^BOOL(HTMLElement *node){
        if (node == nil) {
            return YES;
        }
        if (node.htmlNamespace == HTMLNamespaceHTML) {
            return YES;
        }
        if (IsNodeMathMLTextIntegrationPoint(node)) {
            if (token.type == HTMLTokenTypeStartTag) {
                return
![token.asStartTagToken.tagName isEqualToAny:@"mglyph", @"malignmark", nil];
            }
            if (token.type == HTMLTokenTypeCharacter) {
                return YES;
            }
        }
        if (node.htmlNamespace == HTMLNamespaceMathML && [node.tagName isEqualToString:@"annotation-xml"]) {
            if (token.type == HTMLTokenTypeStartTag && [token.asTagToken.tagName isEqualToString:@"svg"]) {
                return YES;
            }
        }
        if (IsNodeHTMLIntegrationPoint(node)) {
            if (token.type == HTMLTokenTypeStartTag || token.type == HTMLTokenTypeCharacter) {
                return YES;
            }
        }
        if (token.type == HTMLTokenTypeEOF) {
            return YES;
        }
        return NO;
    };

    // Tokenizer-level errors are only recorded; they produce no tree changes.
    if (token.isParseError) {
        [self emitParseError:@"Tokenizer Parser Error: %@", token.asParseError];
        return;
    }

    // One-shot flag: drop a single leading LINE FEED from the next character token.
    if (_ignoreNextLineFeedCharacterToken) {
        _ignoreNextLineFeedCharacterToken = NO;
        if (token.isCharacterToken) {
            NSString *characters = token.asCharacterToken.characters;
            if ([characters characterAtIndex:0] == 0x000A) {
                if (characters.length <= 1) {
                    return;
                }
                [token.asCharacterToken trimFormIndex:1];
            }
        }
    }

    if (treeConstructionDispatcher(self.adjustedCurrentNode)) {
        [self processToken:token byApplyingRulesForInsertionMode:_insertionMode];
    } else {
        [self processTokenByApplyingRulesForParsingTokensInForeignContent:token];
    }
}

/// Runs the token through the full dispatch again (used after a mode switch).
- (void)reprocessToken:(HTMLToken *)token
{
    [self processToken:token];
}

/**
 Processes the token with the rules of the given insertion mode.

 @param token         The token to process.
 @param insertionMode The insertion mode whose rules are applied. This may
                      differ from the parser's current mode, so the dispatch
                      must use this argument rather than `_insertionMode`.
 */
- (void)processToken:(HTMLToken *)token byApplyingRulesForInsertionMode:(HTMLInsertionMode)insertionMode
{
    switch (insertionMode) {
        case HTMLInsertionModeInitial:
            return [self HTMLInsertionModeInitial:token];
        case HTMLInsertionModeBeforeHTML:
            return [self HTMLInsertionModeBeforeHTML:token];
        case HTMLInsertionModeBeforeHead:
            return [self HTMLInsertionModeBeforeHead:token];
        case HTMLInsertionModeInHead:
            return [self HTMLInsertionModeInHead:token];
        case HTMLInsertionModeInHeadNoscript:
            return [self HTMLInsertionModeInHeadNoscript:token];
        case HTMLInsertionModeAfterHead:
            return [self HTMLInsertionModeAfterHead:token];
        case HTMLInsertionModeInBody:
            return [self HTMLInsertionModeInBody:token];
        case HTMLInsertionModeText:
            return [self HTMLInsertionModeText:token];
        case HTMLInsertionModeInTable:
            return [self HTMLInsertionModeInTable:token];
        case HTMLInsertionModeInTableText:
            return [self HTMLInsertionModeInTableText:token];
        case HTMLInsertionModeInCaption:
            return [self HTMLInsertionModeInCaption:token];
        case HTMLInsertionModeInColumnGroup:
            return [self HTMLInsertionModeInColumnGroup:token];
        case HTMLInsertionModeInTableBody:
            return [self HTMLInsertionModeInTableBody:token];
        case HTMLInsertionModeInRow:
            return [self HTMLInsertionModeInRow:token];
        case HTMLInsertionModeInCell:
            return [self HTMLInsertionModeInCell:token];
        case HTMLInsertionModeInSelect:
            return [self HTMLInsertionModeInSelect:token];
        case HTMLInsertionModeInSelectInTable:
            return [self HTMLInsertionModeInSelectInTable:token];
        case HTMLInsertionModeInTemplate:
            return [self HTMLInsertionModeInTemplate:token];
        case HTMLInsertionModeAfterBody:
            return [self HTMLInsertionModeAfterBody:token];
        case HTMLInsertionModeInFrameset:
            return [self HTMLInsertionModeInFrameset:token];
        case HTMLInsertionModeAfterFrameset:
            return [self HTMLInsertionModeAfterFrameset:token];
        case HTMLInsertionModeAfterAfterBody:
            return [self HTMLInsertionModeAfterAfterBody:token];
        case HTMLInsertionModeAfterAfterFrameset:
            return [self HTMLInsertionModeAfterAfterFrameset:token];
    }
}

#pragma mark - Nodes

/// The current node of the stack of open elements.
- (HTMLElement *)currentNode
{
    return _stackOfOpenElements.currentNode;
}

/// The context element when parsing a fragment and only one element is on the
/// stack of open elements; otherwise the current node.
- (HTMLElement *)adjustedCurrentNode
{
    if (_stackOfOpenElements.count == 1 && _fragmentParsingAlgorithm) {
        return _contextElement;
    }
    return [self currentNode];
}

/// The most recently pushed template insertion mode, or the parser's current
/// insertion mode when the template stack is empty.
- (HTMLInsertionMode)currentTemplateInsertionMode
{
    if (_stackOfTemplateInsertionModes.count == 0) {
        return _insertionMode;
    }
    // Modes are pushed with -addObject: and popped with -removeLastObject,
    // so the top of the stack is the last object.
    return [_stackOfTemplateInsertionModes.lastObject unsignedIntegerValue];
}

#pragma mark - Emits

- (void)emitParseError:(NSString *)format, ...
NS_FORMAT_FUNCTION(1, 2)
{
    // Format the message from the variadic arguments and record it.
    va_list args;
    va_start(args, format);
    NSString *message = [[NSString alloc] initWithFormat:format arguments:args];
    [_errors addObject:message];
    va_end(args);
}

#pragma mark - Insertions & Manipulations

/**
 Determines where the next node has to be inserted.

 @param overrideTarget Target to use instead of the current node, or nil.
 @param child          On return, set to the node before which the insertion
                       must happen (the last table, when foster parenting moves
                       the insertion in front of it). Left untouched otherwise,
                       so callers initialise it to nil. May be NULL.
 @return The node that becomes the parent of the inserted node. For a
         template element this is its `content`.
 */
- (HTMLNode *)appropriatePlaceForInsertingANodeWithOverrideTarget:(HTMLElement *)overrideTarget
                                                  beforeChildNode:(out HTMLElement * __autoreleasing *)child
{
    HTMLNode *target = self.currentNode;
    if (overrideTarget != nil) {
        target = overrideTarget;
    }

    // Foster parenting: content aimed at a table-structure element is
    // redirected out of the table.
    if (_fosterParenting &&
        [[(HTMLElement *)target tagName] isEqualToAny:@"table", @"tbody", @"tfoot", @"thead", @"tr", nil]) {

        // Walk the stack from the most recently opened element and stop at the
        // first template or table; at most one of the two ends up set.
        HTMLElement *lastTemplate = nil;
        HTMLElement *lastTable = nil;
        for (HTMLElement *element in _stackOfOpenElements.reverseObjectEnumerator) {
            if ([element.tagName isEqualToString:@"template"]) {
                lastTemplate = element;
                break;
            }
            if ([element.tagName isEqualToString:@"table"]) {
                lastTable = element;
                break;
            }
        }

        if (lastTemplate != nil) {
            // Inside a template: insert into it (swapped for its content below).
            target = lastTemplate;
        } else if (lastTable == nil) {
            // No table on the stack: fall back to the first (root) element.
            target = _stackOfOpenElements.firstNode;
        } else if (lastTable.parentNode != nil) {
            // Insert into the table's parent, immediately before the table.
            if (child != NULL) {
                *child = lastTable;
            }
            target = lastTable.parentNode;
        } else {
            // Detached table: use the element immediately above it in the
            // stack of open elements, i.e. the one at the preceding index.
            NSUInteger lastTableIndex = [_stackOfOpenElements indexOfElement:lastTable];
            target = _stackOfOpenElements[lastTableIndex - 1];
        }
    }

    if ([target isKindOfClass:[HTMLTemplate class]]) {
        target = [(HTMLTemplate *)target content];
    }

    return target;
}

/// Inserts a comment for the token at the appropriate place for inserting a node.
- (void)insertComment:(HTMLCommentToken *)token
{
    [self insertComment:token asChildOfNode:nil];
}

/**
 Inserts a comment node built from the token's data.

 @param token The comment token.
 @param node  Explicit parent for the comment. When nil, the parent and the
              reference child are taken from the appropriate place for
              inserting a node.
 */
- (void)insertComment:(HTMLCommentToken *)token asChildOfNode:(HTMLNode *)node
{
    HTMLNode *parent = node;
    HTMLElement *child = nil;
    if (parent == nil) {
        parent = [self appropriatePlaceForInsertingANodeWithOverrideTarget:nil beforeChildNode:&child];
    }
    HTMLComment *comment = [[HTMLComment alloc] initWithData:token.data];
    [parent insertNode:comment beforeChildNode:child];
}

-
(HTMLElement *)createElementForToken:(HTMLTagToken *)token inNamespace:(HTMLNamespace)htmlNamespace
{
    // The element takes its tag name and attributes from the token.
    return [[HTMLElement alloc] initWithTagName:token.tagName
                                      namespace:htmlNamespace
                                     attributes:token.attributes];
}

/// Creates an element in the HTML namespace for the token, inserts it and
/// pushes it onto the stack of open elements.
- (HTMLElement *)insertElementForToken:(HTMLTagToken *)token
{
    return [self insertForeignElementForToken:token inNamespace:HTMLNamespaceHTML];
}

/// Same as -insertElementForToken: but for an arbitrary namespace.
- (HTMLElement *)insertForeignElementForToken:(HTMLTagToken *)token inNamespace:(HTMLNamespace)namespace
{
    return [self insertElement:[self createElementForToken:token inNamespace:namespace]];
}

/// Inserts the element at the appropriate place for inserting a node, pushes
/// it onto the stack of open elements and returns it.
- (HTMLElement *)insertElement:(HTMLElement *)element
{
    HTMLElement *referenceChild = nil;
    HTMLNode *parent = [self appropriatePlaceForInsertingANodeWithOverrideTarget:nil
                                                                 beforeChildNode:&referenceChild];
    [parent insertNode:element beforeChildNode:referenceChild];
    [_stackOfOpenElements pushElement:element];
    return element;
}

/**
 Inserts character data at the appropriate place for inserting a node.

 Nothing is inserted when that place is a document node. Existing text is
 extended where possible: first the text node preceding the reference child,
 then the last child of the insertion location; otherwise a new text node is
 created.
 */
- (void)insertCharacters:(NSString *)data
{
    HTMLElement *referenceChild = nil;
    HTMLNode *location = [self appropriatePlaceForInsertingANodeWithOverrideTarget:nil
                                                                   beforeChildNode:&referenceChild];

    // Document nodes take no text children.
    if (location.nodeType == HTMLNodeDocument) {
        return;
    }

    if (referenceChild != nil && referenceChild.previousSibling.nodeType == HTMLNodeText) {
        HTMLText *precedingText = (HTMLText *)referenceChild.previousSibling;
        [precedingText appendData:data];
    } else if (location.lastChild.nodeType == HTMLNodeText) {
        HTMLText *trailingText = (HTMLText *)location.lastChild;
        [trailingText appendData:data];
    } else {
        HTMLText *freshText = [[HTMLText alloc] initWithData:data];
        [location insertNode:freshText beforeChildNode:referenceChild];
    }
}

/// Inserts an element for the token, puts the tokenizer into the given state
/// and switches to the text insertion mode, remembering the mode to return to.
- (void)applyGenericParsingAlgorithmForToken:(HTMLStartTagToken *)token withTokenizerState:(HTMLTokenizerState)state
{
    [self insertElementForToken:token];

    _tokenizer.state = state;

    _originalInsertionMode = _insertionMode;
    [self switchInsertionMode:HTMLInsertionModeText];
}

- (void)reconstructActiveFormattingElements
{
if (_listOfActiveFormattingElements.isEmpty) {
        return;
    }

    // Nothing to do when the last entry is a marker or is still open.
    id last = _listOfActiveFormattingElements.lastEntry;
    if (last == [HTMLMarker marker] || [_stackOfOpenElements containsElement:last]) {
        return;
    }

    NSInteger index = _listOfActiveFormattingElements.count - 1;
    HTMLElement *entry = _listOfActiveFormattingElements[index];

    // Reconstruct the active formatting elements
    // https://html.spec.whatwg.org/multipage/syntax.html#reconstruct-the-active-formatting-elements

    // Rewind phase
    while (![entry isEqual:[HTMLMarker marker]] && ![_stackOfOpenElements containsElement:entry]) {
        if (index == 0) {
            // Ran off the start of the list: step to -1 so that the advance
            // phase below begins at index 0.
            index--;
            break;
        }
        entry = _listOfActiveFormattingElements[--index];
    }

    while (YES) {
        // Advance phase
        entry = _listOfActiveFormattingElements[++index];

        // Create phase
        HTMLStartTagToken *token = [[HTMLStartTagToken alloc] initWithTagName:entry.tagName attributes:entry.attributes];
        HTMLElement *element = [self insertElementForToken:token];
        [_listOfActiveFormattingElements replaceElementAtIndex:index withElement:element];

        if (element == _listOfActiveFormattingElements.lastEntry) {
            break;
        }
    }
}

/// Pops the current node while it is one of the implied-end-tag elements,
/// stopping early at an element named `tagName` (pass nil for no exception).
- (void)generateImpliedEndTagsExceptForElement:(NSString *)tagName
{
    while ([self.currentNode.tagName isEqualToAny:@"dd", @"dt", @"li", @"option", @"optgroup", @"p", @"rb", @"rp", @"rt", @"rtc", nil] &&
           ![self.currentNode.tagName isEqualToString:tagName]) {
        [_stackOfOpenElements popCurrentNode];
    }
}

/// Like -generateImpliedEndTagsExceptForElement: without an exception and
/// with the table-structure elements added to the list.
- (void)generateAllImpliedEndTagsThoroughly
{
    while ([self.currentNode.tagName isEqualToAny:@"caption", @"colgroup", @"dd", @"dt", @"li", @"option", @"optgroup", @"p", @"rb", @"rp", @"rt", @"rtc", @"tbody", @"td", @"tfoot", @"th", @"thead", @"tr", nil]) {
        [_stackOfOpenElements popCurrentNode];
    }
}

/// Closes the open `p` element: generates implied end tags (except for `p`),
/// reports an error if the current node is then not a `p`, and pops up to and
/// including the `p`.
- (void)closePElement
{
    [self generateImpliedEndTagsExceptForElement:@"p"];
    if (![self.currentNode.tagName isEqualToString:@"p"]) {
        [self emitParseError:@"Current node being closed is not a <p> element"];
    }
    [_stackOfOpenElements popElementsUntilElementPoppedWithTagName:@"p"];
}

/**
 Runs the adoption agency algorithm for an end tag with the given name.

 @param tagName The tag name of the formatting element being closed.
 @return YES only when no formatting element with that name is found in the
         list of active formatting elements; NO in every other case.
         NOTE(review): callers presumably treat YES as "handle like any other
         end tag" - confirm at the call sites.
 */
- (BOOL)runAdoptionAgencyAlgorithmForTagName:(NSString *)tagName
{
    // Fast path: the current node matches and is not an active formatting element.
    if ([self.currentNode.tagName isEqualToString:tagName] &&
        ![_listOfActiveFormattingElements containsElement:self.currentNode]) {
        [_stackOfOpenElements popCurrentNode];
        return NO;
    }

    // The outer loop runs at most eight times.
    for (int outerLoopCounter = 0; outerLoopCounter < 8; outerLoopCounter++) {

        HTMLElement *formattingElement = [_listOfActiveFormattingElements formattingElementWithTagName:tagName];
        if (formattingElement == nil) {
            return YES;
        }

        if (![_stackOfOpenElements containsElement:formattingElement]) {
            [self emitParseError:@"Formatting element <%@> is not in the Stack of Open Elements (Adoption Agency)", tagName];
            [_listOfActiveFormattingElements removeElement:formattingElement];
            return NO;
        }

        if (![_stackOfOpenElements hasElementInScopeWithTagName:formattingElement.tagName]) {
            [self emitParseError:@"Formatting element <%@> is not in scope (Adoption Agency)", tagName];
            return NO;
        }

        // Only reported; processing continues.
        if (![formattingElement isEqual:self.currentNode]) {
            [self emitParseError:@"Formatting element <%@> is not the current node (Adoption Agency)", tagName];
        }

        NSUInteger formattingElementIndex = [_stackOfOpenElements indexOfElement:formattingElement];
        HTMLElement *furthestBlock = [_stackOfOpenElements furthestBlockAfterIndex:formattingElementIndex];
        if (furthestBlock == nil) {
            // No furthest block: simply close the formatting element.
            [_stackOfOpenElements popElementsUntilElementPopped:formattingElement];
            [_listOfActiveFormattingElements removeElement:formattingElement];
            return NO;
        }

        // The element immediately above the formatting element in the stack.
        HTMLElement *commonAncestor = _stackOfOpenElements[formattingElementIndex - 1];
        NSUInteger bookmark = [_listOfActiveFormattingElements indexOfElement:formattingElement];

        HTMLElement *node = furthestBlock;
        HTMLElement *lastNode = furthestBlock;
        NSUInteger index = [_stackOfOpenElements indexOfElement:node];

        // Inner loop: walk up the stack from the furthest block to the formatting element.
        int innerLoopCounter = 0;
        while (YES) {
            innerLoopCounter += 1;
            index -= 1;
            node = _stackOfOpenElements[index];

            if ([node isEqual:formattingElement]) {
                break;
            }

            if (innerLoopCounter > 3 && [_listOfActiveFormattingElements containsElement:node]) {
                [_listOfActiveFormattingElements removeElement:node];
                continue;
            }

            if (![_listOfActiveFormattingElements containsElement:node]) {
                [_stackOfOpenElements removeElement:node];
                continue;
            }

            // Replace node with a copy in both the list and the stack.
            HTMLElement *newElement = [node copy];
            [_listOfActiveFormattingElements replaceElementAtIndex:[_listOfActiveFormattingElements indexOfElement:node]
                                                       withElement:newElement];
            [_stackOfOpenElements replaceElementAtIndex:[_stackOfOpenElements indexOfElement:node]
                                            withElement:newElement];
            node = newElement;

            if ([lastNode isEqual:furthestBlock]) {
                bookmark = [_listOfActiveFormattingElements indexOfElement:node] + 1;
            }

            // Move lastNode underneath node.
            [lastNode.parentNode removeChildNode:lastNode];
            [node appendNode:lastNode];

            lastNode = node;
        }

        // Re-insert the collected chain, using the common ancestor as override target.
        HTMLElement *child = nil;
        HTMLNode *parent = [self appropriatePlaceForInsertingANodeWithOverrideTarget:commonAncestor
                                                                     beforeChildNode:&child];
        [parent insertNode:lastNode beforeChildNode:child];

        // A copy of the formatting element takes over the furthest block's
        // children and becomes its only child.
        HTMLElement *newElement = [formattingElement copy];
        [furthestBlock reparentChildNodesIntoNode:newElement];
        [furthestBlock appendNode:newElement];

        // Swap the formatting element for the copy: at the bookmark in the
        // list, and right after the furthest block in the stack.
        [_listOfActiveFormattingElements removeElement:formattingElement];
        [_listOfActiveFormattingElements insertElement:newElement atIndex:bookmark];

        [_stackOfOpenElements removeElement:formattingElement];
        NSUInteger furthestBlockIndex = [_stackOfOpenElements indexOfElement:furthestBlock];
        [_stackOfOpenElements insertElement:newElement atIndex:furthestBlockIndex + 1];
    }

    return NO;
}

/// Closes the current table cell: generates implied end tags, reports a
/// misnested cell, pops up to the `td`/`th`, clears the active formatting
/// elements up to the last marker and switches to the "in row" mode.
- (void)closeTheCell
{
    [self generateImpliedEndTagsExceptForElement:nil];
    if (![self.currentNode.tagName isEqualToAny:@"td", @"th", nil]) {
        [self emitParseError:@"Closing misnested Cell <%@>", self.currentNode.tagName];
    }
    [_stackOfOpenElements popElementsUntilAnElementPoppedWithAnyOfTagNames:@[@"td", @"th"]];
    [_listOfActiveFormattingElements clearUptoLastMarker];
    [self switchInsertionMode:HTMLInsertionModeInRow];
}

#pragma mark - State Machine

-
(void)switchInsertionMode:(HTMLInsertionMode)mode
{
    // Entering a text mode: remember the mode to return to afterwards.
    if (mode == HTMLInsertionModeText || mode == HTMLInsertionModeInTableText) {
        _originalInsertionMode = _insertionMode;
    }
    _insertionMode = mode;
}

/**
 Derives the insertion mode from the stack of open elements, walking from the
 most recently opened element towards the first one. In the fragment case the
 first element is replaced by the context element for the checks.
 */
- (void)resetInsertionModeAppropriately
{
    BOOL last = NO;
    HTMLElement *node = _stackOfOpenElements.lastNode;
    NSUInteger nodeIndex = _stackOfOpenElements.count - 1;

    while (YES) {
        if (node == _stackOfOpenElements.firstNode) {
            last = YES;
            if (_fragmentParsingAlgorithm) {
                node = _contextElement;
            }
        }

        if ([node.tagName isEqualToString:@"select"]) {
            if (!last) {
                // Look for an enclosing table; a template ends the search.
                HTMLElement *ancestor = node;
                NSUInteger ancestorIndex = nodeIndex;
                while (YES) {
                    if (ancestor == _stackOfOpenElements.firstNode) {
                        break;
                    }
                    ancestorIndex--;
                    ancestor = _stackOfOpenElements[ancestorIndex];
                    if ([ancestor.tagName isEqualToString:@"template"]) {
                        break;
                    }
                    if ([ancestor.tagName isEqualToString:@"table"]) {
                        // A select nested in a table uses the dedicated
                        // "in select in table" mode, not plain "in table".
                        [self switchInsertionMode:HTMLInsertionModeInSelectInTable];
                        return;
                    }
                }
            }
            [self switchInsertionMode:HTMLInsertionModeInSelect];
            return;
        }

        if (!last) {
            if ([node.tagName isEqualToAny:@"td", @"th", nil]) {
                [self switchInsertionMode:HTMLInsertionModeInCell];
                return;
            }
        }

        if ([node.tagName isEqualToString:@"tr"]) {
            [self switchInsertionMode:HTMLInsertionModeInRow];
            return;
        }

        if ([node.tagName isEqualToAny:@"tbody", @"thead", @"tfoot", nil]) {
            [self switchInsertionMode:HTMLInsertionModeInTableBody];
            return;
        }

        if ([node.tagName isEqualToString:@"caption"]) {
            [self switchInsertionMode:HTMLInsertionModeInCaption];
            return;
        }

        if ([node.tagName isEqualToString:@"colgroup"]) {
            [self switchInsertionMode:HTMLInsertionModeInColumnGroup];
            return;
        }

        if ([node.tagName isEqualToString:@"table"]) {
            [self switchInsertionMode:HTMLInsertionModeInTable];
            return;
        }

        if ([node.tagName isEqualToString:@"template"]) {
            [self switchInsertionMode:self.currentTemplateInsertionMode];
            return;
        }

        if (!last) {
            if ([node.tagName isEqualToString:@"head"]) {
                [self switchInsertionMode:HTMLInsertionModeInHead];
                return;
            }
        }

        if ([node.tagName isEqualToString:@"body"]) {
            [self switchInsertionMode:HTMLInsertionModeInBody];
            return;
        }

        if ([node.tagName isEqualToString:@"frameset"]) {
            [self switchInsertionMode:HTMLInsertionModeInFrameset];
            return;
        }

        if ([node.tagName isEqualToString:@"html"]) {
            if (_headElementPointer == nil) {
                [self switchInsertionMode:HTMLInsertionModeBeforeHead];
            } else {
                [self switchInsertionMode:HTMLInsertionModeAfterHead];
            }
            return;
        }

        if (last) {
            [self switchInsertionMode:HTMLInsertionModeInBody];
            return;
        }

        // Continue with the element opened before this one.
        nodeIndex--;
        node = _stackOfOpenElements[nodeIndex];
    }
}

#pragma mark - Insertion Modes

/// "Initial" insertion mode: skips leading whitespace, attaches comments to
/// the document and consumes a DOCTYPE; any other token puts the document in
/// quirks mode and is reprocessed in the "before html" mode.
- (void)HTMLInsertionModeInitial:(HTMLToken *)token
{
    switch (token.type) {
        case HTMLTokenTypeCharacter:
        {
            [token.asCharacterToken trimLeadingWhitespace];
            if (token.asCharacterToken.isEmpty) {
                return;
            }
            break;
        }
        case HTMLTokenTypeComment:
            [self insertComment:token.asCommentToken asChildOfNode:_document];
            return;
        case HTMLTokenTypeDoctype:
        {
            HTMLDOCTYPEToken *doctypeToken = token.asDoctypeToken;
            HTMLDocumentType *doctype = [[HTMLDocumentType alloc] initWithName:doctypeToken.name
                                                              publicIdentifier:doctypeToken.publicIdentifier
                                                              systemIdentifier:doctypeToken.systemIdentifier];
            if (!doctype.isValid) {
                [self emitParseError:@"Invalid DOCTYPE"];
            }

            _document.documentType = doctype;
            _document.quirksMode = doctype.quirksMode;

            // The token's force-quirks flag overrides the doctype's own mode.
            if (doctypeToken.forceQuirks) {
                _document.quirksMode = HTMLQuirksModeQuirks;
            }

            [self switchInsertionMode:HTMLInsertionModeBeforeHTML];
            return;
        }
        default:
            break;
    }

    [self emitParseError:@"Expected a DOCTYPE"];
    _document.quirksMode = HTMLQuirksModeQuirks;
    [self switchInsertionMode:HTMLInsertionModeBeforeHTML];
    [self reprocessToken:token];
}

- (void)HTMLInsertionModeBeforeHTML:(HTMLToken *)token
{
    switch (token.type) {
        case HTMLTokenTypeDoctype:
            [self emitParseError:@"Unexpected DOCTYPE Token before <html>"];
            return;
        case HTMLTokenTypeComment:
            [self insertComment:token.asCommentToken asChildOfNode:_document];
            return;
        case HTMLTokenTypeCharacter:
        {
            [token.asCharacterToken trimLeadingWhitespace];
if (token.asCharacterToken.isEmpty) {
                return;
            }
            break;
        }
        case HTMLTokenTypeStartTag:
            if ([token.asStartTagToken.tagName isEqualToString:@"html"]) {
                HTMLElement *html = [self createElementForToken:token.asTagToken inNamespace:HTMLNamespaceHTML];
                [_document appendNode:html];
                [_stackOfOpenElements pushElement:html];
                [self switchInsertionMode:HTMLInsertionModeBeforeHead];
                return;
            }
            break;
        case HTMLTokenTypeEndTag:
            // Only </head>, </body>, </html> and </br> fall through to the
            // implicit <html> creation; every other end tag is ignored.
            if (![token.asEndTagToken.tagName isEqualToAny:@"head", @"body", @"html", @"br", nil]) {
                [self emitParseError:@"Unexpected end tag </%@> before <html>", token.asEndTagToken.tagName];
                return;
            }
            break;
        default:
            break;
    }

    // Anything else: create the root element implicitly and reprocess.
    HTMLElement *html = [[HTMLElement alloc] initWithTagName:@"html"];
    [_document appendNode:html];
    [_stackOfOpenElements pushElement:html];
    [self switchInsertionMode:HTMLInsertionModeBeforeHead];
    [self reprocessToken:token];
}

/// "Before head" insertion mode: skips leading whitespace, handles an explicit
/// <head>, and otherwise inserts an implied head element and reprocesses the
/// token in the "in head" mode.
- (void)HTMLInsertionModeBeforeHead:(HTMLToken *)token
{
    switch (token.type) {
        case HTMLTokenTypeCharacter:
        {
            [token.asCharacterToken trimLeadingWhitespace];
            if (token.asCharacterToken.isEmpty) {
                return;
            }
            break;
        }
        case HTMLTokenTypeComment:
            [self insertComment:token.asCommentToken];
            return;
        case HTMLTokenTypeDoctype:
            [self emitParseError:@"Unexpected DOCTYPE Token before <head>"];
            return;
        case HTMLTokenTypeStartTag:
            if ([token.asStartTagToken.tagName isEqualToString:@"html"]) {
                [self HTMLInsertionModeInBody:token];
            } else if ([token.asStartTagToken.tagName isEqualToString:@"head"]) {
                HTMLElement *head = [self insertElementForToken:token.asTagToken];
                _headElementPointer = head;
                [self switchInsertionMode:HTMLInsertionModeInHead];
            } else {
                break;
            }
            return;
        case HTMLTokenTypeEndTag:
            if (![token.asEndTagToken.tagName isEqualToAny:@"head", @"body", @"html", @"br", nil]) {
                [self emitParseError:@"Unexpected end tag </%@> before <head>", token.asEndTagToken.tagName];
                return;
            }
            break;
        default:
            break;
    }

    // Anything else: insert an implied head element and reprocess.
    HTMLStartTagToken *headToken = [[HTMLStartTagToken alloc] initWithTagName:@"head"];
    HTMLElement *head = [self insertElementForToken:headToken];
    _headElementPointer = head;
    [self switchInsertionMode:HTMLInsertionModeInHead];
    [self reprocessToken:token];
}

/// "In head" insertion mode. Tokens that do not belong in the head pop the
/// head element, switch to "after head" and are reprocessed there.
- (void)HTMLInsertionModeInHead:(HTMLToken *)token
{
    switch (token.type) {
        case HTMLTokenTypeCharacter:
        {
            // Leading whitespace is inserted; the remainder (if any) falls
            // through to the "anything else" handling below.
            HTMLCharacterToken *leadingWhiteSpace = [token.asCharacterToken tokenBySplitingLeadingWhiteSpace];
            if (leadingWhiteSpace) {
                [self insertCharacters:leadingWhiteSpace.characters];
            }
            if (token.asCharacterToken.isEmpty) {
                return;
            }
            break;
        }
        case HTMLTokenTypeComment:
            [self insertComment:token.asCommentToken];
            return;
        case HTMLTokenTypeDoctype:
            [self emitParseError:@"Unexpected DOCTYPE Token in <head>"];
            return;
        case HTMLTokenTypeStartTag:
            if ([token.asStartTagToken.tagName isEqualToString:@"html"]) {
                [self HTMLInsertionModeInBody:token];
            } else if ([token.asStartTagToken.tagName isEqualToAny:@"base", @"basefont", @"bgsound", @"link", nil]) {
                // Inserted and popped right away, so the element stays empty.
                [self insertElementForToken:token.asStartTagToken];
                [_stackOfOpenElements popCurrentNode];
            } else if ([token.asStartTagToken.tagName isEqualToString:@"meta"]) {
                [self insertElementForToken:token.asStartTagToken];
                [_stackOfOpenElements popCurrentNode];
            } else if ([token.asStartTagToken.tagName isEqualToString:@"title"]) {
                [self applyGenericParsingAlgorithmForToken:token.asStartTagToken withTokenizerState:HTMLTokenizerStateRCDATA];
            } else if ([token.asStartTagToken.tagName isEqualToAny:@"noscript", @"noframes", @"style", nil]) {
                [self applyGenericParsingAlgorithmForToken:token.asStartTagToken withTokenizerState:HTMLTokenizerStateRAWTEXT];
            } else if ([token.asStartTagToken.tagName isEqualToString:@"script"]) {
                HTMLElement *child = nil;
                HTMLNode *adjustedInsertionLocation = [self appropriatePlaceForInsertingANodeWithOverrideTarget:nil
                                                                                                beforeChildNode:&child];
                HTMLElement *script = [self createElementForToken:token.asStartTagToken inNamespace:HTMLNamespaceHTML];
                [adjustedInsertionLocation insertNode:script beforeChildNode:child];
                [_stackOfOpenElements pushElement:script];
                _tokenizer.state = HTMLTokenizerStateScriptData;
                _originalInsertionMode = _insertionMode;
                [self switchInsertionMode:HTMLInsertionModeText];
            } else if ([token.asStartTagToken.tagName isEqualToString:@"head"]) {
                [self emitParseError:@"Unexpected start tag <head> in <head>"];
            } else if ([token.asStartTagToken.tagName isEqualToString:@"template"]) {
                // NOTE(review): the template element is created without the
                // token's attributes - confirm whether that is intended.
                HTMLTemplate *template = [HTMLTemplate new];
                [self insertElement:template];
                [_listOfActiveFormattingElements addMarker];
                _framesetOkFlag = NO;
                [self switchInsertionMode:HTMLInsertionModeInTemplate];
                [_stackOfTemplateInsertionModes addObject:@(HTMLInsertionModeInTemplate)];
            } else {
                break;
            }
            return;
        case HTMLTokenTypeEndTag:
            if ([token.asEndTagToken.tagName isEqualToString:@"head"]) {
                [_stackOfOpenElements popCurrentNode];
                [self switchInsertionMode:HTMLInsertionModeAfterHead];
            } else if ([token.asEndTagToken.tagName isEqualToAny:@"body", @"html", @"br", nil]) {
                break;
            } else if ([token.asEndTagToken.tagName isEqualToString:@"template"]) {
                if (![_stackOfOpenElements containsElementWithTagName:@"template"]) {
                    [self emitParseError:@"Unexpected end tag </template> in <head>"];
                    return;
                }
                [self generateAllImpliedEndTagsThoroughly];
                if (![self.currentNode.tagName isEqualToString:@"template"]) {
                    [self emitParseError:@"Unexpected end tag </template> for misnested element <%@> in <head>", self.currentNode.tagName];
                }
                [_stackOfOpenElements popElementsUntilTemplateElementPopped];
                [_listOfActiveFormattingElements clearUptoLastMarker];
                [_stackOfTemplateInsertionModes removeLastObject];
                [self resetInsertionModeAppropriately];
            } else {
                [self emitParseError:@"Unexpected end tag </%@> in <head>", token.asEndTagToken.tagName];
                return;
            }
            return;
        default:
            break;
    }

    // Anything else: leave the head and reprocess in "after head".
    [_stackOfOpenElements popCurrentNode];
    [self switchInsertionMode:HTMLInsertionModeAfterHead];
    [self reprocessToken:token];
}

- (void)HTMLInsertionModeInHeadNoscript:(HTMLToken *)token
{
    switch (token.type) {
        case HTMLTokenTypeDoctype:
            [self emitParseError:@"Unexpected DOCTYPE Token in

in "]; } else { if ([_stackOfOpenElements hasElementInButtonScopeWithTagName:@"p"]) { [self closePElement]; } HTMLElement *form = [self insertElementForToken:token]; if (![_stackOfOpenElements containsElementWithTagName:@"template"]) { _formElementPointer = form; } } } else if ([tagName isEqualToAny:@"li", @"dd", @"dt", nil]) { /** li, dd & dt cases are all same, hence the merge */ _framesetOkFlag = NO; // Start Tag: li, dd, dt // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody NSDictionary *map = @{@"li": @[@"li"], @"dd": @[@"dd", @"dt"], @"dt": @[@"dd", @"dt"]}; for (HTMLElement *node in _stackOfOpenElements.reverseObjectEnumerator.allObjects) { if ([map[tagName] containsObject:node.tagName]) { [self generateImpliedEndTagsExceptForElement:node.tagName]; if (![self.currentNode.tagName isEqualToString:node.tagName]) { [self emitParseError:@"Unexpected Start Tag <%@> in ", node.tagName]; } [_stackOfOpenElements popElementsUntilElementPoppedWithTagName:node.tagName]; break; } else if (IsSpecialElement(node) && ![node.tagName isEqualToAny:@"address", @"div", @"p", nil]) { break; } } if ([_stackOfOpenElements hasElementInButtonScopeWithTagName:@"p"]) { [self closePElement]; } [self insertElementForToken:token]; } else if ([tagName isEqualToString:@"plaintext"]) { if ([_stackOfOpenElements hasElementInButtonScopeWithTagName:@"p"]) { [self closePElement]; } [self insertElementForToken:token]; _tokenizer.state = HTMLTokenizerStatePLAINTEXT; } else if ([tagName isEqualToString:@"button"]) { if ([_stackOfOpenElements hasElementInScopeWithTagName:@"button"]) { [self emitParseError:@"Unexpected nested Start Tag