From ebcbbbf693ae30ca84362b728dee6838fbbe3c20 Mon Sep 17 00:00:00 2001
From: iska
Date: Mon, 22 Sep 2014 00:20:06 +0200
Subject: [PATCH] Add initial implementation for "consuming a character reference"

https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
---
 HTMLKit/HTMLTokenizer.m | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/HTMLKit/HTMLTokenizer.m b/HTMLKit/HTMLTokenizer.m
index 9ae492f..b3c7949 100644
--- a/HTMLKit/HTMLTokenizer.m
+++ b/HTMLKit/HTMLTokenizer.m
@@ -98,6 +98,57 @@
 	[self emitToken:token];
 }
 
+#pragma mark - Consume Character Reference
+
+/**
+ Initial implementation of "consume a character reference":
+ https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
+
+ Reads the next input character from the stream reader and dispatches on it.
+ Only the early-out cases are handled so far; numeric references are delegated
+ to -consumeNumberCharacterReference, which is still a stub.
+
+ @param additionalAllowedCharacter Character that, when it is the next input
+        character, means this is not a character reference. Pass (UTF32Char)EOF
+        when there is no additional allowed character.
+ @return Currently always nil: either no character reference was recognised,
+         or the branch that would recognise one is not implemented yet.
+ */
+- (NSString *)consumeCharachterReferenceWithAddtionalAllowedCharacter:(UTF32Char)additionalAllowedCharacter
+{
+	UTF32Char character = [_inputStreamReader nextInputCharacter];
+	if (additionalAllowedCharacter != (UTF32Char)EOF && character == additionalAllowedCharacter) {
+		return nil;
+	}
+
+	switch (character) {
+		case CHARACTER_TABULATION:
+		case LINE_FEED:
+		case FORM_FEED:
+		case SPACE:
+		case LESS_THAN_SIGN:
+		case AMPERSAND:
+		// EOF is a negative int; cast it to the switch's unsigned type, as the guard above does.
+		case (UTF32Char)EOF:
+			return nil;
+		case NUMBER_SIGN:
+			return [self consumeNumberCharacterReference];
+		default:
+			// Everything else is not handled by this initial implementation.
+			return nil;
+	}
+}
+
+/**
+ Placeholder for consuming a numeric character reference (the "#" branch).
+
+ @return Always nil for now.
+ */
+- (NSString *)consumeNumberCharacterReference
+{
+	return nil;
+}
+
 #pragma mark - States
 
 - (void)HTMLTokenizerStateData