diff --git a/HTMLKit/HTMLCharacterToken.h b/HTMLKit/HTMLCharacterToken.h index 0341991..cb2a157 100644 --- a/HTMLKit/HTMLCharacterToken.h +++ b/HTMLKit/HTMLCharacterToken.h @@ -6,22 +6,76 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import #import "HTMLToken.h" +/** + HTML Character Token + */ @interface HTMLCharacterToken : HTMLToken +/** @brief The characters in this token. */ @property (nonatomic, copy) NSString *characters; +/** + Initializes a new character token. + + @param string The string with which to initialize the token. + @returns A new instance of a character token. + */ - (instancetype)initWithString:(NSString *)string; +/** + Appends the given string to this token. + + @param string The string to append. + */ - (void)appendString:(NSString *)string; + +/** + Checks whether this token is a whitespace character token. + + @discussion HTML whitespace characters are: CHARACTER TABULATION U+0009, LINE FEED U+000A, FORM FEED U+000C, + CARRIAGE RETURN U+000D, and SPACE U+0020 + + @returns `YES` if this token contains only whitespace characters, `NO` otherwise. + */ - (BOOL)isWhitespaceToken; + +/** + Checks whether this token is empty. + + @returns `YES` if this token is empty, `NO` otherwise. + */ - (BOOL)isEmpty; +/** + Retains all leading whitespace characters in this token. + */ - (void)retainLeadingWhitespace; + +/** + Trims all leading whitespace characters in this token. + */ - (void)trimLeadingWhitespace; + +/** + Trims the characters in this token from a given index + + @param index The start index from which to trim the token. + */ - (void)trimFormIndex:(NSUInteger)index; + +/** + Splits this token retaining only characters after the leading whitespace. The leading whitespace characters are then + returned as a new character token. 
+ + @returns A character token containing the leading whitespace characters. Returns `nil` if no leading whitespace exists. + */ - (HTMLCharacterToken *)tokenBySplitingLeadingWhiteSpace; @end diff --git a/HTMLKit/HTMLCommentToken.h b/HTMLKit/HTMLCommentToken.h index 044fc7f..11f36a7 100644 --- a/HTMLKit/HTMLCommentToken.h +++ b/HTMLKit/HTMLCommentToken.h @@ -6,15 +6,34 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import #import "HTMLToken.h" +/** + HTML Comment Token + */ @interface HTMLCommentToken : HTMLToken +/** @brief The comment string in this token. */ @property (nonatomic, copy) NSString *data; +/** + Initializes a new comment token. + + @param data The comment string with which to initialize the token. + @returns A new instance of a comment token. + */ - (instancetype)initWithData:(NSString *)data; +/** + Appends the given string to this token. + + @param string The string to append. + */ - (void)appendStringToData:(NSString *)string; @end diff --git a/HTMLKit/HTMLDOCTYPEToken.h b/HTMLKit/HTMLDOCTYPEToken.h index 1a4de9b..17b6c2b 100644 --- a/HTMLKit/HTMLDOCTYPEToken.h +++ b/HTMLKit/HTMLDOCTYPEToken.h @@ -6,20 +6,57 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import #import "HTMLToken.h" +/** + HTML DOCTYPE Token + */ @interface HTMLDOCTYPEToken : HTMLToken +/** @brief The DOCTYPE's name. */ @property (nonatomic, copy) NSString *name; + +/** @brief The DOCTYPE's public identifier. */ @property (nonatomic, strong) NSMutableString *publicIdentifier; + +/** @brief The DOCTYPE's system identifier. */ @property (nonatomic, strong) NSMutableString *systemIdentifier; + +/** @brief Flag whether this DOCTYPE forces quirks mode. 
*/ @property (nonatomic, assign) BOOL forceQuirks; +/** + Initializes a new DOCTYPE token. + + @param name The name with which to initialize the token. + @returns A new instance of a DOCTYPE token. + */ - (instancetype)initWithName:(NSString *)name; +/** + Appends the given string to this DOCTYPE's name. + + @param string The string to append. + */ - (void)appendStringToName:(NSString *)string; + +/** + Appends the given string to this DOCTYPE's public identifier. + + @param string The string to append. + */ - (void)appendStringToPublicIdentifier:(NSString *)string; + +/** + Appends the given string to this DOCTYPE's system identifier. + + @param string The string to append. + */ - (void)appendStringToSystemIdentifier:(NSString *)string; @end diff --git a/HTMLKit/HTMLEOFToken.h b/HTMLKit/HTMLEOFToken.h index 2ebd9c2..369f315 100644 --- a/HTMLKit/HTMLEOFToken.h +++ b/HTMLKit/HTMLEOFToken.h @@ -6,10 +6,18 @@ // Copyright (c) 2015 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import "HTMLToken.h" +/** + A HTML EOF Token. + */ @interface HTMLEOFToken : HTMLToken +/** Returns the singleton instance of the EOF Token. */ + (instancetype)token; @end diff --git a/HTMLKit/HTMLInputStreamReader.h b/HTMLKit/HTMLInputStreamReader.h index dea5e29..c4a6f88 100644 --- a/HTMLKit/HTMLInputStreamReader.h +++ b/HTMLKit/HTMLInputStreamReader.h @@ -6,8 +6,17 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import +/** + Typedef for the error callback block. + + @param reason The string describing the reason of the reported error. 
+ */ typedef void (^ HTMLStreamReaderErrorCallback)(NSString *reason); /** @@ -16,31 +25,134 @@ typedef void (^ HTMLStreamReaderErrorCallback)(NSString *reason); */ @interface HTMLInputStreamReader : NSObject +/** @brief The underlying string with which this stream reader was initialized */ @property (nonatomic, readonly) NSString *string; + +/** @brief The current scan location */ @property (nonatomic, readonly) NSUInteger currentLocation; + +/** @brief An error callback block, which gets called when encountering errors while reading the stream */ @property (nonatomic, copy) HTMLStreamReaderErrorCallback errorCallback; +/** + Initializes a new Input Stream Reader with the given string. + + @param string The HTML string + @returns A new instance of the Input Stream Reader. + */ - (id)initWithString:(NSString *)string; +/** + Returns the current input character. + + @returns The current code point in the input stream as a `UTF32Char`. + */ - (UTF32Char)currentInputCharacter; + +/** + Returns the next input character without consuming it. + + @returns The next code point in the input stream as a `UTF32Char`. Returns `EOF` if the stream is fully consumed. + */ - (UTF32Char)nextInputCharacter; + +/** + Returns the input character at a given offset without consuming it. + + @param offset The offset of the character. + @returns The code point in the input stream as a `UTF32Char` at the given offset. + */ - (UTF32Char)inputCharacterPointAtOffset:(NSUInteger)offset; +/** + Consumes and returns the next input character. Consuming a character advances the current scan location of the + input stream. + + @returns The next code point in the input stream as a `UTF32Char`. Returns `EOF` if the stream is fully consumed. + */ - (UTF32Char)consumeNextInputCharacter; + +/** + Causes the next input character to return the current input character. + */ - (void)reconsumeCurrentInputCharacter; + +/** @brief Unconsumes the current input character. 
*/ - (void)unconsumeCurrentInputCharacter; +/** + Consumes the given character at the current location. + + @param character The character to consume. + @returns YES if the given character was consumed at the current location, NO otherwise. + */ - (BOOL)consumeCharacter:(UTF32Char)character; + +/** + Consumes characters at the current location matching a decimal number. + + @param result Upon return, contains the consumed decimal number. Pass `NULL` to skip over a decimal number at the + current location. + @returns YES if a decimal number could be consumed at the current location, NO otherwise. + */ - (BOOL)consumeNumber:(unsigned long long *)result; + +/** + Consumes characters at the current location matching a hexadecimal number. + + @param result Upon return, contains the consumed hexadecimal number. Pass `NULL` to skip over a hexadecimal number at + the current location. + @returns YES if a hexadecimal number could be consumed at the current location, NO otherwise. + */ - (BOOL)consumeHexNumber:(unsigned long long *)result; + +/** + Consumes the given string at the current location. + + @param string The string to consume. + @param caseSensitive YES if the string's case should be respected when matching, NO if it should be ignored. + @returns YES if the given string was consumed at the current location, NO otherwise. + */ - (BOOL)consumeString:(NSString *)string caseSensitive:(BOOL)caseSensitive; + +/** + Consumes characters starting at the current location until any character in a given string is encountered. + + @param characters The string containing the characters to consume up to. + @returns A string containing the consumed characters. Returns `nil` if none were consumed. + */ - (NSString *)consumeCharactersUpToCharactersInString:(NSString *)characters; + +/** + Consumes characters starting at the current location until a given string is encountered. + + @param string The string to consume up to. + @returns A string containing the consumed characters. 
Returns `nil` if none were consumed. + */ - (NSString *)consumeCharactersUpToString:(NSString *)string; + +/** + Consumes characters as long as they match the characters in the given string, starting at the current location. + + @param characters A string with the characters to consume. + @returns A string containing the consumed characters. Returns `nil` if none were found. + */ - (NSString *)consumeCharactersInString:(NSString *)characters; + +/** + Consumes alphanumeric characters starting at the current location. + + @returns A string containing the consumed alphanumeric characters. Returns `nil` if none were found. + */ - (NSString *)consumeAlphanumericCharacters; +/** @brief Marks the current stream scan location. */ - (void)markCurrentLocation; + +/** @brief Resets the stream's scan location to the previously marked location. */ - (void)rewindToMarkedLocation; + +/** @brief Resets the stream to its beginning. */ - (void)reset; @end diff --git a/HTMLKit/HTMLParseErrorToken.h b/HTMLKit/HTMLParseErrorToken.h index 0f27a57..b673976 100644 --- a/HTMLKit/HTMLParseErrorToken.h +++ b/HTMLKit/HTMLParseErrorToken.h @@ -6,14 +6,31 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import #import "HTMLToken.h" +/** + HTML Parse Error Token + */ @interface HTMLParseErrorToken : HTMLToken +/** @brief The error's reason message. */ @property (nonatomic, copy) NSString *reason; + +/** @brief The error's location in the stream. */ @property (nonatomic, assign) NSUInteger location; +/** + Initializes a new Parse Error token. + + @param reason The error's reason message. + @param location The error's location in the stream. + @returns A new instance of a parse error token. 
+ */ - (instancetype)initWithReasonMessage:(NSString *)reason andStreamLocation:(NSUInteger)location; @end diff --git a/HTMLKit/HTMLTagToken.h b/HTMLKit/HTMLTagToken.h index 2107f58..9f16d21 100644 --- a/HTMLKit/HTMLTagToken.h +++ b/HTMLKit/HTMLTagToken.h @@ -6,27 +6,64 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import #import "HTMLToken.h" #import "HTMLOrderedDictionary.h" +/** + HTML Tag Token + */ @interface HTMLTagToken : HTMLToken +/** @brief The tag name. */ @property (nonatomic, copy) NSString *tagName; + +/** @brief The tag's attributes. */ @property (nonatomic, strong) HTMLOrderedDictionary *attributes; + +/** @brief Flag whether this tag is self-closing. */ @property (nonatomic, assign, getter = isSelfClosing) BOOL selfClosing; +/** + Initializes a new tag token. + + @param tagName The tag's name. + @returns A new instance of a tag token. + */ - (instancetype)initWithTagName:(NSString *)tagName; + +/** + Initializes a new tag token. + + @param tagName The tag's name. + @param attributes The tag's attributes. + @returns A new instance of a tag token. + */ - (instancetype)initWithTagName:(NSString *)tagName attributes:(NSMutableDictionary *)attributes; +/** + Appends the given string to this token's name. + + @param string The string to append. + */ - (void)appendStringToTagName:(NSString *)string; @end +/** + HTML Start Tag Token + */ @interface HTMLStartTagToken : HTMLTagToken @end +/** + HTML End Tag Token + */ @interface HTMLEndTagToken : HTMLTagToken @end diff --git a/HTMLKit/HTMLToken.h b/HTMLKit/HTMLToken.h index c943614..32d5b87 100644 --- a/HTMLKit/HTMLToken.h +++ b/HTMLKit/HTMLToken.h @@ -6,6 +6,10 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. 
// +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import @class HTMLDOCTYPEToken; @@ -16,10 +20,12 @@ @class HTMLCharacterToken; @class HTMLParseErrorToken; +/** @brief Returns YES if both arguments are `nil` or equal, NO otherwise. */ NS_INLINE BOOL bothNilOrEqual(id first, id second) { return (first == nil && second == nil) || ([first isEqual:second]); } +/** @brief The token type. */ typedef NS_ENUM(NSUInteger, HTMLTokenType) { HTMLTokenTypeCharacter, @@ -31,24 +37,76 @@ typedef NS_ENUM(NSUInteger, HTMLTokenType) HTMLTokenTypeStartTag }; +/** + Base class for HTML Tokens emitted by the Tokenizer. + + @see HTMLTokenizer + */ @interface HTMLToken : NSObject @property (nonatomic, assign) HTMLTokenType type; +/** @brief YES if this token is DOCTYPE token. NO otherwise */ - (BOOL)isDoctypeToken; + +/** @brief YES if this token is Start Tag token. NO otherwise */ - (BOOL)isStartTagToken; + +/** @brief YES if this token is End Tag token. NO otherwise */ - (BOOL)isEndTagToken; + +/** @brief YES if this token is Comment token. NO otherwise */ - (BOOL)isCommentToken; + +/** @brief YES if this token is Character token. NO otherwise */ - (BOOL)isCharacterToken; + +/** @brief YES if this token is EOF token. NO otherwise */ - (BOOL)isEOFToken; + +/** @brief YES if this token is Parse Error token. NO otherwise */ - (BOOL)isParseError; +/** + @brief Casts this token to DOCTYPE token. + @warning This is a convenience method and should be paired with the appropriate check. + */ - (HTMLDOCTYPEToken *)asDoctypeToken; + +/** + @brief Casts this token to Tag token. + @warning This is a convenience method and should be paired with the appropriate check. + */ - (HTMLTagToken *)asTagToken; + +/** + @brief Casts this token to Start Tag token. + @warning This is a convenience method and should be paired with the appropriate check. 
+ */ - (HTMLStartTagToken *)asStartTagToken; + +/** + @brief Casts this token to End Tag token. + @warning This is a convenience method and should be paired with the appropriate check. + */ - (HTMLEndTagToken *)asEndTagToken; + +/** + @brief Casts this token to Comment token. + @warning This is a convenience method and should be paired with the appropriate check. + */ - (HTMLCommentToken *)asCommentToken; + +/** + @brief Casts this token to Character token. + @warning This is a convenience method and should be paired with the appropriate check. + */ - (HTMLCharacterToken *)asCharacterToken; + +/** + @brief Casts this token to Parse Error token. + @warning This is a convenience method and should be paired with the appropriate check. + */ - (HTMLParseErrorToken *)asParseError; @end diff --git a/HTMLKit/HTMLTokenizer.h b/HTMLKit/HTMLTokenizer.h index 5d5b2bf..e4c51ba 100644 --- a/HTMLKit/HTMLTokenizer.h +++ b/HTMLKit/HTMLTokenizer.h @@ -6,23 +6,45 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import #import "HTMLToken.h" #import "HTMLTokenizerStates.h" +@class HTMLParser; + /** * HTML Tokenizer * https://html.spec.whatwg.org/multipage/syntax.html#tokenization */ - -@class HTMLParser; - @interface HTMLTokenizer : NSEnumerator +/** @brief The underlying string with which this tokenizer was initialized. */ @property (nonatomic, readonly) NSString *string; + +/** + The current tokenizer state. + + @see HTMLTokenizerState + */ @property (nonatomic, assign) HTMLTokenizerState state; + +/** + The associated HTML Parser instance. + + @see HTMLParser + */ @property (nonatomic, weak, readonly) HTMLParser *parser; +/** + Initializes a new Tokenizer with the given string. + + @param string The HTML string + @returns A new instance of the Tokenizer. 
+ */ - (instancetype)initWithString:(NSString *)string; @end diff --git a/HTMLKit/HTMLTokenizerEntities.h b/HTMLKit/HTMLTokenizerEntities.h index b4ff9ae..43e4fc4 100644 --- a/HTMLKit/HTMLTokenizerEntities.h +++ b/HTMLKit/HTMLTokenizerEntities.h @@ -6,11 +6,27 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import +/** + HTML character reference entities + https://html.spec.whatwg.org/multipage/syntax.html#named-character-references + */ @interface HTMLTokenizerEntities : NSObject +/** @brief All character reference entities. */ + (NSArray *)entities; + +/** + Returns the replacement entity at the given index. + + @param index The index of the character reference. + @returns The replacement character reference entity. + */ + (NSString *)replacementAtIndex:(NSUInteger)index; @end diff --git a/HTMLKit/HTMLTokenizerStates.h b/HTMLKit/HTMLTokenizerStates.h index 086df56..20461c9 100644 --- a/HTMLKit/HTMLTokenizerStates.h +++ b/HTMLKit/HTMLTokenizerStates.h @@ -6,6 +6,10 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #define TOKENIZER_STATES \ STATE_ENTRY( HTMLTokenizerStateData, = 0) \ STATE_ENTRY( HTMLTokenizerStateCharacterReferenceInData, ) \ diff --git a/HTMLKit/HTMLTokens.h b/HTMLKit/HTMLTokens.h index 4b1ac69..6450e9f 100644 --- a/HTMLKit/HTMLTokens.h +++ b/HTMLKit/HTMLTokens.h @@ -6,6 +6,10 @@ // Copyright (c) 2014 BrainCookie. All rights reserved. // +///------------------------------------------------------ +/// HTMLKit private header +///------------------------------------------------------ + #import "HTMLToken.h" #import "HTMLCharacterToken.h" #import "HTMLCommentToken.h"