//
//  HTMLInputStreamReader.m
//  HTMLKit
//
//  Created by Iska on 15/09/14.
//  Copyright (c) 2014 BrainCookie. All rights reserved.
//
|
|
#import "HTMLInputStreamReader.h"
|
|
#import "HTMLTokenizerCharacters.h"
|
|
#import "NSCharacterSet+HTMLKit.h"
|
|
|
|
#pragma mark - HTMLInputStreamReader
|
|
|
|
/**
 Private state of the stream reader.

 The reader keeps two views over the same input: an NSScanner, used by the
 bulk `consume…` helpers, and a CFStringInlineBuffer, used for
 character-by-character access. `_location` is the index shared by both.
 */
@interface HTMLInputStreamReader ()
{
	/// Copy of the input string handed to the initializer.
	NSString *_string;
	/// Scanner used by the bulk consume methods; its scanLocation mirrors `_location`.
	NSScanner *_scanner;
	/// Inline buffer used for indexed access to single UTF-16 code units.
	CFStringInlineBuffer _buffer;
	/// Index (in UTF-16 code units) of the next unread position.
	NSUInteger _location;
	/// Position remembered by -markCurrentLocation for a later rewind.
	NSUInteger _mark;
	/// The character most recently consumed.
	UTF32Char _currentInputCharacter;
	/// Number of UTF-16 code units (0, 1 or 2) covered by the last peeked character.
	NSUInteger _consume;
	/// Callback invoked with a reason whenever a parse error is emitted.
	HTMLStreamReaderErrorCallback _errorCallback;

	/// When YES, the next consume returns `_currentInputCharacter` again.
	BOOL _reconsume;
}
@end
|
|
|
|
@implementation HTMLInputStreamReader
|
|
// Bind the public properties to the ivars declared in the class extension.
@synthesize string = _string, currentLocation = _location, errorCallback = _errorCallback;
|
|
|
|
#pragma mark - Lifecycle
|
|
|
|
/**
 Initializes a reader over the given string.

 The string is copied, and both the scanner and the inline character buffer
 are created over that copy, so a mutable string changed by the caller later
 cannot make the two views disagree.

 @param string The input to read. A nil value is treated as an empty string.
 @return An initialized reader positioned at the start of the input.
 */
- (instancetype)initWithString:(NSString *)string
{
	self = [super init];
	if (self) {
		// Fall back to an empty string so the inline buffer is never set up over NULL.
		_string = [string copy] ?: @"";

		_scanner = [[NSScanner alloc] initWithString:_string];
		// Disable the scanner's skip set so no input characters are silently dropped.
		_scanner.charactersToBeSkipped = nil;

		// __bridge: no ownership transfer, the buffer merely borrows _string.
		CFStringInitInlineBuffer((__bridge CFStringRef)_string, &_buffer, CFRangeMake(0, (CFIndex)_string.length));
	}
	return self;
}
|
|
|
|
#pragma mark - Errors
|
|
|
|
/**
 Reports a parse error to the registered error callback, if there is one.

 @param reason Human-readable description of the error.
 */
- (void)emitParseError:(NSString *)reason
{
	HTMLStreamReaderErrorCallback callback = self.errorCallback;
	if (!callback) {
		return;
	}
	callback(reason);
}
|
|
|
|
#pragma mark - Stream Processing
|
|
|
|
/**
 @return The character stored by the most recent successful consume call.
 */
- (UTF32Char)currentInputCharacter
{
	UTF32Char current = _currentInputCharacter;
	return current;
}
|
|
|
|
/**
 Peeks at the next input character without advancing the reader.

 As a side effect, `_consume` is set to the number of UTF-16 code units the
 returned character covers (0 at end of input, 2 for CR LF and for valid
 surrogate pairs, 1 otherwise), unless a reconsume is pending.

 - A pending reconsume returns the current input character unchanged.
 - CR and CR LF are both reported as a single LINE_FEED.
 - Isolated surrogates are returned as-is after emitting a parse error.
 - Control/undefined characters are returned after emitting a parse error.

 @return The next character, or EOF when the end of the input is reached.
 */
- (UTF32Char)nextInputCharacter
{
	if (_reconsume) {
		return _currentInputCharacter;
	}

	_consume = 0;
	UTF32Char character = CFStringGetCharacterFromInlineBuffer(&_buffer, _location);

	// The inline buffer yields 0 past the end; tell that apart from a real NUL.
	if (character == 0 && _location >= _string.length) {
		return EOF;
	}

	_consume = 1;

	if (character == CARRIAGE_RETURN) {
		// Swallow the LF of a CR LF pair so the pair counts as one newline.
		UniChar following = CFStringGetCharacterFromInlineBuffer(&_buffer, _location + 1);
		_consume = (following == LINE_FEED) ? 2 : 1;
		return LINE_FEED;
	}

	if (CFStringIsSurrogateLowCharacter(character)) {
		[self emitParseError:[NSString stringWithFormat:@"Non-Unicode character found (an isolated low surrogate: 0x%X)", (unsigned int)character]];
		return character;
	}

	if (CFStringIsSurrogateHighCharacter(character)) {
		UniChar trailing = CFStringGetCharacterFromInlineBuffer(&_buffer, _location + 1);
		if (!CFStringIsSurrogateLowCharacter(trailing)) {
			[self emitParseError:[NSString stringWithFormat:@"Non-Unicode character found (an isolated high surrogate: 0x%X)", (unsigned int)character]];
			return character;
		}

		// A well-formed pair: combine both halves into one code point.
		_consume = 2;
		character = CFStringGetLongCharacterForSurrogatePair(character, trailing);
	}

	if (isControlOrUndefinedCharacter(character)) {
		[self emitParseError:[NSString stringWithFormat:@"A control/undefined character found: (0x%X)", (unsigned int)character]];
	}

	return character;
}
|
|
|
|
/**
 Reads the UTF-16 code unit at the given distance from the current location,
 without advancing the reader.

 @param offset Distance, in UTF-16 code units, from the current location.
 @return The code unit found at that index in the inline buffer.
 */
- (UTF32Char)inputCharacterPointAtOffset:(NSUInteger)offset
{
	const NSUInteger index = _location + offset;
	return CFStringGetCharacterFromInlineBuffer(&_buffer, index);
}
|
|
|
|
/**
 Consumes and returns the next input character.

 If a reconsume is pending, the flag is cleared and the current input
 character is returned again without moving. Otherwise the reader advances
 past the next character and makes it the current input character.

 @return The consumed character, or EOF at the end of the input.
 */
- (UTF32Char)consumeNextInputCharacter
{
	if (!_reconsume) {
		UTF32Char consumed = [self nextInputCharacter];
		// -nextInputCharacter recorded how many code units this character spans.
		_location += _consume;
		_scanner.scanLocation = _location;
		_currentInputCharacter = consumed;
		return consumed;
	}

	_reconsume = NO;
	return _currentInputCharacter;
}
|
|
|
|
/**
 Consumes the next input character only if it equals the given character.

 @param character The character expected next.
 @return YES if the character matched and was consumed, NO otherwise (the
         reader is left untouched in that case).
 */
- (BOOL)consumeCharacter:(UTF32Char)character
{
	UTF32Char candidate = [self nextInputCharacter];
	if (candidate != character) {
		return NO;
	}

	// With a pending reconsume the match was against the current character,
	// which has already been stepped over, so there is nothing to advance.
	if (!_reconsume) {
		_location += _consume;
		_scanner.scanLocation = _location;
		_currentInputCharacter = candidate;
	}
	_reconsume = NO;
	return YES;
}
|
|
|
|
/**
 Consumes an unsigned decimal integer at the current scanner location.

 @param result On success, receives the scanned value. May be NULL to skip
               over the number without retrieving it; left untouched on failure.
 @return YES if a number was scanned and the reader advanced past it, NO otherwise.
 */
- (BOOL)consumeNumber:(unsigned long long *)result
{
	unsigned long long scanned = 0;
	if (![_scanner scanUnsignedLongLong:&scanned]) {
		return NO;
	}

	// Guard the out-parameter: callers may only care whether a number is present.
	if (result != NULL) {
		*result = scanned;
	}
	_location = _scanner.scanLocation;
	return YES;
}
|
|
|
|
/**
 Consumes a decimal number (NSDecimal) at the current scanner location.

 @param result On success, receives the scanned value. May be NULL to skip
               over the number without retrieving it; left untouched on failure.
 @return YES if a number was scanned and the reader advanced past it, NO otherwise.
 */
- (BOOL)consumeDecimalNumber:(NSDecimal *)result
{
	NSDecimal scanned;
	if (![_scanner scanDecimal:&scanned]) {
		return NO;
	}

	// Guard the out-parameter: callers may only care whether a number is present.
	if (result != NULL) {
		*result = scanned;
	}
	_location = _scanner.scanLocation;
	return YES;
}
|
|
|
|
/**
 Consumes a run of characters from the HTML hex-number character set and
 converts it to an integer using base 16.

 @param result On success, receives the parsed value. May be NULL to skip
               over the digits without retrieving them; left untouched on failure.
               Values too large for the type are clamped by strtoull to ULLONG_MAX.
 @return YES if at least one matching character was scanned, NO otherwise.
 */
- (BOOL)consumeHexNumber:(unsigned long long *)result
{
	NSCharacterSet *hexCharacters = [NSCharacterSet HTMLHexNumberCharacterSet];

	NSString *digits = nil;
	if (![_scanner scanCharactersFromSet:hexCharacters intoString:&digits]) {
		return NO;
	}

	// Guard the out-parameter: callers may only care whether digits are present.
	if (result != NULL) {
		*result = strtoull(digits.UTF8String, NULL, 16);
	}
	_location = _scanner.scanLocation;
	return YES;
}
|
|
|
|
/**
 Consumes the given string if it occurs at the current scanner location.

 The scanner's case-sensitivity is changed only for the duration of this call
 and then restored, so the setting requested here does not leak into later
 scans performed through the shared scanner.

 @param string        The string expected at the current location.
 @param caseSensitive Whether the comparison distinguishes case.
 @return YES if the string was found and consumed, NO otherwise.
 */
- (BOOL)consumeString:(NSString *)string caseSensitive:(BOOL)caseSensitive
{
	const BOOL previousCaseSensitivity = _scanner.caseSensitive;

	_scanner.caseSensitive = caseSensitive;
	BOOL success = [_scanner scanString:string intoString:NULL];
	_scanner.caseSensitive = previousCaseSensitivity;

	_location = _scanner.scanLocation;
	return success;
}
|
|
|
|
/**
 Consumes characters one at a time until a character contained in
 `characters` (or the end of the input) is reached. The terminating
 character is un-consumed again, so it is the next one to be read.

 @param characters The set of terminating characters, given as a string.
 @return The consumed characters, or nil if none were consumed.
 */
- (NSString *)consumeCharactersUpToCharactersInString:(NSString *)characters
{
	NSCharacterSet *terminators = [NSCharacterSet characterSetWithCharactersInString:characters];
	NSMutableString *collected = [NSMutableString string];

	UTF32Char character = [self consumeNextInputCharacter];
	while (![terminators longCharacterIsMember:character] && character != EOF) {
		[collected appendString:StringFromUTF32Char(character)];
		character = [self consumeNextInputCharacter];
	}

	// Step back over the terminator so the caller sees it next.
	[self unconsumeCurrentInputCharacter];

	if (collected.length == 0) {
		return nil;
	}
	return collected;
}
|
|
|
|
/**
 Consumes all characters up to (but not including) the next occurrence of
 the given string, and returns them with newlines normalized: CR LF pairs
 and lone CRs are both converted to LF.

 @param string The delimiter to stop at.
 @return The consumed characters, or nil if the scanner consumed nothing.
 */
- (NSString *)consumeCharactersUpToString:(NSString *)string
{
	// Explicitly nil: the scanner leaves the out-variable untouched when it
	// consumes nothing, and it must not be read uninitialized in that case.
	NSString *consumed = nil;
	[_scanner scanUpToString:string intoString:&consumed];
	_location = _scanner.scanLocation;

	// Collapse CR LF to CR first, then turn every remaining CR into LF.
	consumed = [consumed stringByReplacingOccurrencesOfString:@"\r\n" withString:@"\r"];
	consumed = [consumed stringByReplacingOccurrencesOfString:@"\r" withString:@"\n"];
	return consumed;
}
|
|
|
|
/**
 Consumes the longest run of characters that are all contained in `characters`.

 If a reconsume is pending, scanning starts one position earlier so the
 current input character takes part in the run; on success the pending
 reconsume is cleared.

 On failure the scanner is put back exactly where it was before the call, so
 it stays in sync with the reader's location. (Previously the scan location
 was incremented unconditionally on failure, which skipped a character when
 no reconsume was pending and raised a range exception at the end of input.)

 @param characters The accepted characters, given as a string.
 @return The consumed run, or nil if the next character is not in the set.
 */
- (NSString *)consumeCharactersInString:(NSString *)characters
{
	NSCharacterSet *set = [NSCharacterSet characterSetWithCharactersInString:characters];

	const NSUInteger originalScanLocation = _scanner.scanLocation;
	// Step back over the character to reconsume; never move before the start.
	if (_reconsume && originalScanLocation > 0) {
		_scanner.scanLocation = originalScanLocation - 1;
	}

	NSString *string = nil;
	if (![_scanner scanCharactersFromSet:set intoString:&string]) {
		// Undo the step back (if any) and leave all reader state untouched.
		_scanner.scanLocation = originalScanLocation;
		return nil;
	}

	_reconsume = NO;
	_location = _scanner.scanLocation;
	return string;
}
|
|
|
|
/**
 Consumes the longest run of characters belonging to
 +[NSCharacterSet alphanumericCharacterSet] at the current scanner location.

 @return The consumed run, or nil if the next character is not alphanumeric.
 */
- (NSString *)consumeAlphanumericCharacters
{
	NSString *alphanumerics = nil;
	[_scanner scanCharactersFromSet:[NSCharacterSet alphanumericCharacterSet]
						 intoString:&alphanumerics];

	// Keep the reader's location in step with the scanner.
	_location = _scanner.scanLocation;
	return alphanumerics;
}
|
|
|
|
/**
 Flags the current input character to be handed out again by the next
 consume call, without moving the reader.
 */
- (void)reconsumeCurrentInputCharacter
{
	_reconsume = YES;
}
|
|
|
|
/**
 Moves the reader back by the width of the last character that was read and
 re-synchronizes the scanner. The recorded width is then cleared, so calling
 this twice in a row moves back only once.
 */
- (void)unconsumeCurrentInputCharacter
{
	const NSUInteger rewound = _location - _consume;
	_consume = 0;
	_location = rewound;
	_scanner.scanLocation = rewound;
}
|
|
|
|
/**
 Remembers the current location so that -rewindToMarkedLocation can return to it.
 */
- (void)markCurrentLocation
{
	const NSUInteger here = _location;
	_mark = here;
}
|
|
|
|
/**
 Returns the reader and its scanner to the location saved by
 -markCurrentLocation and clears the recorded width of the last character.
 */
- (void)rewindToMarkedLocation
{
	const NSUInteger marked = _mark;
	_consume = 0;
	_location = marked;
	_scanner.scanLocation = marked;
}
|
|
|
|
/**
 Returns the reader to its initial state at the start of the input.

 Besides the location, mark and scanner position, this also clears any
 pending reconsume and the remembered current character; otherwise the first
 consume after a reset could hand out a stale character from the previous pass.
 */
- (void)reset
{
	_mark = 0;
	_location = 0;
	_scanner.scanLocation = 0;
	_consume = 0;

	// Drop per-character state left over from before the reset.
	_reconsume = NO;
	_currentInputCharacter = 0;
}
|
|
|
|
@end
|