Fix logic for Named Entity replacement
The named entity search will be improved further in a later commit.
This commit is contained in:
+22
-18
@@ -317,36 +317,40 @@
|
||||
|
||||
NSString *entityName = nil;
|
||||
|
||||
#warning Improve Named Entity Search
|
||||
UTF32Char inputCharacter = [_inputStreamReader consumeNextInputCharacter];
|
||||
NSArray *names = [HTMLTokenizerEntities entityNames];
|
||||
// NSArray *names = [HTMLTokenizerEntities entityNames];
|
||||
NSArray *names = NAMES();
|
||||
NSMutableString *name = [NSMutableString stringWithString:StringFromUTF32Char(inputCharacter)];
|
||||
|
||||
NSUInteger searchIndex = 0;
|
||||
|
||||
while (YES) {
|
||||
NSPredicate *predicate = [NSPredicate predicateWithFormat:@"SELF BEGINSWITH %@", name];
|
||||
names = [names filteredArrayUsingPredicate:predicate];
|
||||
if (names.count == 0) break;
|
||||
searchIndex= [names indexOfObject:name
|
||||
inSortedRange:NSMakeRange(searchIndex, names.count - searchIndex)
|
||||
options:NSBinarySearchingInsertionIndex | NSBinarySearchingFirstEqual
|
||||
usingComparator:^NSComparisonResult(id obj1, id obj2) {
|
||||
return [obj1 compare:obj2];
|
||||
}];
|
||||
|
||||
if (searchIndex >= names.count) break;
|
||||
|
||||
if ([[names objectAtIndex:searchIndex] isEqualToString:name]) {
|
||||
entityName = [name copy];
|
||||
}
|
||||
|
||||
if ([name hasSuffix:@";"]) break;
|
||||
|
||||
inputCharacter = [_inputStreamReader consumeNextInputCharacter];
|
||||
if (inputCharacter == EOF) break;
|
||||
|
||||
[name appendString:StringFromUTF32Char(inputCharacter)];
|
||||
|
||||
if ([names containsObject:name]) {
|
||||
entityName = [name copy];
|
||||
if ([entityName hasSuffix:@";"]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (entityName == nil) {
|
||||
if ([name hasSuffix:@";"]) {
|
||||
[self emitParseError:@"Undefined named entity with semicolon found"];
|
||||
} else {
|
||||
NSString *nextAlphanumeric = [_inputStreamReader consumeAlphanumericCharacters];
|
||||
if (nextAlphanumeric != nil) {
|
||||
[name appendString:nextAlphanumeric];
|
||||
}
|
||||
[_inputStreamReader rewindToMarkedLocation];
|
||||
|
||||
if ([_inputStreamReader consumeAlphanumericCharacters] != nil) {
|
||||
if ([_inputStreamReader consumeString:@";" caseSensitive:NO]) {
|
||||
[self emitParseError:@"Undefined named entity with semicolon found"];
|
||||
}
|
||||
|
||||
@@ -14,3 +14,5 @@
|
||||
+ (NSString *)replacementForNamedCharacterEntity:(NSString *)entity;
|
||||
|
||||
@end
|
||||
|
||||
extern NSArray * NAMES();
|
||||
|
||||
@@ -2243,6 +2243,16 @@
|
||||
|
||||
static NSDictionary *_entities;
|
||||
|
||||
/// Backing table of entity names.
///
/// Built by expanding NAMED_CHARACTER_REFERENCES (defined elsewhere in this
/// file) with a temporary NAMED_CHARACTER_REFERENCE macro that keeps only the
/// `name` argument of each (name, value) entry and turns it into an NSString
/// literal. The element order is whatever order the macro list uses.
static NSString * x[] = {
#define NAMED_CHARACTER_REFERENCE( name, value ) @name,
	NAMED_CHARACTER_REFERENCES
#undef NAMED_CHARACTER_REFERENCE
};

/// Returns the entity names from the backing table as an immutable NSArray.
///
/// The array is created once and shared by all callers, instead of allocating
/// a fresh copy of the whole table on every call. Its length is derived from
/// the table itself, so it cannot drift out of sync with
/// NAMED_CHARACTER_REFERENCES the way a hard-coded count could.
///
/// NOTE(review): the tokenizer binary-searches this array, so it presumably
/// expects NAMED_CHARACTER_REFERENCES to be listed in sorted order — confirm
/// against the macro definition.
///
/// @return The shared array of entity names, in table order.
NSArray * NAMES(void) {
	static NSArray *names = nil;
	static dispatch_once_t onceToken;
	dispatch_once(&onceToken, ^{
		// Element count comes from the table, not from a magic number.
		names = [[NSArray alloc] initWithObjects:x count:sizeof(x) / sizeof(x[0])];
	});
	return names;
}
|
||||
|
||||
@implementation HTMLTokenizerEntities
|
||||
|
||||
+ (void)initialize
|
||||
|
||||
Reference in New Issue
Block a user