diff --git a/HTMLKit/HTMLTokenizer.m b/HTMLKit/HTMLTokenizer.m
index a0247e7..c019a44 100644
--- a/HTMLKit/HTMLTokenizer.m
+++ b/HTMLKit/HTMLTokenizer.m
@@ -317,36 +317,40 @@
NSString *entityName = nil;
+#warning Improve Named Entity Search
UTF32Char inputCharacter = [_inputStreamReader consumeNextInputCharacter];
- NSArray *names = [HTMLTokenizerEntities entityNames];
+// NSArray *names = [HTMLTokenizerEntities entityNames];
+ NSArray *names = NAMES();
NSMutableString *name = [NSMutableString stringWithString:StringFromUTF32Char(inputCharacter)];
+ NSUInteger searchIndex = 0;
+
while (YES) {
- NSPredicate *predicate = [NSPredicate predicateWithFormat:@"SELF BEGINSWITH %@", name];
- names = [names filteredArrayUsingPredicate:predicate];
- if (names.count == 0) break;
+ searchIndex= [names indexOfObject:name
+ inSortedRange:NSMakeRange(searchIndex, names.count - searchIndex)
+ options:NSBinarySearchingInsertionIndex | NSBinarySearchingFirstEqual
+ usingComparator:^NSComparisonResult(id obj1, id obj2) {
+ return [obj1 compare:obj2];
+ }];
+
+ if (searchIndex >= names.count) break;
+
+ if ([[names objectAtIndex:searchIndex] isEqualToString:name]) {
+ entityName = [name copy];
+ }
+
+ if ([name hasSuffix:@";"]) break;
inputCharacter = [_inputStreamReader consumeNextInputCharacter];
if (inputCharacter == EOF) break;
[name appendString:StringFromUTF32Char(inputCharacter)];
-
- if ([names containsObject:name]) {
- entityName = [name copy];
- if ([entityName hasSuffix:@";"]) {
- break;
- }
- }
}
if (entityName == nil) {
- if ([name hasSuffix:@";"]) {
- [self emitParseError:@"Undefined named entity with semicolon found"];
- } else {
- NSString *nextAlphanumeric = [_inputStreamReader consumeAlphanumericCharacters];
- if (nextAlphanumeric != nil) {
- [name appendString:nextAlphanumeric];
- }
+ [_inputStreamReader rewindToMarkedLocation];
+
+ if ([_inputStreamReader consumeAlphanumericCharacters] != nil) {
if ([_inputStreamReader consumeString:@";" caseSensitive:NO]) {
[self emitParseError:@"Undefined named entity with semicolon found"];
}
diff --git a/HTMLKit/HTMLTokenizerEntities.h b/HTMLKit/HTMLTokenizerEntities.h
index 49cee29..5c60c70 100644
--- a/HTMLKit/HTMLTokenizerEntities.h
+++ b/HTMLKit/HTMLTokenizerEntities.h
@@ -14,3 +14,5 @@
+ (NSString *)replacementForNamedCharacterEntity:(NSString *)entity;
@end
+
+extern NSArray * NAMES(void); // Returns the named character reference names as an NSArray of NSString; takes no arguments.
diff --git a/HTMLKit/HTMLTokenizerEntities.m b/HTMLKit/HTMLTokenizerEntities.m
index cdddce3..f9e43dc 100644
--- a/HTMLKit/HTMLTokenizerEntities.m
+++ b/HTMLKit/HTMLTokenizerEntities.m
@@ -2243,6 +2243,16 @@
static NSDictionary *_entities;
+static NSString * x[] = { // Entity-name table: one NSString literal per NAMED_CHARACTER_REFERENCES entry (the value argument is dropped); NAMES() copies it into an NSArray.
+#define NAMED_CHARACTER_REFERENCE( name, value ) @name,
+ NAMED_CHARACTER_REFERENCES
+#undef NAMED_CHARACTER_REFERENCE
+}; // NOTE(review): the tokenizer binary-searches the NAMES() array, so entries presumably must already be in -compare: order — confirm against the macro list.
+
+NSArray * NAMES(void) { // Builds and returns a new NSArray holding every entry of the static name table x, in table order.
+    return [[NSArray alloc] initWithObjects:x count:sizeof(x) / sizeof(x[0])]; // count derived from the table itself so it cannot drift from the macro list
+}
+
@implementation HTMLTokenizerEntities
+ (void)initialize