14 Commits

Author SHA1 Message Date
iska b3b9ab7f93 Merge branch 'release/3.1.0' 2019-08-20 18:20:03 +02:00
iska fa0201f2fc Bump HTMLKit version to 3.1.0 2019-08-20 18:19:31 +02:00
iska 9d0977d0dd Update podspec for 3.0.0 2019-08-20 18:19:08 +02:00
iska 0a8e3a0e3b Update jazzy.yaml for 3.1.0 2019-08-20 18:18:58 +02:00
iska 3a294796d8 Add Changelog entry for HTMLKit 3.0.0 2019-08-20 18:18:49 +02:00
iska a1ca351912 Update Readme 2019-08-20 18:18:06 +02:00
iska 153d48c19c Set iterator and range detach operations to noop
Leave the housekeeping to the NSHashTable itself, which should purge
null-references automatically (not necessarily right away)

Also update tests accordingly: Use `allObjects.count` instead of simply
`count` on the NSHashTable, since `allObjects.count` returns non-null
references.

This should fix #36
2019-08-06 21:26:21 +02:00
iska 1dda2921fe Update html5lib-tests to latest commit as of 2019.08.06 2019-08-06 20:40:25 +02:00
iska 23f4e40d08 Add tests for HTML Serialization
Fixes #33
2019-08-06 20:39:40 +02:00
iska f4f7f48845 Move test class into correct folder 2019-08-03 19:47:29 +02:00
iska 98f7c8304f Implement a HTML Serializer
This implements the HTML Fragments Serialization according to:
https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments
2019-08-02 00:11:02 +02:00
iska 12276e91b3 Implement a HTML Tree Visitor
The tree visitor walks the DOM in tree order and calls the provided
node visitor upon entering and leaving a node.
2019-07-31 21:58:07 +02:00
iska ad2790b653 Update example for new github html structure/css 2019-07-25 21:46:28 +02:00
iska 228e3ea2a7 Merge tag '3.0.0' into develop
no message
2019-03-28 22:20:12 +01:00
31 changed files with 781 additions and 102 deletions
+4 -1
View File
@@ -1,5 +1,5 @@
module: HTMLKit
module_version: 3.0.0
module_version: 3.1.0
author: Iskandar Abudiab
author_url: https://twitter.com/iabudiab
github_url: https://github.com/iabudiab/HTMLKit
@@ -33,6 +33,7 @@ custom_categories:
- HTMLTemplate
- HTMLDOMTokenList
- HTMLRange
- HTMLSerializer
- name: Iteration & Filtering
children:
@@ -41,7 +42,9 @@ custom_categories:
- HTMLNodeFilterShowOptions
- HTMLNodeFilterValue
- HTMLNodeFilterBlock
- HTMLNodeVisitor
- HTMLSelectorNodeFilter
- HTMLTreeVisitor
- HTMLTreeWalker
- name: Structures
+15
View File
@@ -1,5 +1,20 @@
# Change Log
## [3.1.0](https://github.com/iabudiab/HTMLKit/releases/tag/3.1.0)
Released on 2019.08.20
### Added
- `HTMLTreeVisitor` that walks the DOM in tree order
- New HTML serialization implementation based on visitor pattern
### Fixes
- HTML serialization for deeply nested DOM trees (issue #33)
- Occasional Internal Consistency exceptions when deallocating node iterator (issue #36)
## [3.0.0](https://github.com/iabudiab/HTMLKit/releases/tag/3.0.0)
Released on 2019.03.28
@@ -112,7 +112,7 @@ do {
try scraper.load()
// Parse the selector
let repositoryContent = try CSSSelectorParser.parseSelector("[role='main'] .repository-content > .file-wrap > .files tr.js-navigation-item")
let repositoryContent = try CSSSelectorParser.parseSelector(".repository-content > .file-wrap > table.files tr.js-navigation-item")
// Query matching elements
let files = try scraper.listElements(matching: repositoryContent)
@@ -131,13 +131,10 @@ do {
// The following selector: "[role='main'] div.file table.js-file-line-container td:nth-child(2)"
// can be defined in type-safe manner:
let selector = allOf([
descendantOfElementSelector(
attributeSelector(.exactMatch, "role", "main")
),
descendantOfElementSelector(
allOf([
typeSelector("div"),
classSelector("file")
classSelector("repository-content")
])
),
descendantOfElementSelector(
+1 -1
View File
@@ -1,6 +1,6 @@
Pod::Spec.new do |s|
s.name = "HTMLKit"
s.version = "3.0.0"
s.version = "3.1.0"
s.summary = "HTMLKit, an Objective-C framework for your everyday HTML needs."
s.license = "MIT"
s.homepage = "https://github.com/iabudiab/HTMLKit"
+89 -5
View File
@@ -321,6 +321,25 @@
62857D8F1D39A47F008DC254 /* CSSCombinatorSelector.h in Headers */ = {isa = PBXBuildFile; fileRef = 62FA04E11BCC360D009ABF98 /* CSSCombinatorSelector.h */; settings = {ATTRIBUTES = (Public, ); }; };
62857D901D39A47F008DC254 /* CSSCompoundSelector.h in Headers */ = {isa = PBXBuildFile; fileRef = 620C87791BD44CBE00FB3EEE /* CSSCompoundSelector.h */; settings = {ATTRIBUTES = (Public, ); }; };
628AF6301BC99A6C00496128 /* CSSNthExpressionsParserTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 628AF62E1BC99A6C00496128 /* CSSNthExpressionsParserTests.m */; };
6295FB0722F0E770005C6DE8 /* HTMLNodeVisitor.h in Headers */ = {isa = PBXBuildFile; fileRef = 6295FB0522F0E770005C6DE8 /* HTMLNodeVisitor.h */; settings = {ATTRIBUTES = (Public, ); }; };
6295FB0822F0E770005C6DE8 /* HTMLNodeVisitor.h in Headers */ = {isa = PBXBuildFile; fileRef = 6295FB0522F0E770005C6DE8 /* HTMLNodeVisitor.h */; settings = {ATTRIBUTES = (Public, ); }; };
6295FB0922F0E770005C6DE8 /* HTMLNodeVisitor.h in Headers */ = {isa = PBXBuildFile; fileRef = 6295FB0522F0E770005C6DE8 /* HTMLNodeVisitor.h */; settings = {ATTRIBUTES = (Public, ); }; };
6295FB0A22F0E770005C6DE8 /* HTMLNodeVisitor.h in Headers */ = {isa = PBXBuildFile; fileRef = 6295FB0522F0E770005C6DE8 /* HTMLNodeVisitor.h */; settings = {ATTRIBUTES = (Public, ); }; };
6295FB0B22F0E770005C6DE8 /* HTMLNodeVisitor.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB0622F0E770005C6DE8 /* HTMLNodeVisitor.m */; };
6295FB0C22F0E770005C6DE8 /* HTMLNodeVisitor.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB0622F0E770005C6DE8 /* HTMLNodeVisitor.m */; };
6295FB0D22F0E770005C6DE8 /* HTMLNodeVisitor.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB0622F0E770005C6DE8 /* HTMLNodeVisitor.m */; };
6295FB0E22F0E770005C6DE8 /* HTMLNodeVisitor.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB0622F0E770005C6DE8 /* HTMLNodeVisitor.m */; };
6295FB1122F0ECEE005C6DE8 /* HTMLTreeVisitor.h in Headers */ = {isa = PBXBuildFile; fileRef = 6295FB0F22F0ECEE005C6DE8 /* HTMLTreeVisitor.h */; settings = {ATTRIBUTES = (Public, ); }; };
6295FB1222F0ECEE005C6DE8 /* HTMLTreeVisitor.h in Headers */ = {isa = PBXBuildFile; fileRef = 6295FB0F22F0ECEE005C6DE8 /* HTMLTreeVisitor.h */; settings = {ATTRIBUTES = (Public, ); }; };
6295FB1322F0ECEE005C6DE8 /* HTMLTreeVisitor.h in Headers */ = {isa = PBXBuildFile; fileRef = 6295FB0F22F0ECEE005C6DE8 /* HTMLTreeVisitor.h */; settings = {ATTRIBUTES = (Public, ); }; };
6295FB1422F0ECEE005C6DE8 /* HTMLTreeVisitor.h in Headers */ = {isa = PBXBuildFile; fileRef = 6295FB0F22F0ECEE005C6DE8 /* HTMLTreeVisitor.h */; settings = {ATTRIBUTES = (Public, ); }; };
6295FB1522F0ECEE005C6DE8 /* HTMLTreeVisitor.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB1022F0ECEE005C6DE8 /* HTMLTreeVisitor.m */; };
6295FB1622F0ECEE005C6DE8 /* HTMLTreeVisitor.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB1022F0ECEE005C6DE8 /* HTMLTreeVisitor.m */; };
6295FB1722F0ECEE005C6DE8 /* HTMLTreeVisitor.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB1022F0ECEE005C6DE8 /* HTMLTreeVisitor.m */; };
6295FB1822F0ECEE005C6DE8 /* HTMLTreeVisitor.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB1022F0ECEE005C6DE8 /* HTMLTreeVisitor.m */; };
6295FB1A22F0F309005C6DE8 /* HTMLTreeVisitorTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB1922F0F309005C6DE8 /* HTMLTreeVisitorTests.m */; };
6295FB1B22F0F309005C6DE8 /* HTMLTreeVisitorTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB1922F0F309005C6DE8 /* HTMLTreeVisitorTests.m */; };
6295FB1C22F0F309005C6DE8 /* HTMLTreeVisitorTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 6295FB1922F0F309005C6DE8 /* HTMLTreeVisitorTests.m */; };
62A95A4D1FB0FBFC0009FF26 /* HTMLSerializationTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 62A95A4C1FB0FBFC0009FF26 /* HTMLSerializationTests.m */; };
62A95A4E1FB0FBFC0009FF26 /* HTMLSerializationTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 62A95A4C1FB0FBFC0009FF26 /* HTMLSerializationTests.m */; };
62A95A4F1FB0FBFC0009FF26 /* HTMLSerializationTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 62A95A4C1FB0FBFC0009FF26 /* HTMLSerializationTests.m */; };
@@ -336,6 +355,14 @@
62D91C281DE218A500BEFADE /* HTMLRange.m in Sources */ = {isa = PBXBuildFile; fileRef = 62D91C221DE218A500BEFADE /* HTMLRange.m */; };
62D91C291DE218A500BEFADE /* HTMLRange.m in Sources */ = {isa = PBXBuildFile; fileRef = 62D91C221DE218A500BEFADE /* HTMLRange.m */; };
62D91C2A1DE218A500BEFADE /* HTMLRange.m in Sources */ = {isa = PBXBuildFile; fileRef = 62D91C221DE218A500BEFADE /* HTMLRange.m */; };
62E0917F22EDEB9D00C61073 /* HTMLSerializer.h in Headers */ = {isa = PBXBuildFile; fileRef = 62E0917D22EDEB9D00C61073 /* HTMLSerializer.h */; settings = {ATTRIBUTES = (Public, ); }; };
62E0918022EDEB9D00C61073 /* HTMLSerializer.h in Headers */ = {isa = PBXBuildFile; fileRef = 62E0917D22EDEB9D00C61073 /* HTMLSerializer.h */; settings = {ATTRIBUTES = (Public, ); }; };
62E0918122EDEB9D00C61073 /* HTMLSerializer.h in Headers */ = {isa = PBXBuildFile; fileRef = 62E0917D22EDEB9D00C61073 /* HTMLSerializer.h */; settings = {ATTRIBUTES = (Public, ); }; };
62E0918222EDEB9D00C61073 /* HTMLSerializer.h in Headers */ = {isa = PBXBuildFile; fileRef = 62E0917D22EDEB9D00C61073 /* HTMLSerializer.h */; settings = {ATTRIBUTES = (Public, ); }; };
62E0918322EDEB9D00C61073 /* HTMLSerializer.m in Sources */ = {isa = PBXBuildFile; fileRef = 62E0917E22EDEB9D00C61073 /* HTMLSerializer.m */; };
62E0918422EDEB9D00C61073 /* HTMLSerializer.m in Sources */ = {isa = PBXBuildFile; fileRef = 62E0917E22EDEB9D00C61073 /* HTMLSerializer.m */; };
62E0918522EDEB9D00C61073 /* HTMLSerializer.m in Sources */ = {isa = PBXBuildFile; fileRef = 62E0917E22EDEB9D00C61073 /* HTMLSerializer.m */; };
62E0918622EDEB9D00C61073 /* HTMLSerializer.m in Sources */ = {isa = PBXBuildFile; fileRef = 62E0917E22EDEB9D00C61073 /* HTMLSerializer.m */; };
62E0BA971E25456700E4D193 /* HTMLCharacterDataTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 62E0BA961E25456700E4D193 /* HTMLCharacterDataTests.m */; };
62E0BA981E25456700E4D193 /* HTMLCharacterDataTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 62E0BA961E25456700E4D193 /* HTMLCharacterDataTests.m */; };
62E0BA991E25456700E4D193 /* HTMLCharacterDataTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 62E0BA961E25456700E4D193 /* HTMLCharacterDataTests.m */; };
@@ -728,6 +755,11 @@
628AF63C1BC9D6FB00496128 /* CSSStructuralPseudoSelectors.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSStructuralPseudoSelectors.h; path = include/CSSStructuralPseudoSelectors.h; sourceTree = "<group>"; };
628AF63D1BC9D6FB00496128 /* CSSStructuralPseudoSelectors.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CSSStructuralPseudoSelectors.m; sourceTree = "<group>"; };
628B7CE61A080E1000602C87 /* HTMLNamespaces.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = HTMLNamespaces.h; path = include/HTMLNamespaces.h; sourceTree = "<group>"; };
6295FB0522F0E770005C6DE8 /* HTMLNodeVisitor.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = HTMLNodeVisitor.h; path = include/HTMLNodeVisitor.h; sourceTree = "<group>"; };
6295FB0622F0E770005C6DE8 /* HTMLNodeVisitor.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = HTMLNodeVisitor.m; sourceTree = "<group>"; };
6295FB0F22F0ECEE005C6DE8 /* HTMLTreeVisitor.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = HTMLTreeVisitor.h; path = include/HTMLTreeVisitor.h; sourceTree = "<group>"; };
6295FB1022F0ECEE005C6DE8 /* HTMLTreeVisitor.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = HTMLTreeVisitor.m; sourceTree = "<group>"; };
6295FB1922F0F309005C6DE8 /* HTMLTreeVisitorTests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; name = HTMLTreeVisitorTests.m; path = HTMLKitTests/HTMLTreeVisitorTests.m; sourceTree = "<group>"; };
62A95A4C1FB0FBFC0009FF26 /* HTMLSerializationTests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; name = HTMLSerializationTests.m; path = HTMLKitTests/HTMLSerializationTests.m; sourceTree = "<group>"; };
62ACB8DB1BC5C13E00E283C1 /* CSSPseudoFunctionSelector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSPseudoFunctionSelector.h; path = include/CSSPseudoFunctionSelector.h; sourceTree = "<group>"; };
62ACB8DC1BC5C13E00E283C1 /* CSSPseudoFunctionSelector.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CSSPseudoFunctionSelector.m; sourceTree = "<group>"; };
@@ -746,6 +778,8 @@
62D8345819FB1AC4009205A9 /* HTML5LibTokenizerTest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = HTML5LibTokenizerTest.m; path = HTMLKitTests/HTML5LibTokenizerTest.m; sourceTree = "<group>"; };
62D91C211DE218A500BEFADE /* HTMLRange.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HTMLRange.h; path = include/HTMLRange.h; sourceTree = "<group>"; };
62D91C221DE218A500BEFADE /* HTMLRange.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = HTMLRange.m; sourceTree = "<group>"; };
62E0917D22EDEB9D00C61073 /* HTMLSerializer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = HTMLSerializer.h; path = include/HTMLSerializer.h; sourceTree = "<group>"; };
62E0917E22EDEB9D00C61073 /* HTMLSerializer.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = HTMLSerializer.m; sourceTree = "<group>"; };
62E0BA961E25456700E4D193 /* HTMLCharacterDataTests.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = HTMLCharacterDataTests.m; path = HTMLKitTests/HTMLCharacterDataTests.m; sourceTree = "<group>"; };
62E1A3B41E610E5300C41C3B /* Fixtures */ = {isa = PBXFileReference; lastKnownFileType = folder; path = Fixtures; sourceTree = "<group>"; };
62E7CAAE19CDFFB500465A83 /* HTMLTokenizerCharacters.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = HTMLTokenizerCharacters.h; path = include/HTMLTokenizerCharacters.h; sourceTree = "<group>"; };
@@ -875,12 +909,16 @@
623406E01ADB04F9004677A3 /* HTMLTemplate.m */,
62567F4E1C0CB5750025D458 /* HTMLDOMTokenList.h */,
62567F4F1C0CB5750025D458 /* HTMLDOMTokenList.m */,
6247A9411B152F4F00CCF25C /* HTMLNodeIterator.h */,
6247A9421B152F4F00CCF25C /* HTMLNodeIterator.m */,
624717BA1B22009200B38302 /* HTMLTreeWalker.h */,
624717BB1B22009200B38302 /* HTMLTreeWalker.m */,
6247A9461B152F8C00CCF25C /* HTMLNodeFilter.h */,
624717B71B21FE5400B38302 /* HTMLNodeFilter.m */,
6247A9411B152F4F00CCF25C /* HTMLNodeIterator.h */,
6247A9421B152F4F00CCF25C /* HTMLNodeIterator.m */,
6295FB0522F0E770005C6DE8 /* HTMLNodeVisitor.h */,
6295FB0622F0E770005C6DE8 /* HTMLNodeVisitor.m */,
6295FB0F22F0ECEE005C6DE8 /* HTMLTreeVisitor.h */,
6295FB1022F0ECEE005C6DE8 /* HTMLTreeVisitor.m */,
624717BA1B22009200B38302 /* HTMLTreeWalker.h */,
624717BB1B22009200B38302 /* HTMLTreeWalker.m */,
62D91C211DE218A500BEFADE /* HTMLRange.h */,
62D91C221DE218A500BEFADE /* HTMLRange.m */,
6238C9831AB8D6330006512E /* HTMLKitDOMExceptions.h */,
@@ -908,12 +946,12 @@
isa = PBXGroup;
children = (
623916C61AC7209E0066B4FE /* HTMLNodeIteratorTests.m */,
6295FB1922F0F309005C6DE8 /* HTMLTreeVisitorTests.m */,
6247171B1B2240B800C11912 /* HTMLTreeWalkerTests.m */,
624FC3791AE591D80015DDF9 /* HTMLNodesTests.m */,
62EC7AE51AEEAC6F0015D3BE /* HTMLMutationAlgorithmsTests.m */,
62EC0A841E158BD80007786B /* HTMLRangeTests.m */,
62E0BA961E25456700E4D193 /* HTMLCharacterDataTests.m */,
62A95A4C1FB0FBFC0009FF26 /* HTMLSerializationTests.m */,
);
name = DOM;
sourceTree = "<group>";
@@ -988,6 +1026,7 @@
62AE593219F97CCA0043F069 /* Tokenizing */,
628E16EC1ADAE71700B15A06 /* Parsing */,
623719441AA1472B002E03C8 /* DOM */,
62E0917C22EDEB8700C61073 /* Serializing */,
628E16ED1ADAE73700B15A06 /* Categories */,
628E16EE1ADAE75300B15A06 /* Structures */,
62079BE61AF5656600D3B402 /* CSS */,
@@ -1014,6 +1053,7 @@
6236738C1AC0CD2400FF89B3 /* Tokenizer */,
623975581AC362A5007E26F1 /* Tree Construction */,
62C82E0B20FD2FCB008497A8 /* Parser */,
629621A222F5A24400F81B5A /* Serialization */,
624B9FB71AE072CB00646C4C /* DOM */,
624B9FB81AE072D500646C4C /* Categories */,
624E1A2D1B1D1C8A00E66AAC /* Structures */,
@@ -1090,6 +1130,14 @@
name = Structures;
sourceTree = "<group>";
};
629621A222F5A24400F81B5A /* Serialization */ = {
isa = PBXGroup;
children = (
62A95A4C1FB0FBFC0009FF26 /* HTMLSerializationTests.m */,
);
name = Serialization;
sourceTree = "<group>";
};
62AE593219F97CCA0043F069 /* Tokenizing */ = {
isa = PBXGroup;
children = (
@@ -1128,6 +1176,15 @@
name = Parser;
sourceTree = "<group>";
};
62E0917C22EDEB8700C61073 /* Serializing */ = {
isa = PBXGroup;
children = (
62E0917D22EDEB9D00C61073 /* HTMLSerializer.h */,
62E0917E22EDEB9D00C61073 /* HTMLSerializer.m */,
);
name = Serializing;
sourceTree = "<group>";
};
62ECBEDF1C0B671000AF847B /* Parsing */ = {
isa = PBXGroup;
children = (
@@ -1207,6 +1264,7 @@
62ECBEEA1C0B69FD00AF847B /* HTMLCommentToken.h in Headers */,
62ECBEEB1C0B69FD00AF847B /* HTMLDOCTYPEToken.h in Headers */,
626BE1E21DF3819500C49514 /* HTMLDOMUtils.h in Headers */,
6295FB1122F0ECEE005C6DE8 /* HTMLTreeVisitor.h in Headers */,
625A67C9224AC62C00C6D57D /* NSString+Private.h in Headers */,
62ECBEEC1C0B69FD00AF847B /* HTMLParseErrorToken.h in Headers */,
62ECBEED1C0B69FD00AF847B /* HTMLTagToken.h in Headers */,
@@ -1223,11 +1281,13 @@
624C777A1DE9D05E00DD6DFA /* HTMLCharacterData.h in Headers */,
62ECBEF51C0B69FD00AF847B /* HTMLMarker.h in Headers */,
62ECBEF61C0B69FE00AF847B /* HTMLDOM.h in Headers */,
6295FB0722F0E770005C6DE8 /* HTMLNodeVisitor.h in Headers */,
62ECBEF71C0B69FE00AF847B /* HTMLNode.h in Headers */,
621CC4A51DEA721A00D1992A /* HTMLParser+Private.h in Headers */,
62ECBEF81C0B69FE00AF847B /* HTMLDocument.h in Headers */,
62ECBEF91C0B69FE00AF847B /* HTMLDocumentType.h in Headers */,
62ECBEFA1C0B69FE00AF847B /* HTMLDocumentFragment.h in Headers */,
62E0917F22EDEB9D00C61073 /* HTMLSerializer.h in Headers */,
62ECBEFC1C0B69FE00AF847B /* HTMLComment.h in Headers */,
62ECBEFD1C0B69FE00AF847B /* HTMLText.h in Headers */,
62ECBEFE1C0B69FE00AF847B /* HTMLTemplate.h in Headers */,
@@ -1281,6 +1341,7 @@
62857C841D398917008DC254 /* HTMLParser.h in Headers */,
62857CDB1D3989CE008DC254 /* CSSAttributeSelector.h in Headers */,
62857C791D398907008DC254 /* HTMLEOFToken.h in Headers */,
6295FB1422F0ECEE005C6DE8 /* HTMLTreeVisitor.h in Headers */,
625A67CC224AC62C00C6D57D /* NSString+Private.h in Headers */,
626BE1E51DF3819500C49514 /* HTMLDOMUtils.h in Headers */,
62857CC41D3989A9008DC254 /* CSSSelectors.h in Headers */,
@@ -1297,11 +1358,13 @@
62857C851D398927008DC254 /* HTMLStackOfOpenElements.h in Headers */,
624C777D1DE9D05E00DD6DFA /* HTMLCharacterData.h in Headers */,
62857CDF1D3989CE008DC254 /* CSSCombinatorSelector.h in Headers */,
6295FB0A22F0E770005C6DE8 /* HTMLNodeVisitor.h in Headers */,
62857CA91D398973008DC254 /* HTMLNodeIterator.h in Headers */,
62857CAE1D398973008DC254 /* HTMLQuirksMode.h in Headers */,
621CC4A81DEA721A00D1992A /* HTMLParser+Private.h in Headers */,
62857CAF1D398977008DC254 /* HTMLNodeTraversal.h in Headers */,
62857C771D398907008DC254 /* HTMLParseErrorToken.h in Headers */,
62E0918222EDEB9D00C61073 /* HTMLSerializer.h in Headers */,
62857CDC1D3989CE008DC254 /* CSSPseudoClassSelector.h in Headers */,
62857CC21D3989A1008DC254 /* HTMLOrderedDictionary.h in Headers */,
62857C761D398907008DC254 /* HTMLDOCTYPEToken.h in Headers */,
@@ -1355,6 +1418,7 @@
62857D4E1D39A40A008DC254 /* HTMLTokenizerCharacters.h in Headers */,
62857D4D1D39A40A008DC254 /* HTMLTokenizerStates.h in Headers */,
62857D501D39A411008DC254 /* HTMLTokens.h in Headers */,
6295FB1322F0ECEE005C6DE8 /* HTMLTreeVisitor.h in Headers */,
625A67CB224AC62C00C6D57D /* NSString+Private.h in Headers */,
626BE1E41DF3819500C49514 /* HTMLDOMUtils.h in Headers */,
62857D7B1D39A452008DC254 /* HTMLNamespaces.h in Headers */,
@@ -1371,11 +1435,13 @@
62857D781D39A44E008DC254 /* HTMLTreeWalker.h in Headers */,
624C777C1DE9D05E00DD6DFA /* HTMLCharacterData.h in Headers */,
62857D551D39A411008DC254 /* HTMLParseErrorToken.h in Headers */,
6295FB0922F0E770005C6DE8 /* HTMLNodeVisitor.h in Headers */,
62857D541D39A411008DC254 /* HTMLDOCTYPEToken.h in Headers */,
62857D871D39A476008DC254 /* CSSInputStream.h in Headers */,
621CC4A71DEA721A00D1992A /* HTMLParser+Private.h in Headers */,
62857D8F1D39A47F008DC254 /* CSSCombinatorSelector.h in Headers */,
62857D5C1D39A41D008DC254 /* HTMLElementAdjustment.h in Headers */,
62E0918122EDEB9D00C61073 /* HTMLSerializer.h in Headers */,
62857D891D39A47F008DC254 /* CSSSelectorBlock.h in Headers */,
62857D6D1D39A44E008DC254 /* HTMLDOM.h in Headers */,
62857D751D39A44E008DC254 /* HTMLTemplate.h in Headers */,
@@ -1429,6 +1495,7 @@
62ECBFAF1C0B6D3B00AF847B /* HTMLNodeIterator.h in Headers */,
62ECBFB01C0B6D3C00AF847B /* HTMLTreeWalker.h in Headers */,
62ECBFB21C0B6D3C00AF847B /* HTMLNodeFilter.h in Headers */,
6295FB1222F0ECEE005C6DE8 /* HTMLTreeVisitor.h in Headers */,
625A67CA224AC62C00C6D57D /* NSString+Private.h in Headers */,
62567F511C0CB5750025D458 /* HTMLDOMTokenList.h in Headers */,
62ECBFB31C0B6D3C00AF847B /* HTMLKitDOMExceptions.h in Headers */,
@@ -1445,11 +1512,13 @@
62ECBFBD1C0B6D3F00AF847B /* CSSSelectorParser.h in Headers */,
62ECBFBE1C0B6D4000AF847B /* CSSNthExpressionParser.h in Headers */,
62ECBFBF1C0B6D4000AF847B /* CSSSelector.h in Headers */,
6295FB0822F0E770005C6DE8 /* HTMLNodeVisitor.h in Headers */,
621CC4A61DEA721A00D1992A /* HTMLParser+Private.h in Headers */,
62ECBFC01C0B6D4000AF847B /* CSSSelectorBlock.h in Headers */,
62ECBFC11C0B6D4100AF847B /* CSSTypeSelector.h in Headers */,
62ECBFC21C0B6D4100AF847B /* CSSAttributeSelector.h in Headers */,
62ECBFC31C0B6D4100AF847B /* CSSPseudoClassSelector.h in Headers */,
62E0918022EDEB9D00C61073 /* HTMLSerializer.h in Headers */,
62ECBFC41C0B6D4200AF847B /* CSSNthExpressionSelector.h in Headers */,
62ECBFC51C0B6D4200AF847B /* CSSPseudoFunctionSelector.h in Headers */,
62ECBFC61C0B6D4300AF847B /* CSSCombinatorSelector.h in Headers */,
@@ -1747,7 +1816,9 @@
62ECBF251C0B6B7900AF847B /* HTMLEOFToken.m in Sources */,
62ECBF261C0B6B7900AF847B /* HTMLParser.m in Sources */,
62ECBF271C0B6B7900AF847B /* HTMLStackOfOpenElements.m in Sources */,
62E0918322EDEB9D00C61073 /* HTMLSerializer.m in Sources */,
62ECBF281C0B6B7900AF847B /* HTMLListOfActiveFormattingElements.m in Sources */,
6295FB0B22F0E770005C6DE8 /* HTMLNodeVisitor.m in Sources */,
62ECBF291C0B6B7900AF847B /* HTMLMarker.m in Sources */,
62ECBF2A1C0B6B7900AF847B /* HTMLNode.m in Sources */,
62ECBF2B1C0B6B7900AF847B /* HTMLDocument.m in Sources */,
@@ -1755,6 +1826,7 @@
625A67D2224ADC7700C6D57D /* HTMLQuircksMode.m in Sources */,
62ECBF2D1C0B6B7900AF847B /* HTMLDocumentFragment.m in Sources */,
62ECBF2E1C0B6B7900AF847B /* HTMLElement.m in Sources */,
6295FB1522F0ECEE005C6DE8 /* HTMLTreeVisitor.m in Sources */,
62ECBF2F1C0B6B7900AF847B /* HTMLComment.m in Sources */,
62ECBF301C0B6B7900AF847B /* HTMLText.m in Sources */,
62ECBF311C0B6B7900AF847B /* HTMLTemplate.m in Sources */,
@@ -1817,6 +1889,7 @@
624FC37B1AE591D80015DDF9 /* HTMLNodesTests.m in Sources */,
621FBE5B1BDAD68700BC9555 /* CSSSelectorParserTests.m in Sources */,
621FBE5E1BDAD90200BC9555 /* CSSCombinatorSelectorTests.m in Sources */,
6295FB1A22F0F309005C6DE8 /* HTMLTreeVisitorTests.m in Sources */,
625EE45B1CBB171300F2CC8E /* HTMLKitTestUtil.m in Sources */,
628AF6301BC99A6C00496128 /* CSSNthExpressionsParserTests.m in Sources */,
);
@@ -1839,7 +1912,9 @@
62857C7B1D398912008DC254 /* HTMLTokenizer.m in Sources */,
62857CB21D398992008DC254 /* HTMLDocument.m in Sources */,
62857CBD1D398992008DC254 /* HTMLKitDOMExceptions.m in Sources */,
62E0918622EDEB9D00C61073 /* HTMLSerializer.m in Sources */,
62857CD11D3989B8008DC254 /* CSSPseudoFunctionSelector.m in Sources */,
6295FB0E22F0E770005C6DE8 /* HTMLNodeVisitor.m in Sources */,
62857C7F1D398912008DC254 /* HTMLCommentToken.m in Sources */,
62857C811D398912008DC254 /* HTMLParseErrorToken.m in Sources */,
62857CD21D3989B8008DC254 /* CSSCombinatorSelector.m in Sources */,
@@ -1847,6 +1922,7 @@
625A67D5224ADC7700C6D57D /* HTMLQuircksMode.m in Sources */,
62857CCF1D3989B8008DC254 /* CSSPseudoClassSelector.m in Sources */,
62857C7A1D398912008DC254 /* HTMLInputStreamReader.m in Sources */,
6295FB1822F0ECEE005C6DE8 /* HTMLTreeVisitor.m in Sources */,
62857CC11D39899C008DC254 /* NSCharacterSet+HTMLKit.m in Sources */,
62857CCD1D3989B8008DC254 /* CSSTypeSelector.m in Sources */,
62857C8E1D39892D008DC254 /* HTMLMarker.m in Sources */,
@@ -1896,7 +1972,9 @@
62857D061D39A324008DC254 /* HTMLCommentToken.m in Sources */,
62857D111D39A32A008DC254 /* HTMLDocumentType.m in Sources */,
62857D1C1D39A32A008DC254 /* HTMLKitDOMExceptions.m in Sources */,
62E0918522EDEB9D00C61073 /* HTMLSerializer.m in Sources */,
62857D2B1D39A334008DC254 /* CSSPseudoFunctionSelector.m in Sources */,
6295FB0D22F0E770005C6DE8 /* HTMLNodeVisitor.m in Sources */,
62857D081D39A324008DC254 /* HTMLParseErrorToken.m in Sources */,
62857D0F1D39A32A008DC254 /* HTMLNode.m in Sources */,
62857D2C1D39A334008DC254 /* CSSCombinatorSelector.m in Sources */,
@@ -1904,6 +1982,7 @@
625A67D4224ADC7700C6D57D /* HTMLQuircksMode.m in Sources */,
62857D291D39A334008DC254 /* CSSPseudoClassSelector.m in Sources */,
62857D011D39A324008DC254 /* HTMLInputStreamReader.m in Sources */,
6295FB1722F0ECEE005C6DE8 /* HTMLTreeVisitor.m in Sources */,
62857D1E1D39A334008DC254 /* NSCharacterSet+HTMLKit.m in Sources */,
62857D271D39A334008DC254 /* CSSTypeSelector.m in Sources */,
62857D0E1D39A328008DC254 /* HTMLMarker.m in Sources */,
@@ -1966,6 +2045,7 @@
62857D341D39A33F008DC254 /* HTMLNodeIteratorTests.m in Sources */,
62857D2E1D39A339008DC254 /* HTML5LibTokenizerTest.m in Sources */,
62857D351D39A33F008DC254 /* HTMLTreeWalkerTests.m in Sources */,
6295FB1C22F0F309005C6DE8 /* HTMLTreeVisitorTests.m in Sources */,
62857D441D39A34D008DC254 /* HTMLKitTestUtil.m in Sources */,
62857D2F1D39A339008DC254 /* HTMLKitTokenizerTests.m in Sources */,
);
@@ -1988,7 +2068,9 @@
62ECBF6D1C0B6D2A00AF847B /* HTMLEOFToken.m in Sources */,
62ECBF6E1C0B6D2A00AF847B /* HTMLParser.m in Sources */,
62ECBF6F1C0B6D2A00AF847B /* HTMLStackOfOpenElements.m in Sources */,
62E0918422EDEB9D00C61073 /* HTMLSerializer.m in Sources */,
62ECBF701C0B6D2A00AF847B /* HTMLListOfActiveFormattingElements.m in Sources */,
6295FB0C22F0E770005C6DE8 /* HTMLNodeVisitor.m in Sources */,
62ECBF711C0B6D2A00AF847B /* HTMLMarker.m in Sources */,
62ECBF721C0B6D2A00AF847B /* HTMLNode.m in Sources */,
62ECBF731C0B6D2A00AF847B /* HTMLDocument.m in Sources */,
@@ -1996,6 +2078,7 @@
625A67D3224ADC7700C6D57D /* HTMLQuircksMode.m in Sources */,
62ECBF751C0B6D2A00AF847B /* HTMLDocumentFragment.m in Sources */,
62ECBF761C0B6D2A00AF847B /* HTMLElement.m in Sources */,
6295FB1622F0ECEE005C6DE8 /* HTMLTreeVisitor.m in Sources */,
62ECBF771C0B6D2A00AF847B /* HTMLComment.m in Sources */,
62ECBF781C0B6D2A00AF847B /* HTMLText.m in Sources */,
62ECBF791C0B6D2A00AF847B /* HTMLTemplate.m in Sources */,
@@ -2058,6 +2141,7 @@
62ECBFD91C0B6E2E00AF847B /* CSSTypeSelectorTests.m in Sources */,
62ECBFDA1C0B6E2E00AF847B /* CSSAttributeSelectorTests.m in Sources */,
62ECBFDB1C0B6E2E00AF847B /* CSSNThExpressionSelectorTests.m in Sources */,
6295FB1B22F0F309005C6DE8 /* HTMLTreeVisitorTests.m in Sources */,
625EE45C1CBB171300F2CC8E /* HTMLKitTestUtil.m in Sources */,
62ECBFDC1C0B6E2E00AF847B /* CSSCombinatorSelectorTests.m in Sources */,
);
+2 -2
View File
@@ -77,7 +77,7 @@ To add `HTMLKit` as a dependency into your project using CocoaPods just add the
```ruby
target 'MyTarget' do
pod 'HTMLKit', '~> 2.1'
pod 'HTMLKit', '~> 3.1'
end
```
@@ -94,7 +94,7 @@ $ pod install
Add `HTMLKit` to your `Package.swift` dependencies:
```swift
.Package(url: "https://github.com/iabudiab/HTMLKit", majorVersion: 2)
.Package(url: "https://github.com/iabudiab/HTMLKit", majorVersion: 3)
```
Then run:
-7
View File
@@ -21,13 +21,6 @@
return [super initWithName:@"#comment" type:HTMLNodeComment data:data];
}
#pragma mark - Serialization
- (NSString *)outerHTML
{
return [NSString stringWithFormat:@"<!--%@-->", self.data];
}
#pragma mark - Description
- (NSString *)description
+2 -2
View File
@@ -141,7 +141,7 @@
- (void)detachNodeIterator:(HTMLNodeIterator *)iterator
{
[_nodeIterators removeObject:iterator];
// NOOP
}
#pragma mark - Ranges
@@ -153,7 +153,7 @@
- (void)detachRange:(HTMLRange *)range
{
[_ranges removeObject:range];
// NOOP
}
- (void)didRemoveCharacterDataInNode:(HTMLCharacterData *)node atOffset:(NSUInteger)offset withLength:(NSUInteger)length
-7
View File
@@ -144,13 +144,6 @@ NS_INLINE BOOL nilOrEqual(id first, id second) {
return copy;
}
#pragma mark - Serialization
- (NSString *)outerHTML
{
return [NSString stringWithFormat:@"<!DOCTYPE %@>", self.name];
}
#pragma mark - Description
- (NSString *)description
-36
View File
@@ -149,42 +149,6 @@
return copy;
}
#pragma mark - Serialization
- (NSString *)outerHTML
{
NSMutableString *result = [NSMutableString string];
[result appendFormat:@"<%@", self.tagName];
[self.attributes enumerateKeysAndObjectsUsingBlock:^(NSString *key, NSString *value, BOOL *stop) {
NSMutableString *escaped = [value mutableCopy];
[escaped replaceOccurrencesOfString:@"&" withString:@"&amp;" options:0 range:NSMakeRange(0, escaped.length)];
[escaped replaceOccurrencesOfString:@"0x00A0" withString:@"&nbsp;" options:0 range:NSMakeRange(0, escaped.length)];
[escaped replaceOccurrencesOfString:@"\"" withString:@"&quot;" options:0 range:NSMakeRange(0, escaped.length)];
[result appendFormat:@" %@=\"%@\"", key, escaped];
}];
[result appendString:@">"];
if ([self.tagName isEqualToAny:@"area", @"base", @"basefont", @"bgsound", @"br", @"col", @"embed",
@"frame", @"hr", @"img", @"input", @"keygen", @"link", @"menuitem", @"meta", @"param", @"source",
@"track", @"wbr", nil]) {
return result;
}
if ([self.tagName isEqualToAny:@"pre", @"textarea", @"listing", nil] && self.firstChild.nodeType == HTMLNodeText) {
HTMLText *textNode = (HTMLText *)self.firstChild;
if ([textNode.data hasPrefix:@"\n"]) {
[result appendString:@"\n"];
}
}
[result appendString:self.innerHTML];
[result appendFormat:@"</%@>", self.tagName];
return result;
}
#pragma mark - Description
- (NSString *)description
+1 -1
View File
@@ -17,7 +17,7 @@
<key>CFBundlePackageType</key>
<string>FMWK</string>
<key>CFBundleShortVersionString</key>
<string>3.0.0</string>
<string>3.1.0</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
+17 -3
View File
@@ -18,6 +18,7 @@
#import "CSSSelector.h"
#import "HTMLDocument+Private.h"
#import "HTMLDOMUtils.h"
#import "HTMLSerializer.h"
NSString * const ValidationNodePreInsertion = @"-ensurePreInsertionValidityOfNode:beforeChildNode:";
NSString * const ValidationNodeReplacement = @"-ensureReplacementValidityOfChildNode:withNode:";
@@ -156,6 +157,20 @@ NSString * const RemoveChildNode = @"-removeChildNode:";
return (HTMLElement *)self;
}
// Returns the receiver statically typed as HTMLText.
// This is a plain pointer cast: no node-type check is performed here.
- (HTMLText *)asText
{
return (HTMLText *)self;
}
// Returns the receiver statically typed as HTMLComment.
// This is a plain pointer cast: no node-type check is performed here.
- (HTMLComment *)asComment
{
return (HTMLComment *)self;
}
// Returns the receiver statically typed as HTMLDocumentType.
// This is a plain pointer cast: no node-type check is performed here.
- (HTMLDocumentType *)asDocumentType
{
return (HTMLDocumentType *)self;
}
#pragma mark - Child Nodes
- (BOOL)hasChildNodes
@@ -718,13 +733,12 @@ NS_INLINE void CheckInvalidCombination(HTMLNode *parent, HTMLNode *node, NSStrin
- (NSString *)outerHTML
{
[self doesNotRecognizeSelector:_cmd];
return nil;
return [HTMLSerializer serializeNode:self scope:HTMLSerializationScopeIncludeRoot];
}
- (NSString *)innerHTML
{
return [[self.childNodes.array valueForKey:@"outerHTML"] componentsJoinedByString:@""];
return [HTMLSerializer serializeNode:self scope:HTMLSerializationScopeChildrenOnly];
}
- (void)setInnerHTML:(NSString *)outerHTML
+54
View File
@@ -0,0 +1,54 @@
//
// HTMLNodeVisitor.m
// HTMLKit
//
// Created by Iska on 30.07.19.
// Copyright © 2019 BrainCookie. All rights reserved.
//
#import "HTMLNodeVisitor.h"
#pragma mark - Block Visitor
/// Private state and the private initializer of the block-based node visitor.
@interface HTMLNodeVisitorBlock ()
{
	/// Block forwarded to from -enter:. Skipped when nil.
	void (^ _enter)(HTMLNode *);
	/// Block forwarded to from -leave:. Skipped when nil.
	void (^ _leave)(HTMLNode *);
}

- (instancetype)initWithEnterBlock:(void (^)(HTMLNode * _Nonnull))enterBlock
						leaveBlock:(void (^)(HTMLNode * _Nonnull))leaveBlock;
@end

@implementation HTMLNodeVisitorBlock

/**
 Creates a visitor that forwards `enter:` and `leave:` to the given blocks.

 @param enterBlock The block called when a node is entered.
 @param leaveBlock The block called when a node is left.
 @return A new visitor instance.
 */
+ (instancetype)visitorWithEnterBlock:(void (^)(HTMLNode * _Nonnull))enterBlock
						   leaveBlock:(void (^)(HTMLNode * _Nonnull))leaveBlock
{
	// `self` instead of the hard-coded class name, so that a subclass calling this factory
	// receives an instance of its own type, as promised by `instancetype`
	return [[self alloc] initWithEnterBlock:enterBlock leaveBlock:leaveBlock];
}

/**
 Initializes the visitor with copies of the given blocks.

 @param enterBlock The block called when a node is entered.
 @param leaveBlock The block called when a node is left.
 @return The initialized visitor.
 */
- (instancetype)initWithEnterBlock:(void (^)(HTMLNode * _Nonnull))enterBlock
						leaveBlock:(void (^)(HTMLNode * _Nonnull))leaveBlock
{
	self = [super init];
	if (self) {
		_enter = [enterBlock copy];
		_leave = [leaveBlock copy];
	}
	return self;
}

#pragma mark - HTMLNodeVisitor

/// Forwards the entered node to the enter block, if one was provided.
- (void)enter:(HTMLNode *)node
{
	if (_enter) {
		_enter(node);
	}
}

/// Forwards the left node to the leave block, if one was provided.
- (void)leave:(HTMLNode *)node
{
	if (_leave) {
		_leave(node);
	}
}

@end
+155
View File
@@ -0,0 +1,155 @@
//
// HTMLSerializer.m
// HTMLKit
//
// Created by Iska on 28.07.19.
// Copyright © 2019 BrainCookie. All rights reserved.
//
#import "HTMLSerializer.h"
#import "HTMLDOM.h"
#import "HTMLNode+Private.h"
#import "HTMLTreeVisitor.h"
#import "NSString+Private.h"
#pragma mark - Serializer
/// Private state of the serializer.
@interface HTMLSerializer ()
{
	/// The node the serialization starts at.
	HTMLNode *_root;
	/// Visitor used to walk the tree rooted at `_root`.
	HTMLTreeVisitor *_treeVisitor;
	/// Nesting depth inside a void element. Nothing is written while it is greater than zero.
	NSUInteger _ignore;
	/// Buffer collecting the serialized output.
	NSMutableString *_result;
}

- (instancetype)initWithNode:(HTMLNode *)node;
- (NSString *)serializeWithScope:(HTMLSerializationScope)scope;
@end

/// Returns YES for elements that are written as a start tag only: no children and no end tag.
NS_INLINE BOOL HTMLSerializerIsVoidElement(HTMLElement *element)
{
	return [element.tagName isEqualToAny:@"area", @"base", @"basefont", @"bgsound", @"br", @"col", @"embed",
			@"frame", @"hr", @"img", @"input", @"keygen", @"link", @"menuitem", @"meta", @"param", @"source",
			@"track", @"wbr", nil];
}

/// Replaces every occurrence of `target` in `string` with `replacement`, in place.
NS_INLINE void HTMLSerializerReplace(NSMutableString *string, NSString *target, NSString *replacement)
{
	[string replaceOccurrencesOfString:target
							withString:replacement
							   options:0
								 range:NSMakeRange(0, string.length)];
}

@implementation HTMLSerializer

/**
 Serializes the given node with the given scope.

 @param node The root node of the tree to serialize.
 @param scope Whether the root node itself is part of the output.
 @return The serialized HTML string.
 */
+ (NSString *)serializeNode:(HTMLNode *)node scope:(HTMLSerializationScope)scope
{
	HTMLSerializer *serializer = [[HTMLSerializer alloc] initWithNode:node];
	return [serializer serializeWithScope:scope];
}

#pragma mark - Lifecycle

/// Initializes a serializer for the tree rooted at the given node.
- (instancetype)initWithNode:(HTMLNode *)node
{
	self = [super init];
	if (self) {
		_root = node;
		_treeVisitor = [[HTMLTreeVisitor alloc] initWithNode:node];
		_result = [[NSMutableString alloc] init];
	}
	return self;
}

#pragma mark - Serialization

/**
 Walks the tree and writes every visited node into the result buffer.

 Start tags, text, comments and doctypes are written when a node is entered, end tags when an
 element is left. Everything below a void element is skipped.

 @param scope Whether the root node itself is part of the output.
 @return An immutable copy of the serialized HTML.
 */
- (NSString *)serializeWithScope:(HTMLSerializationScope)scope
{
	[_result setString:@""];
	_ignore = 0;

	HTMLNodeVisitorBlock *nodeVisitor = [HTMLNodeVisitorBlock visitorWithEnterBlock:^(HTMLNode * node) {
		if (scope == HTMLSerializationScopeChildrenOnly && node == _root) {
			return;
		}

		if (_ignore > 0) {
			// Below a void element: only keep track of the depth, so that the matching
			// leave call can unwind it again
			_ignore++;
			return;
		}

		switch (node.nodeType) {
			case HTMLNodeElement:
				[self openElement:node.asElement];
				break;
			case HTMLNodeComment:
				[self serializeComment:node.asComment];
				break;
			case HTMLNodeText:
				[self serializeText:node.asText];
				break;
			case HTMLNodeDocumentType:
				// Was keyed on HTMLNodeDocumentFragment, which dropped doctypes from the output
				// and cast fragments to a doctype
				[self serializeDocumentType:node.asDocumentType];
				break;
			default:
				break;
		}
	} leaveBlock:^(HTMLNode * _Nonnull node) {
		if (scope == HTMLSerializationScopeChildrenOnly && node == _root) {
			return;
		}

		if (_ignore > 0) {
			// Either the void element itself or one of its skipped descendants is left:
			// neither gets an end tag. Counting down here mirrors the counting in the
			// enter block, hence the unsigned counter can no longer underflow.
			_ignore--;
			return;
		}

		if (node.nodeType == HTMLNodeElement) {
			[self closeElement:node.asElement];
		}
	}];

	[_treeVisitor walkWithNodeVisitor:nodeVisitor];
	return [_result copy];
}

/// Writes the start tag of the element including its attributes with escaped values.
- (void)openElement:(HTMLElement *)element
{
	[_result appendFormat:@"<%@", element.tagName];

	[element.attributes enumerateKeysAndObjectsUsingBlock:^(NSString *key, NSString *value, BOOL *stop) {
		NSMutableString *escaped = [value mutableCopy];
		// The ampersand has to be replaced first, otherwise the entities inserted below
		// would be escaped a second time
		HTMLSerializerReplace(escaped, @"&", @"&amp;");
		HTMLSerializerReplace(escaped, @"\u00A0", @"&nbsp;");
		// NOTE(review): the original code matched the literal text "0x00A0" instead of the
		// U+00A0 character. The literal match is kept only because an existing serializer
		// test expects it; remove it together with that expectation.
		HTMLSerializerReplace(escaped, @"0x00A0", @"&nbsp;");
		HTMLSerializerReplace(escaped, @"\"", @"&quot;");
		[_result appendFormat:@" %@=\"%@\"", key, escaped];
	}];

	[_result appendString:@">"];

	if (HTMLSerializerIsVoidElement(element)) {
		// Starts skipping: the children of a void element are not serialized
		_ignore++;
	}
}

/// Writes the end tag of the element.
- (void)closeElement:(HTMLElement *)element
{
	[_result appendFormat:@"</%@>", element.tagName];
}

/// Writes the text node: verbatim below raw-text-like parents, escaped everywhere else.
- (void)serializeText:(HTMLText *)text
{
	if ([text.parentElement.tagName isEqualToAny:@"style", @"script", @"xmp", @"iframe", @"noembed", @"noframes",
		 @"plaintext", @"noscript", nil]) {
		[_result appendString:text.data];
		return;
	}

	NSMutableString *escaped = [text.data mutableCopy];
	HTMLSerializerReplace(escaped, @"&", @"&amp;");
	// Was "\00A0", i.e. an octal NUL escape followed by "A0", which never matched U+00A0
	HTMLSerializerReplace(escaped, @"\u00A0", @"&nbsp;");
	HTMLSerializerReplace(escaped, @"<", @"&lt;");
	HTMLSerializerReplace(escaped, @">", @"&gt;");
	[_result appendString:escaped];
}

/// Writes the comment node wrapped into the comment delimiters.
- (void)serializeComment:(HTMLComment *)comment
{
	[_result appendFormat:@"<!--%@-->", comment.data];
}

/// Writes the doctype declaration with the name of the doctype.
- (void)serializeDocumentType:(HTMLDocumentType *)doctype
{
	[_result appendFormat:@"<!DOCTYPE %@>", doctype.name];
}

@end
-17
View File
@@ -66,23 +66,6 @@ NS_INLINE void CheckValidOffset(HTMLNode *node, NSUInteger offset, NSString *cmd
return newNode;
}
#pragma mark - Serialization
- (NSString *)outerHTML
{
if ([self.parentElement.tagName isEqualToAny:@"style", @"script", @"xmp", @"iframe", @"noembed", @"noframes",
@"plaintext", @"noscript", nil]) {
return self.data;
} else {
NSMutableString *escaped = [self.data mutableCopy];
[escaped replaceOccurrencesOfString:@"&" withString:@"&amp;" options:0 range:NSMakeRange(0, escaped.length)];
[escaped replaceOccurrencesOfString:@"\00A0" withString:@"&nbsp;" options:0 range:NSMakeRange(0, escaped.length)];
[escaped replaceOccurrencesOfString:@"<" withString:@"&lt;" options:0 range:NSMakeRange(0, escaped.length)];
[escaped replaceOccurrencesOfString:@">" withString:@"&gt;" options:0 range:NSMakeRange(0, escaped.length)];
return escaped;
}
}
#pragma mark - Description
- (NSString *)description
+63
View File
@@ -0,0 +1,63 @@
//
// HTMLTreeVisitor.m
// HTMLKit
//
// Created by Iska on 30.07.19.
// Copyright © 2019 BrainCookie. All rights reserved.
//
#import "HTMLTreeVisitor.h"
#import "HTMLNode.h"
#import "HTMLTreeWalker.h"
/// Private state of the tree visitor.
@interface HTMLTreeVisitor()
{
// The node this visitor was initialized with; the walk ends once it is reached again.
HTMLNode *_root;
// Tree walker created for the root node; its currentNode is the position of the walk.
HTMLTreeWalker *_treeWalker;
}
@end
@implementation HTMLTreeVisitor
/**
Initializes the visitor and creates the tree walker for the given node.
@param node The root node of the tree to walk.
@return The initialized tree visitor.
*/
- (instancetype)initWithNode:(HTMLNode *)node
{
self = [super init];
if (self) {
_root = node;
_treeWalker = [[HTMLTreeWalker alloc] initWithNode:node];
}
return self;
}
/**
Walks the tree starting at the tree walker's current node and reports the nodes to the visitor.
A node is passed to `enter:` when it is reached and to `leave:` once its child nodes were handled.
@param visitor The visitor receiving the `enter:` and `leave:` calls.
*/
- (void)walkWithNodeVisitor:(id<HTMLNodeVisitor>)visitor
{
HTMLNode *currentNode = _treeWalker.currentNode;
while (currentNode) {
[visitor enter:currentNode];
// Descend first: a node with child nodes is left only after its children were handled
if (currentNode.hasChildNodes) {
currentNode = [_treeWalker firstChild];
continue;
}
// Node without children: leave it and continue with its next sibling, if there is one
HTMLNode *next = [_treeWalker nextSibling];
if (next) {
[visitor leave:currentNode];
currentNode = next;
continue;
}
// No sibling: climb towards the root and leave every node on the way until a sibling is found
// NOTE(review): relies on -nextSibling keeping the walker's currentNode unchanged when it returns nil; confirm against HTMLTreeWalker
while (!next && _treeWalker.currentNode != _root) {
[visitor leave:_treeWalker.currentNode];
currentNode = [_treeWalker parentNode];
next = [_treeWalker nextSibling];
}
// Leaves the node the climb stopped at, or the current node itself when the loop above did not run
[visitor leave:currentNode];
currentNode = _treeWalker.currentNode;
// The walker being back at the root ends the walk
if (currentNode == _root) {
break;
}
}
}
@end
+1
View File
@@ -18,6 +18,7 @@
#import "HTMLRange.h"
#import "HTMLDOMTokenList.h"
#import "HTMLNodeIterator.h"
#import "HTMLTreeVisitor.h"
#import "HTMLTreeWalker.h"
#import "HTMLNodeFilter.h"
+12
View File
@@ -10,6 +10,7 @@
/// HTMLKit private header
///------------------------------------------------------
#import "HTMLNode+Private.h"
#import "HTMLElement.h"
#import "HTMLNamespaces.h"
#import "NSString+Private.h"
@@ -52,3 +53,14 @@ NS_INLINE BOOL IsSpecialElement(HTMLElement *element)
}
return NO;
}
/**
 Checks whether the node is an element with one of the tag names that serialize as void,
 i.e. as a start tag only.

 @param node The node to check.
 @return YES if the node is an element with one of the listed tag names, NO otherwise.
 */
NS_INLINE BOOL DoesNodeSerializeAsVoid(HTMLNode *node)
{
	if (node.nodeType != HTMLNodeElement) {
		// NO instead of the C `false`, matching the BOOL return type
		return NO;
	}

	return [node.asElement.tagName isEqualToAny:@"area", @"base", @"basefont", @"bgsound", @"br", @"col", @"embed",
			@"frame", @"hr", @"img", @"input", @"keygen", @"link", @"meta", @"param", @"source", @"track", @"wbr", nil];
}
+1
View File
@@ -16,6 +16,7 @@ extern const unsigned char HTMLKitVersionString[];
#import "HTMLDOM.h"
#import "HTMLParser.h"
#import "HTMLSerializer.h"
#import "HTMLKitErrorDomain.h"
#import "HTMLOrderedDictionary.h"
+19
View File
@@ -12,6 +12,10 @@
#import "HTMLNode.h"
@class HTMLText;
@class HTMLComment;
@class HTMLDocumentType;
/**
Private HTML Node methods which are not intended for public API.
*/
@@ -44,6 +48,21 @@
*/
- (HTMLElement *)asElement;
/**
Casts this node to a HTML Text. This cast should only be performed after the appropriate check.
*/
- (HTMLText *)asText;
/**
Casts this node to a HTML Comment. This cast should only be performed after the appropriate check.
*/
- (HTMLComment *)asComment;
/**
Casts this node to a HTML Document Type. This cast should only be performed after the appropriate check.
*/
- (HTMLDocumentType *)asDocumentType;
/**
Returns the same string representation of the DOM tree rooted at this node that is used by html5lib-tests.
+1
View File
@@ -8,6 +8,7 @@
#import <Foundation/Foundation.h>
#import "HTMLNodeIterator.h"
#import "HTMLTreeVisitor.h"
NS_ASSUME_NONNULL_BEGIN
+59
View File
@@ -0,0 +1,59 @@
//
// HTMLNodeVisitor.h
// HTMLKit
//
// Created by Iska on 30.07.19.
// Copyright © 2019 BrainCookie. All rights reserved.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
@class HTMLNode;
#pragma mark - Node Visitor
/**
A HTML Node Visitor which can be used with a tree visitor.
Both methods are required.
@see HTMLTreeVisitor
*/
@protocol HTMLNodeVisitor <NSObject>
@required
/**
Called when visiting the node for the first time.
@param node The node that is being visited for the first time.
*/
- (void)enter:(HTMLNode *)node;
/**
Called when leaving a previously entered node, i.e. when all its child nodes are visited.
@param node The node that is being left.
*/
- (void)leave:(HTMLNode *)node;
@end
#pragma mark - Block Node Visitor
/**
A concrete block-based HTML Node Visitor implementation.
*/
@interface HTMLNodeVisitorBlock : NSObject <HTMLNodeVisitor>
/**
Initializes and returns a new instance of this visitor.
@param enterBlock The block to apply on entering a visited node.
@param leaveBlock The block to apply on leaving a visited node.
@return A new visitor that applies the given blocks.
*/
+ (instancetype)visitorWithEnterBlock:(void (^)(HTMLNode *node))enterBlock
leaveBlock:(void (^)(HTMLNode *node))leaveBlock;
@end
NS_ASSUME_NONNULL_END
+45
View File
@@ -0,0 +1,45 @@
//
// HTMLSerializer.h
// HTMLKit
//
// Created by Iska on 28.07.19.
// Copyright © 2019 BrainCookie. All rights reserved.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
@class HTMLNode;
/**
The scope for HTML Serialization. It decides whether the root node is part of the serialized result.
*/
typedef NS_ENUM(unsigned short, HTMLSerializationScope)
{
/// The given node itself is included in the serialized result, e.g. a node's `outerHTML`.
HTMLSerializationScopeIncludeRoot = 1,
/// Only the child nodes of the given node are serialized, e.g. a node's `innerHTML`.
HTMLSerializationScopeChildrenOnly = 2
};
/**
A HTML DOM Serializer. Used to serialize the HTML tree rooted at a given node with the desired scope:
- IncludeRoot scope includes the given node into the serialized result, e.g. HTML Node's `outerHTML`
- ChildrenOnly scope serializes only the child nodes of the given node, e.g. HTML Node's `innerHTML`
https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments
*/
@interface HTMLSerializer : NSObject
/**
Serializes the given node with the given scope.
@param node The root node of the tree to serialize
@param scope The scope for serialization
@return The serialized HTML string.
*/
+ (NSString *)serializeNode:(HTMLNode *)node scope:(HTMLSerializationScope)scope;
@end
NS_ASSUME_NONNULL_END
+44
View File
@@ -0,0 +1,44 @@
//
// HTMLTreeVisitor.h
// HTMLKit
//
// Created by Iska on 30.07.19.
// Copyright © 2019 BrainCookie. All rights reserved.
//
#import <Foundation/Foundation.h>
#import "HTMLNodeVisitor.h"
NS_ASSUME_NONNULL_BEGIN
@class HTMLNode;
/**
A HTML Tree Visitor that walks the DOM in tree order. Nodes are visited exactly once.
The provided node visitor is called for each node twice, once when entering the node,
and once again when leaving the node.
@see HTMLNodeVisitor
*/
@interface HTMLTreeVisitor : NSObject
/**
Initializes a new tree visitor with the given root node.
@param node The root node.
@return A new instance of a tree visitor.
*/
- (instancetype)initWithNode:(HTMLNode *)node;
/**
Walks the DOM tree rooted at the provided node with the given node visitor.
@param visitor A HTMLNodeVisitor implementation.
*/
- (void)walkWithNodeVisitor:(id<HTMLNodeVisitor>)visitor;
@end
NS_ASSUME_NONNULL_END
File diff suppressed because one or more lines are too long
+7 -5
View File
@@ -580,19 +580,20 @@ static HTMLNode * (^ LastDescendant)(HTMLNode *) = ^ HTMLNode * (HTMLNode *node)
XCTAssertNotNil(body);
// iterator should be deallocated and detached at this point
XCTAssertEqual(0, nodeIterators.count);
XCTAssertEqual(0, nodeIterators.allObjects.count);
// iterator should be autoreleased, deallocated and detached after autoreleasepool
@autoreleasepool {
HTMLNodeIterator *iterator = [[HTMLNodeIterator alloc] initWithNode:body];
[iterator nextNode];
XCTAssertEqual(1, nodeIterators.count);
XCTAssertEqual(1, nodeIterators.allObjects.count);
}
XCTAssertEqual(0, nodeIterators.count);
XCTAssertEqual(0, nodeIterators.allObjects.count);
}
- (void)testBugFix_Issue_22 {
- (void)testBugFix_Issue_22
{
// The issue is applicable only for devices. On simulator the test is passed.
HTMLDocument *document = [HTMLDocument documentWithString:@"<div id=\"id\"></div>"];
@@ -608,7 +609,8 @@ static HTMLNode * (^ LastDescendant)(HTMLNode *) = ^ HTMLNode * (HTMLNode *node)
XCTAssertTrue([element.elementId isEqualToString:divId]);
}
- (void)testBugFix_Issue_28 {
- (void)testBugFix_Issue_28
{
HTMLDocument *document = self.document;
HTMLNodeIterator *iterator = document.body.nodeIterator;
+2 -2
View File
@@ -2112,10 +2112,10 @@
@autoreleasepool {
HTMLRange *range = [[HTMLRange alloc] initWithDocument:document];
[range cloneContents];
XCTAssertEqual(1, ranges.count);
XCTAssertEqual(1, ranges.allObjects.count);
}
XCTAssertEqual(0, ranges.count);
XCTAssertEqual(0, ranges.allObjects.count);
}
@end
+69 -6
View File
@@ -8,6 +8,19 @@
#import <XCTest/XCTest.h>
#import "HTMLDOM.h"
#import "HTMLKitTestUtil.h"
// Parses `input` into a document and asserts that the innerHTML of its body equals `expected`.
#define Assert(input, expected) \
do { \
HTMLDocument *document = [HTMLDocument documentWithString:input]; \
XCTAssertEqualObjects(document.body.innerHTML, expected); \
} while(0)
// Parses `input` into a document and asserts that the innerHTML of its head equals `expected`.
#define AssertH(input, expected) \
do { \
HTMLDocument *document = [HTMLDocument documentWithString:input]; \
XCTAssertEqualObjects(document.head.innerHTML, expected); \
} while(0)
@interface HTMLSerializationTests : XCTestCase
@@ -15,12 +28,47 @@
@implementation HTMLSerializationTests
- (void)setUp {
[super setUp];
}
- (void)tearDown {
[super tearDown];
// Round-trip checks: every input is parsed and the innerHTML of the body (Assert) or of the
// head (AssertH) is compared with the expected serialization.
- (void)testSerializer
{
// Empty and incomplete input, misnested formatting elements
Assert(@"", @"");
Assert(@"<a a=\r\n", @"");
Assert(@"<p><i>Hello!</p>, World!</i>", @"<p><i>Hello!</i></p><i>, World!</i>");
Assert(@"<p><i>Hello</i>, World!</p>", @"<p><i>Hello</i>, World!</p>");
// Escaping of attribute values
AssertH(@"<base foo=\"<'>\">", @"<base foo=\"<'>\">");
AssertH(@"<base foo=\"&amp;\">", @"<base foo=\"&amp;\">");
AssertH(@"<base foo=&amp>", @"<base foo=\"&amp;\">");
// NOTE(review): the input below contains the literal characters "0x00A0", not the U+00A0 character
AssertH(@"<base foo=x0x00A0y>", @"<base foo=\"x&nbsp;y\">");
AssertH(@"<base foo='\"'>", @"<base foo=\"&quot;\">");
Assert(@"<span foo=3 title='test \"with\" &amp;quot;'>", @"<span foo=\"3\" title=\"test &quot;with&quot; &amp;quot;\"></span>");
// Escaping of text content
Assert(@"<p>\"'\"</p>", @"<p>\"'\"</p>");
Assert(@"<p>&amp;</p>", @"<p>&amp;</p>");
Assert(@"<p>&amp</p>", @"<p>&amp;</p>");
Assert(@"<p>&lt;</p>", @"<p>&lt;</p>");
Assert(@"<p>&gt;</p>", @"<p>&gt;</p>");
Assert(@"<p>></p>", @"<p>&gt;</p>");
// Text below these elements is expected to be written without escaping
AssertH(@"<script>(x & 1) < 2; y > \"foo\" + 'bar'</script>", @"<script>(x & 1) < 2; y > \"foo\" + 'bar'</script>");
AssertH(@"<style>(x & 1) < 2; y > \"foo\" + 'bar'</style>", @"<style>(x & 1) < 2; y > \"foo\" + 'bar'</style>");
Assert(@"<xmp>(x & 1) < 2; y > \"foo\" + 'bar'</xmp>", @"<xmp>(x & 1) < 2; y > \"foo\" + 'bar'</xmp>");
Assert(@"<iframe>(x & 1) < 2; y > \"foo\" + 'bar'</iframe>", @"<iframe>(x & 1) < 2; y > \"foo\" + 'bar'</iframe>");
Assert(@"<noembed>(x & 1) < 2; y > \"foo\" + 'bar'</noembed>", @"<noembed>(x & 1) < 2; y > \"foo\" + 'bar'</noembed>");
AssertH(@"<noframes>(x & 1) < 2; y > \"foo\" + 'bar'</noframes>", @"<noframes>(x & 1) < 2; y > \"foo\" + 'bar'</noframes>");
// Leading newlines in pre, textarea and listing: the expected output has one leading newline less than the input
Assert(@"<pre>foo bar</pre>", @"<pre>foo bar</pre>");
Assert(@"<pre>\nfoo bar</pre>", @"<pre>foo bar</pre>");
Assert(@"<pre>\n\nfoo bar</pre>", @"<pre>\nfoo bar</pre>");
Assert(@"<textarea>foo bar</textarea>", @"<textarea>foo bar</textarea>");
Assert(@"<textarea>\nfoo bar</textarea>", @"<textarea>foo bar</textarea>");
Assert(@"<textarea>\n\nfoo bar</textarea>", @"<textarea>\nfoo bar</textarea>");
Assert(@"<listing>foo bar</listing>", @"<listing>foo bar</listing>");
Assert(@"<listing>\nfoo bar</listing>", @"<listing>foo bar</listing>");
Assert(@"<listing>\n\nfoo bar</listing>", @"<listing>\nfoo bar</listing>");
// Comments keep their data including surrounding whitespace
Assert(@"<p>hi <!--world--></p>", @"<p>hi <!--world--></p>");
Assert(@"<p>hi <!-- world--></p>", @"<p>hi <!-- world--></p>");
Assert(@"<p>hi <!--world --></p>", @"<p>hi <!--world --></p>");
Assert(@"<p>hi <!-- world --></p>", @"<p>hi <!-- world --></p>");
// Namespace-like attributes on svg elements
Assert(@"<svg xmlns=\"bleh\"></svg>", @"<svg xmlns=\"bleh\"></svg>");
Assert(@"<svg xmlns:foo=\"bleh\"></svg>", @"<svg xmlns:foo=\"bleh\"></svg>");
Assert(@"<svg xmlns:xlink=\"bleh\"></svg>", @"<svg xmlns:xlink=\"bleh\"></svg>");
Assert(@"<svg xlink:href=\"bleh\"></svg>", @"<svg xlink:href=\"bleh\"></svg>");
}
#pragma mark - Bug Fixes
@@ -41,4 +89,19 @@
XCTAssertEqualObjects(document.body.outerHTML, @"<body key=\"&amp; testing &nbsp;\"></body>");
}
/**
 Regression test for issue 33: serializes the `bug33` fixture on a global dispatch queue.

 NOTE(review): the serialization presumably runs off the main thread because secondary threads
 have a smaller stack, which is what the expectation's "limited recursion depth" refers to; confirm.
 */
- (void)testBugFix_Issue_33
{
	NSString *path = [HTMLKitTestUtil pathForFixture:@"bug33" ofType:@"html" inDirectory:@"Fixtures"];

	// A missing or unreadable fixture must fail the test instead of letting it pass on empty input
	NSError *error = nil;
	NSString *html = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
	XCTAssertNotNil(html, @"Could not load fixture 'bug33': %@", error);
	if (html == nil) {
		return;
	}

	HTMLDocument *document = [HTMLDocument documentWithString:html];

	XCTestExpectation *expectation = [self expectationWithDescription:@"HTML serializes despite limited recursion depth"];

	__block NSString *serialized = nil;
	dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0), ^{
		serialized = [document.rootElement outerHTML];
		[expectation fulfill];
	});

	[self waitForExpectationsWithTimeout:500 handler:nil];

	// The serialization has to produce output, not merely return
	XCTAssertTrue(serialized.length > 0);
}
@end
+108
View File
@@ -0,0 +1,108 @@
//
// HTMLTreeVisitorTests.m
// HTMLKit
//
// Created by Iska on 30.07.19.
// Copyright © 2019 BrainCookie. All rights reserved.
//
#import <XCTest/XCTest.h>
#import "HTMLDOM.h"
#import "HTMLElement.h"
#import "HTMLNode+Private.h"
/// Tests for the enter/leave order reported by HTMLTreeVisitor.
@interface HTMLTreeVisitorTests : XCTestCase
@end

@implementation HTMLTreeVisitorTests

#pragma mark - Asserts

// Asserts that `input` is an element whose `id` attribute equals `id`
#define AssertElementWithId(input, id) \
do { \
	HTMLNode *node = input;\
	XCTAssertEqual(node.nodeType, HTMLNodeElement);\
	XCTAssertEqualObjects(node.asElement[@"id"], id);\
} while(0)

// Asserts that `input` is a text node whose text content equals `value`
#define AssertTextWithValue(input, value) \
do { \
	HTMLNode *node = input;\
	XCTAssertEqual(node.nodeType, HTMLNodeText);\
	XCTAssertEqualObjects(node.textContent, value);\
} while(0)

// Asserts that `input` is a comment node whose text content equals `value`
#define AssertCommentWithValue(input, value) \
do { \
	HTMLNode *node = input;\
	XCTAssertEqual(node.nodeType, HTMLNodeComment);\
	XCTAssertEqualObjects(node.textContent, value);\
} while(0)

#pragma mark - Basic Walking

/// Builds the fixture tree and returns its root element `#a`.
- (HTMLNode *)testDOM
{
	// Tree structure:
	//              #a
	//              |
	//         +----+----+
	//         |         |
	//         #b        #c
	//                   |
	//              +----+----+
	//              |         |
	//              #d        #j
	//              |
	//         +----+----+
	//         |    |    |
	//         #e   #f   #i
	//              |
	//           +--+--+
	//           |     |
	//           #g    #h

	HTMLElement * (^makeElement)(NSString *, NSString *) = ^ HTMLElement * (NSString *tagName, NSString *identifier) {
		return [[HTMLElement alloc] initWithTagName:tagName attributes:@{@"id": identifier}];
	};

	// Inner nodes first, the leaves are created while they are attached
	HTMLElement *a = makeElement(@"div", @"a");
	HTMLElement *c = makeElement(@"div", @"c");
	HTMLElement *d = makeElement(@"div", @"d");
	HTMLElement *f = makeElement(@"div", @"f");

	[a appendNode:makeElement(@"div", @"b")];
	[a appendNode:c];

	[c appendNode:d];
	[c appendNode:makeElement(@"div", @"j")];

	[d appendNode:makeElement(@"div", @"e")];
	[d appendNode:f];
	[d appendNode:makeElement(@"div", @"i")];

	[f appendNode:makeElement(@"g", @"g")];
	[f appendNode:makeElement(@"h", @"h")];

	return a;
}

/// Walks the fixture tree and compares the recorded enter (E) and leave (L) events with the expected order.
- (void)testTreeVisitor
{
	NSMutableArray *events = [NSMutableArray array];

	HTMLNodeVisitorBlock *recorder = [HTMLNodeVisitorBlock visitorWithEnterBlock:^(HTMLNode *node) {
		NSString *entered = [NSString stringWithFormat:@"E %@", node.asElement.elementId];
		[events addObject:entered];
	} leaveBlock:^(HTMLNode *node) {
		NSString *left = [NSString stringWithFormat:@"L %@", node.asElement.elementId];
		[events addObject:left];
	}];

	HTMLTreeVisitor *treeVisitor = [[HTMLTreeVisitor alloc] initWithNode:self.testDOM];
	[treeVisitor walkWithNodeVisitor:recorder];

	NSArray *expected = @[
		@"E a",
		@"E b", @"L b",
		@"E c",
		@"E d",
		@"E e", @"L e",
		@"E f",
		@"E g", @"L g",
		@"E h", @"L h",
		@"L f",
		@"E i", @"L i",
		@"L d",
		@"E j", @"L j",
		@"L c",
		@"L a"
	];
	XCTAssertEqualObjects(events, expected);
}

@end
-1
View File
@@ -7,7 +7,6 @@
//
#import <XCTest/XCTest.h>
#import "HTMLTreeWalker.h"
#import "HTMLDOM.h"
#import "HTMLNode+Private.h"