Add example project
This commit is contained in:
@@ -0,0 +1,257 @@
|
||||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 46;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
629A63CD1D9AFE0E0089679F /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = 629A63CC1D9AFE0E0089679F /* main.swift */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
629A63C71D9AFE0E0089679F /* CopyFiles */ = {
|
||||
isa = PBXCopyFilesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
dstPath = /usr/share/man/man1/;
|
||||
dstSubfolderSpec = 0;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 1;
|
||||
};
|
||||
/* End PBXCopyFilesBuildPhase section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
629A63C91D9AFE0E0089679F /* HTMLKitExample */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = HTMLKitExample; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
629A63CC1D9AFE0E0089679F /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
629A63C61D9AFE0E0089679F /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
629A63C01D9AFE0E0089679F = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
629A63CB1D9AFE0E0089679F /* HTMLKitExample */,
|
||||
629A63CA1D9AFE0E0089679F /* Products */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
629A63CA1D9AFE0E0089679F /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
629A63C91D9AFE0E0089679F /* HTMLKitExample */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
629A63CB1D9AFE0E0089679F /* HTMLKitExample */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
629A63CC1D9AFE0E0089679F /* main.swift */,
|
||||
);
|
||||
path = HTMLKitExample;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
629A63C81D9AFE0E0089679F /* HTMLKitExample */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 629A63D01D9AFE0E0089679F /* Build configuration list for PBXNativeTarget "HTMLKitExample" */;
|
||||
buildPhases = (
|
||||
629A63C51D9AFE0E0089679F /* Sources */,
|
||||
629A63C61D9AFE0E0089679F /* Frameworks */,
|
||||
629A63C71D9AFE0E0089679F /* CopyFiles */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = HTMLKitExample;
|
||||
productName = HTMLKitExample;
|
||||
productReference = 629A63C91D9AFE0E0089679F /* HTMLKitExample */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
629A63C11D9AFE0E0089679F /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
attributes = {
|
||||
LastSwiftUpdateCheck = 0800;
|
||||
LastUpgradeCheck = 0800;
|
||||
ORGANIZATIONNAME = iabudiab;
|
||||
TargetAttributes = {
|
||||
629A63C81D9AFE0E0089679F = {
|
||||
CreatedOnToolsVersion = 8.0;
|
||||
ProvisioningStyle = Automatic;
|
||||
};
|
||||
};
|
||||
};
|
||||
buildConfigurationList = 629A63C41D9AFE0E0089679F /* Build configuration list for PBXProject "HTMLKitExample" */;
|
||||
compatibilityVersion = "Xcode 3.2";
|
||||
developmentRegion = English;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
);
|
||||
mainGroup = 629A63C01D9AFE0E0089679F;
|
||||
productRefGroup = 629A63CA1D9AFE0E0089679F /* Products */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
629A63C81D9AFE0E0089679F /* HTMLKitExample */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
629A63C51D9AFE0E0089679F /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
629A63CD1D9AFE0E0089679F /* main.swift in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
629A63CE1D9AFE0E0089679F /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
|
||||
CLANG_CXX_LIBRARY = "libc++";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_SUSPICIOUS_MOVES = YES;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
CODE_SIGN_IDENTITY = "-";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = dwarf;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_TESTABILITY = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"DEBUG=1",
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.11;
|
||||
MTL_ENABLE_DEBUG_INFO = YES;
|
||||
ONLY_ACTIVE_ARCH = YES;
|
||||
SDKROOT = macosx;
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
629A63CF1D9AFE0E0089679F /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
|
||||
CLANG_CXX_LIBRARY = "libc++";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_SUSPICIOUS_MOVES = YES;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
CODE_SIGN_IDENTITY = "-";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
ENABLE_NS_ASSERTIONS = NO;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.11;
|
||||
MTL_ENABLE_DEBUG_INFO = NO;
|
||||
SDKROOT = macosx;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
629A63D11D9AFE0E0089679F /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 3.0;
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
629A63D21D9AFE0E0089679F /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 3.0;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
629A63C41D9AFE0E0089679F /* Build configuration list for PBXProject "HTMLKitExample" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
629A63CE1D9AFE0E0089679F /* Debug */,
|
||||
629A63CF1D9AFE0E0089679F /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
629A63D01D9AFE0E0089679F /* Build configuration list for PBXNativeTarget "HTMLKitExample" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
629A63D11D9AFE0E0089679F /* Debug */,
|
||||
629A63D21D9AFE0E0089679F /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 629A63C11D9AFE0E0089679F /* Project object */;
|
||||
}
|
||||
Generated
+7
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Workspace
|
||||
version = "1.0">
|
||||
<FileRef
|
||||
location = "self:HTMLKitExample.xcodeproj">
|
||||
</FileRef>
|
||||
</Workspace>
|
||||
@@ -0,0 +1,167 @@
|
||||
//
|
||||
// main.swift
|
||||
// HTMLKitExample
|
||||
//
|
||||
// Created by Iska on 27/09/16.
|
||||
// Copyright © 2016 iabudiab. All rights reserved.
|
||||
//
|
||||
|
||||
import HTMLKit
|
||||
|
||||
// Simple scraper that is able to load a page, query via CSS Selectors, and following links
|
||||
class Scraper {
|
||||
|
||||
enum ScrapingError: Error {
|
||||
case DocumentNotLoaded
|
||||
case ElementNotFound(String)
|
||||
case InvalidAnchorUrl(String)
|
||||
case CouldNotLoadPage(URL)
|
||||
}
|
||||
|
||||
private var url: URL
|
||||
private(set) var document: HTMLDocument?
|
||||
|
||||
init(url: URL) {
|
||||
self.url = url
|
||||
self.document = nil
|
||||
}
|
||||
|
||||
func load() throws {
|
||||
try loadDocument(at: url)
|
||||
}
|
||||
|
||||
func listElements(matching selector: CSSSelector) throws -> [HTMLElement] {
|
||||
guard let document = document else {
|
||||
throw ScrapingError.DocumentNotLoaded
|
||||
}
|
||||
|
||||
return document.elements(matching: selector)
|
||||
}
|
||||
|
||||
func followLink(matchingSelector selector: CSSSelector) throws {
|
||||
guard let document = document else {
|
||||
throw ScrapingError.DocumentNotLoaded
|
||||
}
|
||||
|
||||
guard let link = document.firstElement(matching: selector) else {
|
||||
throw ScrapingError.ElementNotFound(selector.debugDescription)
|
||||
}
|
||||
|
||||
guard let targetUrl = URL(string: link["href"], relativeTo: url) else {
|
||||
throw ScrapingError.InvalidAnchorUrl(link["href"])
|
||||
}
|
||||
|
||||
try loadDocument(at: targetUrl)
|
||||
}
|
||||
|
||||
private func loadDocument(at url: URL) throws {
|
||||
guard let content = try? String(contentsOf: url) else {
|
||||
throw ScrapingError.CouldNotLoadPage(url)
|
||||
}
|
||||
document = HTMLDocument(string: content)
|
||||
}
|
||||
}
|
||||
|
||||
// A custom block-based selector, that matches only elements having the given text content:
|
||||
// i.e. textContentSelector("Hello") will match <p>Hello</p> and <a href='example.com'>Hello</a>
|
||||
// but wont match <div>World</div> or <p>Hello there</p>
|
||||
func textContentSelector(text: String) -> CSSSelector {
|
||||
return namedBlockSelector("[@textContent='\(text)']") { (element) -> Bool in
|
||||
return element.textContent == text
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to create a typed-selector matching an anchor element that has the given
|
||||
// text content.
|
||||
func anchorElement(havingContent: String) -> CSSSelector {
|
||||
return allOf(
|
||||
[
|
||||
typeSelector("a"),
|
||||
textContentSelector(text: havingContent)
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
// Helper function to print the content of a github repository file content
|
||||
func printRepositoryFile(element: HTMLElement) {
|
||||
|
||||
// A node iterator filter that iterates only <td> elements of class "content" i.e. <td class='content'>
|
||||
let contentIterator = element.nodeIterator(showOptions: .element) { (node) -> HTMLNodeFilterValue in
|
||||
guard let element = node as? HTMLElement else { return .reject }
|
||||
|
||||
if element.tagName == "td" && element["class"] == "content" {
|
||||
return .accept
|
||||
}
|
||||
|
||||
return .reject
|
||||
}
|
||||
|
||||
for td in contentIterator {
|
||||
// The cast is necessary because Swift3 wont import the generics info of the NSEnumerator class
|
||||
// i.e. the nextObject() function alwasy has the following signature 'func nextObject() -> Any?'
|
||||
let title = (td as AnyObject).textContent.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
|
||||
print("- \(title)")
|
||||
}
|
||||
}
|
||||
|
||||
let htmlKitUrl = URL(string: "https://github.com/iabudiab/HTMLKit")!
|
||||
let scraper = Scraper(url: htmlKitUrl)
|
||||
|
||||
do {
|
||||
// Load the page
|
||||
try scraper.load()
|
||||
|
||||
// Parse the selector
|
||||
let repositoryContent = try CSSSelectorParser.parseSelector("[role='main'] .repository-content > .file-wrap > .files tr.js-navigation-item")
|
||||
|
||||
// Query matching elements
|
||||
let files = try scraper.listElements(matching: repositoryContent)
|
||||
|
||||
print("HTMLKit repositroy root:")
|
||||
files.forEach(printRepositoryFile)
|
||||
} catch let error {
|
||||
print(error)
|
||||
}
|
||||
|
||||
do {
|
||||
// Follow some links
|
||||
try scraper.followLink(matchingSelector: anchorElement(havingContent: "Sources"))
|
||||
try scraper.followLink(matchingSelector: anchorElement(havingContent: "HTMLEOFToken.m"))
|
||||
|
||||
// The following selector: "[role='main'] div.file table.js-file-line-container td:nth-child(2)"
|
||||
// can be defined in type-safe manner:
|
||||
let selector = allOf([
|
||||
descendantOfElementSelector(
|
||||
attributeSelector(.exactMatch, "role", "main")
|
||||
),
|
||||
descendantOfElementSelector(
|
||||
allOf([
|
||||
typeSelector("div"),
|
||||
classSelector("file")
|
||||
])
|
||||
),
|
||||
descendantOfElementSelector(
|
||||
allOf([
|
||||
typeSelector("table"),
|
||||
classSelector("js-file-line-container")
|
||||
])
|
||||
),
|
||||
typeSelector("td"),
|
||||
nthChildSelector(
|
||||
CSSNthExpressionMake(0, 2)
|
||||
)
|
||||
])
|
||||
|
||||
// Query matching elements
|
||||
let elements = try scraper.listElements(matching: selector)
|
||||
|
||||
// This will print the source code for the "HTMLEOFToken.m" file under this url:
|
||||
// https://github.com/iabudiab/HTMLKit/blob/master/Sources/HTMLEOFToken.m
|
||||
|
||||
print("\nHTMLEOFToken:")
|
||||
elements.forEach {
|
||||
print($0.textContent)
|
||||
}
|
||||
} catch let error {
|
||||
print(error)
|
||||
}
|
||||
+3
@@ -7,4 +7,7 @@
|
||||
<FileRef
|
||||
location = "group:HTMLKit.playground">
|
||||
</FileRef>
|
||||
<FileRef
|
||||
location = "group:Example/HTMLKitExample/HTMLKitExample.xcodeproj">
|
||||
</FileRef>
|
||||
</Workspace>
|
||||
|
||||
Reference in New Issue
Block a user