reordering structure, removed windows scripts

2024-02-29 17:41:08 +05:00
parent 2451ba27b7
commit 39a6739754
114 changed files with 961 additions and 13222 deletions
@@ -0,0 +1,44 @@
+# Image for the crawl toolkit: text extractors, the OpenSearch bundle and the www/ Node.js app.
+FROM debian
+
+WORKDIR /opt/crawl
+
+# Extraction toolchain (the packages match the README dependency list), vosk from pip,
+# and a static radare2 release whose rabin2 binary is linked into /usr/local/bin.
+RUN apt update && \
+apt install -y --no-install-recommends sudo tmux iproute2 nano less iputils-ping locales && \
+apt install -y --no-install-recommends wget curl file sqlite3 cifs-utils python3 python3-pip xz-utils && \
+apt install -y --no-install-recommends lynx uchardet catdoc unzip python3-pdfminer p7zip-full && \
+apt install -y --no-install-recommends maildir-utils mpack libemail-outlook-message-perl libemail-sender-perl binwalk && \
+apt install -y --no-install-recommends graphicsmagick-imagemagick-compat tesseract-ocr tesseract-ocr-eng tesseract-ocr-rus ffmpeg && \
+pip3 install --break-system-packages vosk && \
+wget https://github.com/radareorg/radare2/releases/download/5.8.8/radare2-5.8.8-static.tar.xz -O /tmp/radare2.tar.xz && tar xvf /tmp/radare2.tar.xz -C /opt/ && rm /tmp/radare2.tar.xz && ln -s /opt/r2-static/usr/bin/rabin2 /usr/local/bin/rabin2
+
+COPY bin bin
+COPY cron cron
+COPY www www
+COPY spider.sh .
+COPY crawl.sh .
+COPY import.sh .
+COPY search.sh .
+COPY opensearch.py .
+
+# Search stack: Node.js/npm for www/, a Java runtime plus the OpenSearch bundle unpacked into /opt, Python client libraries.
+RUN apt install -y --no-install-recommends nodejs npm openjdk-17-jre && \
+pip3 install --break-system-packages opensearch-py colorama && \
+cd www/ && npm install && cd - && \
+wget https://artifacts.opensearch.org/releases/bundle/opensearch/2.11.0/opensearch-2.11.0-linux-x64.tar.gz -O /tmp/opensearch.tar.gz && tar xvf /tmp/opensearch.tar.gz -C /opt/ && rm /tmp/opensearch.tar.gz
+
+# ru_RU.UTF-8 locale, an account "user" (group users, home /opt/crawl) that owns /opt, and passwordless sudo for it.
+# chown takes OWNER:GROUP; the dotted "user.users" spelling is deprecated and makes current coreutils print a warning.
+# NOTE(review): "echo 241" presumably answers a dpkg-reconfigure menu by number - verify it still selects the intended locale.
+RUN echo 'LANG="ru_RU.UTF-8"' > /etc/default/locale && \
+localedef -i ru_RU -f UTF-8 ru_RU.UTF-8 && \
+locale-gen && \
+echo 241 | dpkg-reconfigure locales && \
+echo "LANG=ru_RU.UTF-8" > /etc/default/locale && \
+useradd -s /bin/bash -g users -N -M -d /opt/crawl user && \
+chown -R user:users /opt/ && \
+chmod +w /etc/sudoers && echo 'user    ALL=(root) NOPASSWD: ALL' >> /etc/sudoers && chmod -w /etc/sudoers
+
+# Port published by the README's docker run example (-p 8080:8080).
+EXPOSE 8080
@@ -0,0 +1,127 @@
+## Crawling
+
+Each crawler walks through a source and extracts only the useful data - text. File types are detected from content, not from the file extension. Easily customizable.
+Supported file types: `text`, `html`, `doc`/`docx`, `xls`/`xlsx`, `pdf`, `archives`, `exe`/`bin`, `eml`/`msg`, `images`, `sounds`.
+You can easily add your own file types (GNU power)
+
+![crawl.sh](img/crawl.png)
+
+![search.sh](img/search.png)
+
+## Installation
+
+### System
+
+Depends:
+
+* lynx, uchardet - html
+* catdoc - doc
+* xls2csv - xls
+* unzip - docx,xlsx
+* pdf2txt - pdf
+* rabin2 - exe,dll
+* 7z - archives
+* identify, tesseract - images
+* vosk-transcriber - audio
+* msgconvert, munpack, mu - emails
+* binwalk - disk images
+
+```
+sudo apt install sqlite3 cifs-utils
+sudo apt install file uchardet cifs-utils lynx catdoc unzip python3-pdfminer radare2 p7zip-full
+sudo apt install maildir-utils mpack libemail-outlook-message-perl libemail-sender-perl binwalk
+sudo apt install graphicsmagick-imagemagick-compat tesseract-ocr tesseract-ocr-eng tesseract-ocr-rus ffmpeg
+sudo pip3 install vosk
+```
+
+### Docker
+
+```
+sudo docker build -t crawl .
+sudo docker run --privileged --cap-add SYS_ADMIN --cap-add DAC_READ_SEARCH --cap-add NET_BIND_SERVICE --cap-add CAP_SYSLOG -u 1000 -p 8080:8080 --name crawl -it crawl /bin/bash
+```
+
+### SMB crawling
+
+Mount a network share locally and crawl it:
+
+```
+mount.cifs "//10.10.10.10/Docs" /mnt/Docs -o ro,dom=corp.net,user=username,pass=password
+./crawl.sh /mnt/Docs -size -10M
+```
+
+It will create `Docs.csv` index file.
+
+### Web crawling
+
+Depends:
+
+* wget with controllable download limit (https://yurichev.com/wget.html)
+
+Mirror the site content locally and crawl it:
+
+```
+./spider.sh --limit-size=500k http://target.com/
+./crawl.sh target.com/
+```
+
+It will create `target.com.csv` index file.
+
+### FTP crawling
+
+Mirror the FTP content locally and crawl it:
+
+```
+./spider.sh --limit-size=500k ftp://target.com/
+./crawl.sh target.com/
+```
+
+It will create `target.com.csv` index file.
+
+## Searching
+
+After crawling, the extracted text is stored in `csv` files.
+Data can be searched using simple `grep`:
+
+`grep -ia -o -P ".{0,100}password..{0,100}" *.csv | grep -ai --color=auto "password"`
+
+Or use a fuzzy search, which also matches words written with errors:
+
+`tre-agrep -i -E 2 passw *.csv`
+
+### Searching CLI (pentesters)
+
+Data can be converted into a `sqlite3` database with full-text search support:
+
+`./import.sh INBOX.csv`
+
+Searching for data in the database is now more convenient:
+
+```
+./search.sh INBOX.db 's3cr3t'
+./search.sh INBOX.db 'password' -c 10 -o 20
+./search.sh INBOX.db 'password' -m 'admin'
+```
+
+### Searching GUI (enterprise)
+
+Depends:
+
+```
+sudo apt install nodejs npm openjdk-17-jre
+cd www && npm install
+wget https://artifacts.opensearch.org/releases/bundle/opensearch/2.11.0/opensearch-2.11.0-linux-x64.tar.gz -O /tmp/opensearch.tar.gz && tar xvf /tmp/opensearch.tar.gz -C /opt/
+JAVA_LIBRARY_PATH=/opt/opensearch/plugins/opensearch-knn/lib /opt/opensearch/opensearch-tar-install.sh
+```
+
+Searching for data using opensearch:
+
+```
+JAVA_LIBRARY_PATH=/opt/opensearch/plugins/opensearch-knn/lib /opt/opensearch/bin/opensearch
+./opensearch.py localhost:9200 -i test -init
+./opensearch.py localhost:9200 -i test -import INBOX.csv
+cd www && node index.js
+chrome http://localhost:8080/test/
+```
+
+Continuous crawling (your own Google for the local network) - just use the few simple cron scripts described in [cron/README.md](cron/README.md)
@@ -7,7 +7,9 @@ RESET=$'\x1b[39m'

 [[ $# -lt 1 ]] && {
 	echo "$0 where/ [/usr/bin/find options]"
-	echo "example: $0 /mnt/share/ -size -10M ! -iname '*.wav' ! -iname '*.mp3'"
+	echo "example: $0 /mnt/share/ -size -10M -not -iname '*.wav' -not -iname '*.mp3'"
+	echo "example: $0 /mnt/share/ -not -ipath '*/Program Files*/*' -not -ipath '*/Windows/*'"
+	echo "example: $0 /mnt/share/ -newermt '2012-12-21 00:00'"
 	exit
 }

@@ -64,10 +66,10 @@ find "$where" "${opts[@]}" -type f -print 2> /dev/null |
 while read path
 do
 	[[ $is_resume = 1 && $(session_is_file_done $path) = 1 ]] && {
-		echo "(skip $path)"
+		echo $GREY"$path"$RESET
 		continue
 	}
-	printf "\n" >> "$index"
+	[[ -s "$index" ]] && printf "\n" >> "$index"
 	echo -n "$(date +%s)," >> "$index"
 	echo -n "$path"
 	echo -n "$path" | escape >> "$index"
@@ -78,115 +80,142 @@ do
 	[[ $filename = $ext ]] && ext=''
 	echo -n "$ext" | escape >> "$index"
 	echo -n "," >> "$index"
-	mime=$(file -bi "$path")
-	mime=${mime%' '*}
+	mime=$(file -b --mime-type "$path")
 	case $mime in
-		*/xml\;)
-			echo -n "xml," >> "$index"
-			cat "$path" | escape >> "$index"
-			echo $GREEN " [xml]" $RESET
-			;;
-		*/*html*)
+		*/*html*|application/javascript)
 			echo -n "html," >> "$index"
 			codepage=$(uchardet "$path")
 			cat "$path" | iconv -f $codepage | lynx -nolist -dump -stdin | escape >> "$index"
 			echo $GREEN " [html]" $RESET
 			;;
-		text/*|*/*script\;)
+		text/*|*/*script|*/xml|*/json|*-ini)
 			echo -n "text," >> "$index"
-			cat "$path" | escape >> "$index"
+			codepage=$(uchardet "$path")
+			cat "$path" | iconv -f $codepage | escape >> "$index"
 			echo $GREEN " [text]" $RESET
 			;;
-		application/msword\;)
+		application/msword)
 			echo -n "doc," >> "$index"
 			catdoc "$path" | escape >> "$index"
 			echo $GREEN " [doc]" $RESET
 			;;
-		application/vnd.openxmlformats-officedocument.wordprocessingml.document\;)
+		application/vnd.openxmlformats-officedocument.wordprocessingml.document)
 			echo -n "doc," >> "$index"
-			unzip -p "$path" | grep -a '<w:r' | sed 's/<w:p[^<\/]*>/ /g' | sed 's/<[^<]*>//g' | grep -a -v '^[[:space:]]*$' | sed G | escape >> "$index"
+			unzip -p "$path" 2> /dev/null | grep -a '<w:r' | sed 's/<w:p[^<\/]*>/ /g' | sed 's/<[^<]*>//g' | grep -a -v '^[[:space:]]*$' | sed G | escape >> "$index"
 			echo $GREEN " [docx]" $RESET
+			if unzip -l "$path" | grep -q 'word/media/'; then
+				temp=$(tempfile 2>/dev/null)
+				rm $temp && mkdir -p "$temp/$path"
+				unzip "$path" 'word/media/*' -d "$temp/$path" > /dev/null
+				fork "$temp"
+				rm -r "$temp"
+				#session_file_done $path
+			fi
 			;;
-		application/vnd.ms-excel\;)
+		application/vnd.ms-excel)
 			echo -n "xls," >> "$index"
 			xls2csv -x "$path" | escape >> "$index"
 			echo $GREEN " [xls]" $RESET
 			;;
-		application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\;)
+		application/vnd.openxmlformats-officedocument.spreadsheetml.sheet)
 			echo -n "xlsx," >> "$index"
-			unzip -p "$path" | grep -a -e '<si><t>' -e '<vt:lpstr>' | sed 's/<[^<\/]*>/ /g' | sed 's/<[^<]*>//g' | escape >> "$index"
+			#libreoffice --convert-to csv "$path" out.csv
+			unzip -p "$path" 2> /dev/null | grep -a -e '<si><t' -e '<vt:lpstr>' | sed 's/<[^<\/]*>/ /g' | sed 's/<[^<]*>//g' | escape >> "$index"
 			echo $GREEN " [xlsx]" $RESET
 			;;
-		application/pdf\;)
+		application/pdf)
 			echo -n "pdf," >> "$index"
 			pdf2txt -t text "$path" 2> /dev/null | escape >> "$index"
 			echo $GREEN " [pdf]" $RESET
 			;;
-		application/x-executable\;|application/x*dos*)
+		application/x-executable|application/*microsoft*-executable|application/x*dos*)
 			echo -n "exe," >> "$index"
 			rabin2 -z "$path" 2> /dev/null | escape >> "$index"
 			echo $GREEN " [exe]" $RESET
 			;;
-		application/x-object\;|application/x-sharedlib|application/x-executable\;)
+		application/x-object|application/x-sharedlib|application/x-executable)
 			echo -n "elf," >> "$index"
 			rabin2 -z "$path" 2> /dev/null | escape >> "$index"
 			echo $GREEN " [elf]" $RESET
 			;;
-		application/*compressed*|application/*zip*|application/*rar*|application/*tar*|application/*gzip*)
+		application/*compressed*|application/*zip*|application/*rar*|application/*tar*|application/*gzip*|application/*-msi|*/java-archive)
 			echo -n "zip," >> "$index"
-			7z l "$path" | tail -n +13 | escape >> "$index"
+			7z l -p '' "$path" 2> /dev/null | tail -n +13 | escape >> "$index"
 			echo $GREEN " [archive]" $RESET
-			temp=$(tempfile)
+			temp=$(tempfile 2>/dev/null)
 			rm $temp && mkdir -p "$temp/$path"
-			7z x "$path" -o"$temp/$path" 1> /dev/null 2> /dev/null
+			7z x -p '' "$path" -o"$temp/$path" 1> /dev/null 2> /dev/null
 			fork "$temp"
 			rm -r "$temp"
-			session_file_done $path
-			#break
+			#session_file_done $path
 			;;
 		image/*)
 			echo -n "image," >> "$index"
-			identify -verbose "$path" 2> /dev/null | escape >> "$index"
-			#tesseract "$path" stdout -l eng >> "$index"
-			#tesseract "$path" stdout -l rus >> "$index"
+			#identify -verbose "$path" 2> /dev/null | escape >> "$index"
+			tesseract "$path" stdout -l eng 2> /dev/null | escape >> "$index"
+			tesseract "$path" stdout -l rus 2> /dev/null | escape >> "$index"
+			#curl -X POST --form "photo=@$path" http://10.250.153.11/ | escape >> "$index"
 			echo $GREEN " [img]" $RESET
 			;;
+		audio/*)
+			echo -n "audio," >> "$index"
+			vosk-transcriber --lang en-us --input "$path" 2> /dev/null | escape >> "$index"
+			echo $GREEN " [snd]" $RESET
+			;;
+		application/vnd.ms-outlook)
+			echo -n "message," >> "$index"
+			temp=$(tempfile 2>/dev/null)
+			rm $temp && mkdir -p "$temp/$path"
+			msgconvert --outfile "$temp/$path/out.eml" "$path" 2> /dev/null
+			mu view "$temp/$path/out.eml" 2> /dev/null | escape >> "$index"
+			echo $GREEN " [message]" $RESET
+			munpack -t -f -C "$(realpath $temp/$path)" 'out.eml' > /dev/null
+			rm "$temp/$path/out.eml"
+			fork "$temp"
+			rm -r "$temp"
+			#session_file_done $path
+			;;
 		message/*)
 			echo -n "message," >> "$index"
-			mu view "$path" | escape >> "$index"
+			mu view "$path" 2> /dev/null | escape >> "$index"
 			echo $GREEN " [message]" $RESET
-			temp=$(tempfile)
+			temp=$(tempfile 2>/dev/null)
 			rm $temp && mkdir -p "$temp/$path"
 			cp "$path" "$temp/$path/"
 			munpack -t -f -C "$(realpath $temp/$path)" "$(basename $path)" > /dev/null
 			rm "$temp/$path/$(basename $path)"
 			fork "$temp"
 			rm -r "$temp"
-			session_file_done $path
-			#break
+			#session_file_done $path
 			;;
-		application/octet-stream\;)
-			echo -n "raw," >> "$index"
-			#strings "$path" | escape >> "$index"
-			echo -n "," >> "$index"
-			echo $GREEN " [raw]" $RESET
+		*.tcpdump.pcap)
+			echo -n "pcap," >> "$index"
+			tcpdump -r "$path" -nn -A | escape >> "$index"
+			echo $GREEN " [pcap]" $RESET
 			;;
-		application/x-raw-disk-image\;)
+		application/x-raw-disk-image)
 			echo -n "disk," >> "$index"
 			binwalk "$path" | escape >> "$index"
 			echo $GREEN " [disk]" $RESET
 			;;
+		application/octet-stream)
+			echo -n "raw," >> "$index"
+			#strings "$path" | escape >> "$index"
+			echo -n "" >> "$index"
+			echo $GREEN " [raw]" $RESET
+			;;
 		*)
-			echo -n "unknown," >> "$index"
 			file "$path" | grep text > /dev/null &&
 			{
+				echo -n "text," >> "$index"
 				cat "$path" | escape >> "$index"
-				echo $GREY " [unknown]" $RESET
+				echo $GREEN " [text]" $RESET
 			} || {
+				echo -n "unknown," >> "$index"
 				#strings "$path" >> "$index"
-				echo -n "," >> "$index"
+				echo -n "" >> "$index"
+				echo $RED " [unknown]" $RESET
 				echo "$path $mime" >> unknown_mime.log
-				echo $RED " [error]" $RESET
 			}
 			;;
 	esac
@@ -0,0 +1,15 @@
+## Continuous crawling
+
+```
+JAVA_LIBRARY_PATH=/opt/opensearch/plugins/opensearch-knn/lib /opt/opensearch/bin/opensearch
+cd /opt/crawl/www && node index.js
+```
+
+`/opt/crawl/opensearch.py localhost:9200 -i $INDEX -init`
+
+```
+crontab -e
+30 11 * * * tmux new-session -d '/opt/crawl/cron/targets.sh ; timeout 3600 /opt/crawl/cron/scan.sh ; tmux new-window -d 'timeout $[3600*8] /opt/crawl/cron/www.sh' & tmux new-window -d 'timeout $[3600*8] /opt/crawl/cron/ftp.sh' & tmux new-window -d 'timeout $[3600*8] /opt/crawl/cron/smb.sh'
+0 23 * * * tmux new-session -d '/opt/crawl/cron/import.sh'
+0 0 * * 1  /opt/crawl/cron/clean.sh
+```
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Reset crawl state in the current directory: host lists, CSV indexes, crawl.log and .*.sess files.
+
+# -f: a file that was never produced, or a glob with no match, is not an error.
+rm -f -- smb-hosts.txt
+rm -f -- www-hosts.txt
+rm -f -- ftp-hosts.txt
+
+rm -f -- *.csv
+rm -f -- crawl.log
+rm -f -- .*.sess
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+cat ftp-hosts.txt | while read ip
+do echo "$ip"
+	timeout 300 /opt/crawl/spider.sh "ftp://$ip/"
+	timeout 300 /opt/crawl/crawl.sh "$ip"
+done
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Import every CSV file in the current directory into the OpenSearch index named by INDEX.
+
+INDEX="company"
+
+for csv in *.csv
+do
+	# An unmatched glob stays literal ("*.csv"); do not hand that to the importer.
+	[[ -e "$csv" ]] || continue
+	echo "$csv"
+	/opt/crawl/opensearch.py localhost:9200 -i "$INDEX" -import "$csv"
+done
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+#PORTS_WWW="80,443,8080,8443,8000,8088,8880,8808,8888,6443,7443,9443,10443,8081"
+PORTS_WWW="80,8080"
+PORTS_FTP='21'
+PORTS_SMB='445'
+
+for net in $(cat nets.txt)
+do echo "$net"
+	#nmap -Pn -n --max-retries 0 --max-rate 5 "$net" -p "$PORTS_WWW" --open -oG - | grep 'open' | tr '/' ' ' | awk '{print $2 " " $5}' >> www-hosts.txt
+	#nmap -Pn -n --max-retries 0 --max-rate 5 "$net" -p "$PORTS_FTP" --open -oG - | grep 'open' | tr '/' ' ' | awk '{print $2}' >> ftp-hosts.txt
+	nmap -Pn -n --max-retries 0 --max-rate 5 "$net" -p "$PORTS_SMB" --open -oG - | grep 'open' | tr '/' ' ' | awk '{print $2}' >> smb-hosts.txt
+done
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+DOMAIN='company.org'
+USER='iivanov'
+PASS='password'
+
+#cme -t 1 smb --shares smb-hosts.txt | grep ' READ ' | sed -rn 's/SMB\s+([^\s]+)\s+445\s+([^\s]+)\s+(.*)\s+READ.+/\1\t\2\t\3/p' > shares-anon.txt
+cme -t 1 smb -d "$DOMAIN" -u "$USER" -p "$PASS" --shares smb-hosts.txt | grep ' READ ' | sed -rn 's/SMB\s+([^\s]+)\s+445\s+([^\s]+)\s+(.*)\s+READ.+/\1\t\2\t\3/p' > shares-user.txt
+
+IFS=$'\t'
+for depth in {1..10}
+do
+	cat shares-user.txt | grep -v 'IPC$' | while read ip name share
+	do echo "$ip" "$share"
+		fgrep -q "+ $depth //$ip/$share" crawl.log 2> /dev/null && continue
+		mkdir "/mnt/$ip-$share"
+		sudo timeout 5 mount.cifs "//$ip/$share" "/mnt/$ip-$share" -o ro,dom="$DOMAIN",user="$USER",pass="$PASS" || { echo "- $depth //$ip/$share" >> crawl.log; continue; }
+		timeout 300 /opt/crawl/crawl.sh "/mnt/$ip-$share" -mindepth "$depth" -maxdepth "$depth" -size -100k
+		sudo umount "/mnt/$ip-$share"
+		rm -r "/mnt/$ip-$share"
+		echo "+ $DEPTH //$ip/$share" >> crawl.log
+	done
+done
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+USER='iivanov'
+PASS='password'
+DOMAIN='company.org'
+DC='192.168.12.6'
+DNS=$DC
+
+namespace=$(curl -s ldap://$DC | grep 'namingContexts:' | head -n 1 | awk '{print $2}')
+ldapsearch -o ldif-wrap=no -E pr=10000/noprompt -D "$USER@$DOMAIN" -w "$PASS" -x -H ldap://"$DC" -b "$namespace" '(objectClass=computer)' dnshostname | grep dNSHostName | awk '{print $2}' > hosts.txt
+
+cat hosts.txt | while read host
+do host "$host" "$DNS" | grep 'has address' | awk '{print $4}'
+done | sed -rn 's/([0-9]+\.[0-9]+\.[0-9]+)\.[0-9]+./\1\.0\/24/p' | sort | uniq -c | sort -n -r | awk '{print $2}' > nets.txt
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+cat www-hosts.txt | while read ip port
+do echo "$ip $port"
+	timeout 300 /opt/crawl/spider.sh "http://$ip:$port/"
+	timeout 300 /opt/crawl/crawl.sh "$ip:$port"
+done
@@ -1,25 +0,0 @@
-cd path/to/crawl/linux
-
-### Local crawling
-
-PATH=$PATH:bin ./crawl.sh /home/ -size -10M
-
-PATH=$PATH:bin ./grep.sh 'pass' / -size -10M
-
-./import.sh results.csv
-
-./search.sh results.db 's3cr3t'
-
-### Web crawling
-
-./spider.sh http://target.com/
-
-cd /tmp/spider/
-
-./crawl.sh target.com/ -size -10M
-
-### Mails crawling
-
-./imap.sh imap://server.com user:pass
-
-./crawl.sh INBOX
@@ -1,205 +0,0 @@
-#!/usr/bin/python3
-"""A command line tool for extracting text and images from PDF and
-output it to plain text, html, xml or tags."""
-import argparse
-import logging
-import sys
-sys.path = ['.'] + sys.path
-
-import pdfminer.high_level
-import pdfminer.layout
-
-logging.basicConfig()
-
-OUTPUT_TYPES = ((".htm", "html"),
-                (".html", "html"),
-                (".xml", "xml"),
-                (".tag", "tag"))
-
-
-def float_or_disabled(x):
-    if x.lower().strip() == "disabled":
-        return x
-    try:
-        x = float(x)
-    except ValueError:
-        raise argparse.ArgumentTypeError("invalid float value: {}".format(x))
-
-
-def extract_text(files=[], outfile='-',
-                 no_laparams=False, all_texts=None, detect_vertical=None,
-                 word_margin=None, char_margin=None, line_margin=None,
-                 boxes_flow=None, output_type='text', codec='utf-8',
-                 strip_control=False, maxpages=0, page_numbers=None,
-                 password="", scale=1.0, rotation=0, layoutmode='normal',
-                 output_dir=None, debug=False, disable_caching=False,
-                 **kwargs):
-    if not files:
-        raise ValueError("Must provide files to work upon!")
-
-    # If any LAParams group arguments were passed,
-    # create an LAParams object and
-    # populate with given args. Otherwise, set it to None.
-    if not no_laparams:
-        laparams = pdfminer.layout.LAParams()
-        for param in ("all_texts", "detect_vertical", "word_margin",
-                      "char_margin", "line_margin", "boxes_flow"):
-            paramv = locals().get(param, None)
-            if paramv is not None:
-                setattr(laparams, param, paramv)
-    else:
-        laparams = None
-
-    if output_type == "text" and outfile != "-":
-        for override, alttype in OUTPUT_TYPES:
-            if outfile.endswith(override):
-                output_type = alttype
-
-    if outfile == "-":
-        outfp = sys.stdout
-        if outfp.encoding is not None:
-            codec = 'utf-8'
-    else:
-        outfp = open(outfile, "wb")
-
-    for fname in files:
-        with open(fname, "rb") as fp:
-            pdfminer.high_level.extract_text_to_fp(fp, **locals())
-    return outfp
-
-
-def maketheparser():
-    parser = argparse.ArgumentParser(description=__doc__, add_help=True)
-    parser.add_argument(
-        "files", type=str, default=None, nargs="+",
-        help="One or more paths to PDF files.")
-
-    parser.add_argument(
-        "--version", "-v", action="version",
-        version="pdfminer.six v{}".format(pdfminer.__version__))
-    parser.add_argument(
-        "--debug", "-d", default=False, action="store_true",
-        help="Use debug logging level.")
-    parser.add_argument(
-        "--disable-caching", "-C", default=False, action="store_true",
-        help="If caching or resources, such as fonts, should be disabled.")
-
-    parse_params = parser.add_argument_group(
-        'Parser', description='Used during PDF parsing')
-    parse_params.add_argument(
-        "--page-numbers", type=int, default=None, nargs="+",
-        help="A space-seperated list of page numbers to parse.")
-    parse_params.add_argument(
-        "--pagenos", "-p", type=str,
-        help="A comma-separated list of page numbers to parse. "
-             "Included for legacy applications, use --page-numbers "
-             "for more idiomatic argument entry.")
-    parse_params.add_argument(
-        "--maxpages", "-m", type=int, default=0,
-        help="The maximum number of pages to parse.")
-    parse_params.add_argument(
-        "--password", "-P", type=str, default="",
-        help="The password to use for decrypting PDF file.")
-    parse_params.add_argument(
-        "--rotation", "-R", default=0, type=int,
-        help="The number of degrees to rotate the PDF "
-             "before other types of processing.")
-
-    la_params = parser.add_argument_group(
-        'Layout analysis', description='Used during layout analysis.')
-    la_params.add_argument(
-        "--no-laparams", "-n", default=False, action="store_true",
-        help="If layout analysis parameters should be ignored.")
-    la_params.add_argument(
-        "--detect-vertical", "-V", default=False, action="store_true",
-        help="If vertical text should be considered during layout analysis")
-    la_params.add_argument(
-        "--char-margin", "-M", type=float, default=2.0,
-        help="If two characters are closer together than this margin they "
-             "are considered to be part of the same line. The margin is "
-             "specified relative to the width of the character.")
-    la_params.add_argument(
-        "--word-margin", "-W", type=float, default=0.1,
-        help="If two characters on the same line are further apart than this "
-             "margin then they are considered to be two separate words, and "
-             "an intermediate space will be added for readability. The margin "
-             "is specified relative to the width of the character.")
-    la_params.add_argument(
-        "--line-margin", "-L", type=float, default=0.5,
-        help="If two lines are are close together they are considered to "
-             "be part of the same paragraph. The margin is specified "
-             "relative to the height of a line.")
-    la_params.add_argument(
-        "--boxes-flow", "-F", type=float_or_disabled, default=0.5,
-        help="Specifies how much a horizontal and vertical position of a "
-             "text matters when determining the order of lines. The value "
-             "should be within the range of -1.0 (only horizontal position "
-             "matters) to +1.0 (only vertical position matters). You can also "
-             "pass `disabled` to disable advanced layout analysis, and "
-             "instead return text based on the position of the bottom left "
-             "corner of the text box.")
-    la_params.add_argument(
-        "--all-texts", "-A", default=False, action="store_true",
-        help="If layout analysis should be performed on text in figures.")
-
-    output_params = parser.add_argument_group(
-        'Output', description='Used during output generation.')
-    output_params.add_argument(
-        "--outfile", "-o", type=str, default="-",
-        help="Path to file where output is written. "
-             "Or \"-\" (default) to write to stdout.")
-    output_params.add_argument(
-        "--output_type", "-t", type=str, default="text",
-        help="Type of output to generate {text,html,xml,tag}.")
-    output_params.add_argument(
-        "--codec", "-c", type=str, default="utf-8",
-        help="Text encoding to use in output file.")
-    output_params.add_argument(
-        "--output-dir", "-O", default=None,
-        help="The output directory to put extracted images in. If not given, "
-             "images are not extracted.")
-    output_params.add_argument(
-        "--layoutmode", "-Y", default="normal",
-        type=str, help="Type of layout to use when generating html "
-                       "{normal,exact,loose}. If normal,each line is"
-                       " positioned separately in the html. If exact"
-                       ", each character is positioned separately in"
-                       " the html. If loose, same result as normal "
-                       "but with an additional newline after each "
-                       "text line. Only used when output_type is html.")
-    output_params.add_argument(
-        "--scale", "-s", type=float, default=1.0,
-        help="The amount of zoom to use when generating html file. "
-             "Only used when output_type is html.")
-    output_params.add_argument(
-        "--strip-control", "-S", default=False, action="store_true",
-        help="Remove control statement from text. "
-             "Only used when output_type is xml.")
-    return parser
-
-
-# main
-
-
-def main(args=None):
-
-    P = maketheparser()
-    A = P.parse_args(args=args)
-
-    if A.page_numbers:
-        A.page_numbers = {x-1 for x in A.page_numbers}
-    if A.pagenos:
-        A.page_numbers = {int(x)-1 for x in A.pagenos.split(",")}
-
-    if A.output_type == "text" and A.outfile != "-":
-        for override, alttype in OUTPUT_TYPES:
-            if A.outfile.endswith(override):
-                A.output_type = alttype
-
-    outfp = extract_text(**vars(A))
-    outfp.close()
-    return 0
-
-
-if __name__ == '__main__':
-    sys.exit(main())
@@ -1,12 +0,0 @@
-import sys
-import warnings
-
-
-__version__ = '20201018'
-
-if sys.version_info < (3, 6):
-    warnings.warn('Python 3.4 and 3.5 are deprecated. '
-                  'Please upgrade to Python 3.6 or newer.')
-
-if __name__ == '__main__':
-    print(__version__)
@@ -1,35 +0,0 @@
-""" Python implementation of Arcfour encryption algorithm.
-See https://en.wikipedia.org/wiki/RC4
-This code is in the public domain.
-
-"""
-
-
-class Arcfour:
-
-    def __init__(self, key):
-        # because Py3 range is not indexable
-        s = [i for i in range(256)]
-        j = 0
-        klen = len(key)
-        for i in range(256):
-            j = (j + s[i] + key[i % klen]) % 256
-            (s[i], s[j]) = (s[j], s[i])
-        self.s = s
-        (self.i, self.j) = (0, 0)
-        return
-
-    def process(self, data):
-        (i, j) = (self.i, self.j)
-        s = self.s
-        r = b''
-        for c in iter(data):
-            i = (i+1) % 256
-            j = (j+s[i]) % 256
-            (s[i], s[j]) = (s[j], s[i])
-            k = s[(s[i]+s[j]) % 256]
-            r += bytes((c ^ k,))
-        (self.i, self.j) = (i, j)
-        return r
-
-    encrypt = decrypt = process
@@ -1,71 +0,0 @@
-""" Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
-
-This code is in the public domain.
-
-"""
-
-import re
-import struct
-
-
-# ascii85decode(data)
-def ascii85decode(data):
-    """
-    In ASCII85 encoding, every four bytes are encoded with five ASCII
-    letters, using 85 different types of characters (as 256**4 < 85**5).
-    When the length of the original bytes is not a multiple of 4, a special
-    rule is used for round up.
-
-    The Adobe's ASCII85 implementation is slightly different from
-    its original in handling the last characters.
-
-    """
-    n = b = 0
-    out = b''
-    for i in iter(data):
-        c = bytes((i,))
-        if b'!' <= c and c <= b'u':
-            n += 1
-            b = b*85+(ord(c)-33)
-            if n == 5:
-                out += struct.pack('>L', b)
-                n = b = 0
-        elif c == b'z':
-            assert n == 0, str(n)
-            out += b'\0\0\0\0'
-        elif c == b'~':
-            if n:
-                for _ in range(5-n):
-                    b = b*85+84
-                out += struct.pack('>L', b)[:n-1]
-            break
-    return out
-
-
-# asciihexdecode(data)
-hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE)
-trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
-
-
-def asciihexdecode(data):
-    """
-    ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
-    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
-    ASCIIHexDecode filter produces one byte of binary data. All white-space
-    characters are ignored. A right angle bracket character (>) indicates
-    EOD. Any other characters will cause an error. If the filter encounters
-    the EOD marker after reading an odd number of hexadecimal digits, it
-    will behave as if a 0 followed the last digit.
-    """
-    def decode(x):
-        i = int(x, 16)
-        return bytes((i,))
-
-    out = b''
-    for x in hex_re.findall(data):
-        out += decode(x)
-
-    m = trail_re.search(data)
-    if m:
-        out += decode(m.group(1)+b'0')
-    return out
@@ -1,593 +0,0 @@
-# CCITT Fax decoder
-#
-# Bugs: uncompressed mode untested.
-#
-# cf.
-#  ITU-T Recommendation T.4
-#    "Standardization of Group 3 facsimile terminals
-#    for document transmission"
-#  ITU-T Recommendation T.6
-#    "FACSIMILE CODING SCHEMES AND CODING CONTROL FUNCTIONS
-#    FOR GROUP 4 FACSIMILE APPARATUS"
-
-
-import sys
-import array
-
-
-def get_bytes(data):
-    yield from data
-
-
-class BitParser:
-    def __init__(self):
-        self._pos = 0
-        return
-
-    @classmethod
-    def add(cls, root, v, bits):
-        p = root
-        b = None
-        for i in range(len(bits)):
-            if 0 < i:
-                if p[b] is None:
-                    p[b] = [None, None]
-                p = p[b]
-            if bits[i] == '1':
-                b = 1
-            else:
-                b = 0
-        p[b] = v
-        return
-
-    def feedbytes(self, data):
-        for byte in get_bytes(data):
-            for m in (128, 64, 32, 16, 8, 4, 2, 1):
-                self._parse_bit(byte & m)
-        return
-
-    def _parse_bit(self, x):
-        if x:
-            v = self._state[1]
-        else:
-            v = self._state[0]
-        self._pos += 1
-        if isinstance(v, list):
-            self._state = v
-        else:
-            self._state = self._accept(v)
-        return
-
-
-class CCITTG4Parser(BitParser):
-
-    # Decoding tree for mode codes; the leaves are consumed by _parse_mode():
-    #   int (0, +/-1, +/-2, +/-3) -> vertical mode with that offset
-    #   'h' -> horizontal mode, 'p' -> pass mode, 'u' -> uncompressed mode
-    #   'e' -> end of block (raises EOFB)
-    #   'x1'..'x7' match no branch in _parse_mode() and raise InvalidData
-    MODE = [None, None]
-    BitParser.add(MODE, 0,   '1')
-    BitParser.add(MODE, +1,  '011')
-    BitParser.add(MODE, -1,  '010')
-    BitParser.add(MODE, 'h', '001')
-    BitParser.add(MODE, 'p', '0001')
-    BitParser.add(MODE, +2,  '000011')
-    BitParser.add(MODE, -2,  '000010')
-    BitParser.add(MODE, +3,  '0000011')
-    BitParser.add(MODE, -3,  '0000010')
-    BitParser.add(MODE, 'u', '0000001111')
-    BitParser.add(MODE, 'x1', '0000001000')
-    BitParser.add(MODE, 'x2', '0000001001')
-    BitParser.add(MODE, 'x3', '0000001010')
-    BitParser.add(MODE, 'x4', '0000001011')
-    BitParser.add(MODE, 'x5', '0000001100')
-    BitParser.add(MODE, 'x6', '0000001101')
-    BitParser.add(MODE, 'x7', '0000001110')
-    BitParser.add(MODE, 'e', '000000000001000000000001')
-
-    WHITE = [None, None]
-    BitParser.add(WHITE, 0, '00110101')
-    BitParser.add(WHITE, 1, '000111')
-    BitParser.add(WHITE, 2, '0111')
-    BitParser.add(WHITE, 3, '1000')
-    BitParser.add(WHITE, 4, '1011')
-    BitParser.add(WHITE, 5, '1100')
-    BitParser.add(WHITE, 6, '1110')
-    BitParser.add(WHITE, 7, '1111')
-    BitParser.add(WHITE, 8, '10011')
-    BitParser.add(WHITE, 9, '10100')
-    BitParser.add(WHITE, 10, '00111')
-    BitParser.add(WHITE, 11, '01000')
-    BitParser.add(WHITE, 12, '001000')
-    BitParser.add(WHITE, 13, '000011')
-    BitParser.add(WHITE, 14, '110100')
-    BitParser.add(WHITE, 15, '110101')
-    BitParser.add(WHITE, 16, '101010')
-    BitParser.add(WHITE, 17, '101011')
-    BitParser.add(WHITE, 18, '0100111')
-    BitParser.add(WHITE, 19, '0001100')
-    BitParser.add(WHITE, 20, '0001000')
-    BitParser.add(WHITE, 21, '0010111')
-    BitParser.add(WHITE, 22, '0000011')
-    BitParser.add(WHITE, 23, '0000100')
-    BitParser.add(WHITE, 24, '0101000')
-    BitParser.add(WHITE, 25, '0101011')
-    BitParser.add(WHITE, 26, '0010011')
-    BitParser.add(WHITE, 27, '0100100')
-    BitParser.add(WHITE, 28, '0011000')
-    BitParser.add(WHITE, 29, '00000010')
-    BitParser.add(WHITE, 30, '00000011')
-    BitParser.add(WHITE, 31, '00011010')
-    BitParser.add(WHITE, 32, '00011011')
-    BitParser.add(WHITE, 33, '00010010')
-    BitParser.add(WHITE, 34, '00010011')
-    BitParser.add(WHITE, 35, '00010100')
-    BitParser.add(WHITE, 36, '00010101')
-    BitParser.add(WHITE, 37, '00010110')
-    BitParser.add(WHITE, 38, '00010111')
-    BitParser.add(WHITE, 39, '00101000')
-    BitParser.add(WHITE, 40, '00101001')
-    BitParser.add(WHITE, 41, '00101010')
-    BitParser.add(WHITE, 42, '00101011')
-    BitParser.add(WHITE, 43, '00101100')
-    BitParser.add(WHITE, 44, '00101101')
-    BitParser.add(WHITE, 45, '00000100')
-    BitParser.add(WHITE, 46, '00000101')
-    BitParser.add(WHITE, 47, '00001010')
-    BitParser.add(WHITE, 48, '00001011')
-    BitParser.add(WHITE, 49, '01010010')
-    BitParser.add(WHITE, 50, '01010011')
-    BitParser.add(WHITE, 51, '01010100')
-    BitParser.add(WHITE, 52, '01010101')
-    BitParser.add(WHITE, 53, '00100100')
-    BitParser.add(WHITE, 54, '00100101')
-    BitParser.add(WHITE, 55, '01011000')
-    BitParser.add(WHITE, 56, '01011001')
-    BitParser.add(WHITE, 57, '01011010')
-    BitParser.add(WHITE, 58, '01011011')
-    BitParser.add(WHITE, 59, '01001010')
-    BitParser.add(WHITE, 60, '01001011')
-    BitParser.add(WHITE, 61, '00110010')
-    BitParser.add(WHITE, 62, '00110011')
-    BitParser.add(WHITE, 63, '00110100')
-    BitParser.add(WHITE, 64, '11011')
-    BitParser.add(WHITE, 128, '10010')
-    BitParser.add(WHITE, 192, '010111')
-    BitParser.add(WHITE, 256, '0110111')
-    BitParser.add(WHITE, 320, '00110110')
-    BitParser.add(WHITE, 384, '00110111')
-    BitParser.add(WHITE, 448, '01100100')
-    BitParser.add(WHITE, 512, '01100101')
-    BitParser.add(WHITE, 576, '01101000')
-    BitParser.add(WHITE, 640, '01100111')
-    BitParser.add(WHITE, 704, '011001100')
-    BitParser.add(WHITE, 768, '011001101')
-    BitParser.add(WHITE, 832, '011010010')
-    BitParser.add(WHITE, 896, '011010011')
-    BitParser.add(WHITE, 960, '011010100')
-    BitParser.add(WHITE, 1024, '011010101')
-    BitParser.add(WHITE, 1088, '011010110')
-    BitParser.add(WHITE, 1152, '011010111')
-    BitParser.add(WHITE, 1216, '011011000')
-    BitParser.add(WHITE, 1280, '011011001')
-    BitParser.add(WHITE, 1344, '011011010')
-    BitParser.add(WHITE, 1408, '011011011')
-    BitParser.add(WHITE, 1472, '010011000')
-    BitParser.add(WHITE, 1536, '010011001')
-    BitParser.add(WHITE, 1600, '010011010')
-    BitParser.add(WHITE, 1664, '011000')
-    BitParser.add(WHITE, 1728, '010011011')
-    BitParser.add(WHITE, 1792, '00000001000')
-    BitParser.add(WHITE, 1856, '00000001100')
-    BitParser.add(WHITE, 1920, '00000001101')
-    BitParser.add(WHITE, 1984, '000000010010')
-    BitParser.add(WHITE, 2048, '000000010011')
-    BitParser.add(WHITE, 2112, '000000010100')
-    BitParser.add(WHITE, 2176, '000000010101')
-    BitParser.add(WHITE, 2240, '000000010110')
-    BitParser.add(WHITE, 2304, '000000010111')
-    BitParser.add(WHITE, 2368, '000000011100')
-    BitParser.add(WHITE, 2432, '000000011101')
-    BitParser.add(WHITE, 2496, '000000011110')
-    BitParser.add(WHITE, 2560, '000000011111')
-
-    BLACK = [None, None]
-    BitParser.add(BLACK, 0, '0000110111')
-    BitParser.add(BLACK, 1, '010')
-    BitParser.add(BLACK, 2, '11')
-    BitParser.add(BLACK, 3, '10')
-    BitParser.add(BLACK, 4, '011')
-    BitParser.add(BLACK, 5, '0011')
-    BitParser.add(BLACK, 6, '0010')
-    BitParser.add(BLACK, 7, '00011')
-    BitParser.add(BLACK, 8, '000101')
-    BitParser.add(BLACK, 9, '000100')
-    BitParser.add(BLACK, 10, '0000100')
-    BitParser.add(BLACK, 11, '0000101')
-    BitParser.add(BLACK, 12, '0000111')
-    BitParser.add(BLACK, 13, '00000100')
-    BitParser.add(BLACK, 14, '00000111')
-    BitParser.add(BLACK, 15, '000011000')
-    BitParser.add(BLACK, 16, '0000010111')
-    BitParser.add(BLACK, 17, '0000011000')
-    BitParser.add(BLACK, 18, '0000001000')
-    BitParser.add(BLACK, 19, '00001100111')
-    BitParser.add(BLACK, 20, '00001101000')
-    BitParser.add(BLACK, 21, '00001101100')
-    BitParser.add(BLACK, 22, '00000110111')
-    BitParser.add(BLACK, 23, '00000101000')
-    BitParser.add(BLACK, 24, '00000010111')
-    BitParser.add(BLACK, 25, '00000011000')
-    BitParser.add(BLACK, 26, '000011001010')
-    BitParser.add(BLACK, 27, '000011001011')
-    BitParser.add(BLACK, 28, '000011001100')
-    BitParser.add(BLACK, 29, '000011001101')
-    BitParser.add(BLACK, 30, '000001101000')
-    BitParser.add(BLACK, 31, '000001101001')
-    BitParser.add(BLACK, 32, '000001101010')
-    BitParser.add(BLACK, 33, '000001101011')
-    BitParser.add(BLACK, 34, '000011010010')
-    BitParser.add(BLACK, 35, '000011010011')
-    BitParser.add(BLACK, 36, '000011010100')
-    BitParser.add(BLACK, 37, '000011010101')
-    BitParser.add(BLACK, 38, '000011010110')
-    BitParser.add(BLACK, 39, '000011010111')
-    BitParser.add(BLACK, 40, '000001101100')
-    BitParser.add(BLACK, 41, '000001101101')
-    BitParser.add(BLACK, 42, '000011011010')
-    BitParser.add(BLACK, 43, '000011011011')
-    BitParser.add(BLACK, 44, '000001010100')
-    BitParser.add(BLACK, 45, '000001010101')
-    BitParser.add(BLACK, 46, '000001010110')
-    BitParser.add(BLACK, 47, '000001010111')
-    BitParser.add(BLACK, 48, '000001100100')
-    BitParser.add(BLACK, 49, '000001100101')
-    BitParser.add(BLACK, 50, '000001010010')
-    BitParser.add(BLACK, 51, '000001010011')
-    BitParser.add(BLACK, 52, '000000100100')
-    BitParser.add(BLACK, 53, '000000110111')
-    BitParser.add(BLACK, 54, '000000111000')
-    BitParser.add(BLACK, 55, '000000100111')
-    BitParser.add(BLACK, 56, '000000101000')
-    BitParser.add(BLACK, 57, '000001011000')
-    BitParser.add(BLACK, 58, '000001011001')
-    BitParser.add(BLACK, 59, '000000101011')
-    BitParser.add(BLACK, 60, '000000101100')
-    BitParser.add(BLACK, 61, '000001011010')
-    BitParser.add(BLACK, 62, '000001100110')
-    BitParser.add(BLACK, 63, '000001100111')
-    BitParser.add(BLACK, 64, '0000001111')
-    BitParser.add(BLACK, 128, '000011001000')
-    BitParser.add(BLACK, 192, '000011001001')
-    BitParser.add(BLACK, 256, '000001011011')
-    BitParser.add(BLACK, 320, '000000110011')
-    BitParser.add(BLACK, 384, '000000110100')
-    BitParser.add(BLACK, 448, '000000110101')
-    BitParser.add(BLACK, 512, '0000001101100')
-    BitParser.add(BLACK, 576, '0000001101101')
-    BitParser.add(BLACK, 640, '0000001001010')
-    BitParser.add(BLACK, 704, '0000001001011')
-    BitParser.add(BLACK, 768, '0000001001100')
-    BitParser.add(BLACK, 832, '0000001001101')
-    BitParser.add(BLACK, 896, '0000001110010')
-    BitParser.add(BLACK, 960, '0000001110011')
-    BitParser.add(BLACK, 1024, '0000001110100')
-    BitParser.add(BLACK, 1088, '0000001110101')
-    BitParser.add(BLACK, 1152, '0000001110110')
-    BitParser.add(BLACK, 1216, '0000001110111')
-    BitParser.add(BLACK, 1280, '0000001010010')
-    BitParser.add(BLACK, 1344, '0000001010011')
-    BitParser.add(BLACK, 1408, '0000001010100')
-    BitParser.add(BLACK, 1472, '0000001010101')
-    BitParser.add(BLACK, 1536, '0000001011010')
-    BitParser.add(BLACK, 1600, '0000001011011')
-    BitParser.add(BLACK, 1664, '0000001100100')
-    BitParser.add(BLACK, 1728, '0000001100101')
-    BitParser.add(BLACK, 1792, '00000001000')
-    BitParser.add(BLACK, 1856, '00000001100')
-    BitParser.add(BLACK, 1920, '00000001101')
-    BitParser.add(BLACK, 1984, '000000010010')
-    BitParser.add(BLACK, 2048, '000000010011')
-    BitParser.add(BLACK, 2112, '000000010100')
-    BitParser.add(BLACK, 2176, '000000010101')
-    BitParser.add(BLACK, 2240, '000000010110')
-    BitParser.add(BLACK, 2304, '000000010111')
-    BitParser.add(BLACK, 2368, '000000011100')
-    BitParser.add(BLACK, 2432, '000000011101')
-    BitParser.add(BLACK, 2496, '000000011110')
-    BitParser.add(BLACK, 2560, '000000011111')
-
-    # Decoding tree for uncompressed mode; leaves go to _parse_uncompressed().
-    # Plain '0'/'1' strings are literal pixel bits.  A leaf starting with 'T'
-    # ends uncompressed mode: the digit after 'T' becomes the current color
-    # and the remaining characters are emitted as pixel bits.
-    UNCOMPRESSED = [None, None]
-    BitParser.add(UNCOMPRESSED, '1', '1')
-    BitParser.add(UNCOMPRESSED, '01', '01')
-    BitParser.add(UNCOMPRESSED, '001', '001')
-    BitParser.add(UNCOMPRESSED, '0001', '0001')
-    BitParser.add(UNCOMPRESSED, '00001', '00001')
-    BitParser.add(UNCOMPRESSED, '00000', '000001')
-    BitParser.add(UNCOMPRESSED, 'T00', '00000011')
-    BitParser.add(UNCOMPRESSED, 'T10', '00000010')
-    BitParser.add(UNCOMPRESSED, 'T000', '000000011')
-    BitParser.add(UNCOMPRESSED, 'T100', '000000010')
-    BitParser.add(UNCOMPRESSED, 'T0000', '0000000011')
-    BitParser.add(UNCOMPRESSED, 'T1000', '0000000010')
-    BitParser.add(UNCOMPRESSED, 'T00000', '00000000011')
-    BitParser.add(UNCOMPRESSED, 'T10000', '00000000010')
-
-    # Raised by _parse_mode() for the 'e' mode code; feedbytes() stops
-    # consuming input when it sees it.
-    class EOFB(Exception):
-        pass
-
-    # Raised by the parse callbacks when a code is not recognized.
-    class InvalidData(Exception):
-        pass
-
-    # Raised by _flush_line() after a completed row when bytealign is set;
-    # feedbytes() reacts by abandoning the rest of the current byte.
-    class ByteSkip(Exception):
-        pass
-
-    def __init__(self, width, bytealign=False):
-        BitParser.__init__(self)
-        self.width = width
-        self.bytealign = bytealign
-        self.reset()
-        return
-
-    def feedbytes(self, data):
-        for byte in get_bytes(data):
-            try:
-                for m in (128, 64, 32, 16, 8, 4, 2, 1):
-                    self._parse_bit(byte & m)
-            except self.ByteSkip:
-                self._accept = self._parse_mode
-                self._state = self.MODE
-            except self.EOFB:
-                break
-        return
-
-    def _parse_mode(self, mode):
-        if mode == 'p':
-            self._do_pass()
-            self._flush_line()
-            return self.MODE
-        elif mode == 'h':
-            self._n1 = 0
-            self._accept = self._parse_horiz1
-            if self._color:
-                return self.WHITE
-            else:
-                return self.BLACK
-        elif mode == 'u':
-            self._accept = self._parse_uncompressed
-            return self.UNCOMPRESSED
-        elif mode == 'e':
-            raise self.EOFB
-        elif isinstance(mode, int):
-            self._do_vertical(mode)
-            self._flush_line()
-            return self.MODE
-        else:
-            raise self.InvalidData(mode)
-
-    def _parse_horiz1(self, n):
-        if n is None:
-            raise self.InvalidData
-        self._n1 += n
-        if n < 64:
-            self._n2 = 0
-            self._color = 1-self._color
-            self._accept = self._parse_horiz2
-        if self._color:
-            return self.WHITE
-        else:
-            return self.BLACK
-
-    def _parse_horiz2(self, n):
-        if n is None:
-            raise self.InvalidData
-        self._n2 += n
-        if n < 64:
-            self._color = 1-self._color
-            self._accept = self._parse_mode
-            self._do_horizontal(self._n1, self._n2)
-            self._flush_line()
-            return self.MODE
-        elif self._color:
-            return self.WHITE
-        else:
-            return self.BLACK
-
-    def _parse_uncompressed(self, bits):
-        if not bits:
-            raise self.InvalidData
-        if bits.startswith('T'):
-            self._accept = self._parse_mode
-            self._color = int(bits[1])
-            self._do_uncompressed(bits[2:])
-            return self.MODE
-        else:
-            self._do_uncompressed(bits)
-            return self.UNCOMPRESSED
-
-    def _get_bits(self):
-        return ''.join(str(b) for b in self._curline[:self._curpos])
-
-    def _get_refline(self, i):
-        if i < 0:
-            return '[]'+''.join(str(b) for b in self._refline)
-        elif len(self._refline) <= i:
-            return ''.join(str(b) for b in self._refline)+'[]'
-        else:
-            return (''.join(str(b) for b in self._refline[:i]) +
-                    '['+str(self._refline[i])+']' +
-                    ''.join(str(b) for b in self._refline[i+1:]))
-
-    def reset(self):
-        self._y = 0
-        self._curline = array.array('b', [1]*self.width)
-        self._reset_line()
-        self._accept = self._parse_mode
-        self._state = self.MODE
-        return
-
-    def output_line(self, y, bits):
-        print(y, ''.join(str(b) for b in bits))
-        return
-
-    def _reset_line(self):
-        self._refline = self._curline
-        self._curline = array.array('b', [1]*self.width)
-        self._curpos = -1
-        self._color = 1
-        return
-
-    def _flush_line(self):
-        if self.width <= self._curpos:
-            self.output_line(self._y, self._curline)
-            self._y += 1
-            self._reset_line()
-            if self.bytealign:
-                raise self.ByteSkip
-        return
-
-    def _do_vertical(self, dx):
-        x1 = self._curpos+1
-        while 1:
-            if x1 == 0:
-                if (self._color == 1 and self._refline[x1] != self._color):
-                    break
-            elif x1 == len(self._refline):
-                break
-            elif (self._refline[x1-1] == self._color and
-                  self._refline[x1] != self._color):
-                break
-            x1 += 1
-        x1 += dx
-        x0 = max(0, self._curpos)
-        x1 = max(0, min(self.width, x1))
-        if x1 < x0:
-            for x in range(x1, x0):
-                self._curline[x] = self._color
-        elif x0 < x1:
-            for x in range(x0, x1):
-                self._curline[x] = self._color
-        self._curpos = x1
-        self._color = 1-self._color
-        return
-
-    def _do_pass(self):
-        x1 = self._curpos+1
-        while 1:
-            if x1 == 0:
-                if (self._color == 1 and self._refline[x1] != self._color):
-                    break
-            elif x1 == len(self._refline):
-                break
-            elif (self._refline[x1-1] == self._color and
-                  self._refline[x1] != self._color):
-                break
-            x1 += 1
-        while 1:
-            if x1 == 0:
-                if (self._color == 0 and self._refline[x1] == self._color):
-                    break
-            elif x1 == len(self._refline):
-                break
-            elif (self._refline[x1-1] != self._color and
-                  self._refline[x1] == self._color):
-                break
-            x1 += 1
-        for x in range(self._curpos, x1):
-            self._curline[x] = self._color
-        self._curpos = x1
-        return
-
-    def _do_horizontal(self, n1, n2):
-        if self._curpos < 0:
-            self._curpos = 0
-        x = self._curpos
-        for _ in range(n1):
-            if len(self._curline) <= x:
-                break
-            self._curline[x] = self._color
-            x += 1
-        for _ in range(n2):
-            if len(self._curline) <= x:
-                break
-            self._curline[x] = 1-self._color
-            x += 1
-        self._curpos = x
-        return
-
-    def _do_uncompressed(self, bits):
-        for c in bits:
-            self._curline[self._curpos] = int(c)
-            self._curpos += 1
-            self._flush_line()
-        return
-
-
-class CCITTFaxDecoder(CCITTG4Parser):
-
-    def __init__(self, width, bytealign=False, reversed=False):
-        CCITTG4Parser.__init__(self, width, bytealign=bytealign)
-        self.reversed = reversed
-        self._buf = b''
-        return
-
-    def close(self):
-        return self._buf
-
-    def output_line(self, y, bits):
-        bytes = array.array('B', [0]*((len(bits)+7)//8))
-        if self.reversed:
-            bits = [1-b for b in bits]
-        for (i, b) in enumerate(bits):
-            if b:
-                bytes[i//8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
-        self._buf += bytes.tostring()
-        return
-
-
-def ccittfaxdecode(data, params):
-    K = params.get('K')
-    cols = params.get('Columns')
-    bytealign = params.get('EncodedByteAlign')
-    reversed = params.get('BlackIs1')
-    if K == -1:
-        parser = CCITTFaxDecoder(cols, bytealign=bytealign, reversed=reversed)
-    else:
-        raise ValueError(K)
-    parser.feedbytes(data)
-    return parser.close()
-
-
-# test
-def main(argv):
-    if not argv[1:]:
-        import unittest
-        return unittest.main()
-
-    class Parser(CCITTG4Parser):
-        def __init__(self, width, bytealign=False):
-            import pygame
-            CCITTG4Parser.__init__(self, width, bytealign=bytealign)
-            self.img = pygame.Surface((self.width, 1000))
-            return
-
-        def output_line(self, y, bits):
-            for (x, b) in enumerate(bits):
-                if b:
-                    self.img.set_at((x, y), (255, 255, 255))
-                else:
-                    self.img.set_at((x, y), (0, 0, 0))
-            return
-
-        def close(self):
-            import pygame
-            pygame.image.save(self.img, 'out.bmp')
-            return
-    for path in argv[1:]:
-        fp = open(path, 'rb')
-        (_, _, k, w, h, _) = path.split('.')
-        parser = Parser(int(w))
-        parser.feedbytes(fp.read())
-        parser.close()
-        fp.close()
-    return
-
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv))
@@ -1,425 +0,0 @@
-""" Adobe character mapping (CMap) support.
-
-CMaps provide the mapping between character codes and Unicode
-code-points to character ids (CIDs).
-
-More information is available on the Adobe website:
-
-  http://opensource.adobe.com/wiki/display/cmap/CMap+Resources
-
-"""
-
-import sys
-import os
-import os.path
-import gzip
-import pickle as pickle
-import struct
-import logging
-from .psparser import PSStackParser
-from .psparser import PSSyntaxError
-from .psparser import PSEOF
-from .psparser import PSLiteral
-from .psparser import literal_name
-from .psparser import KWD
-from .encodingdb import name2unicode
-from .utils import choplist
-from .utils import nunpack
-
-
-log = logging.getLogger(__name__)
-
-
-class CMapError(Exception):
-    """Base class for CMap errors; CMapDB.CMapNotFound derives from it."""
-    pass
-
-
-class CMapBase:
-
-    debug = 0
-
-    def __init__(self, **kwargs):
-        self.attrs = kwargs.copy()
-        return
-
-    def is_vertical(self):
-        return self.attrs.get('WMode', 0) != 0
-
-    def set_attr(self, k, v):
-        self.attrs[k] = v
-        return
-
-    def add_code2cid(self, code, cid):
-        return
-
-    def add_cid2unichr(self, cid, code):
-        return
-
-    def use_cmap(self, cmap):
-        return
-
-
-class CMap(CMapBase):
-
-    def __init__(self, **kwargs):
-        CMapBase.__init__(self, **kwargs)
-        self.code2cid = {}
-        return
-
-    def __repr__(self):
-        return '<CMap: %s>' % self.attrs.get('CMapName')
-
-    def use_cmap(self, cmap):
-        assert isinstance(cmap, CMap), str(type(cmap))
-
-        def copy(dst, src):
-            for (k, v) in src.items():
-                if isinstance(v, dict):
-                    d = {}
-                    dst[k] = d
-                    copy(d, v)
-                else:
-                    dst[k] = v
-        copy(self.code2cid, cmap.code2cid)
-        return
-
-    def decode(self, code):
-        log.debug('decode: %r, %r', self, code)
-        d = self.code2cid
-        for i in iter(code):
-            if i in d:
-                d = d[i]
-                if isinstance(d, int):
-                    yield d
-                    d = self.code2cid
-            else:
-                d = self.code2cid
-        return
-
-    def dump(self, out=sys.stdout, code2cid=None, code=None):
-        if code2cid is None:
-            code2cid = self.code2cid
-            code = ()
-        for (k, v) in sorted(code2cid.items()):
-            c = code+(k,)
-            if isinstance(v, int):
-                out.write('code %r = cid %d\n' % (c, v))
-            else:
-                self.dump(out=out, code2cid=v, code=c)
-        return
-
-
-class IdentityCMap(CMapBase):
-
-    def decode(self, code):
-        n = len(code)//2
-        if n:
-            return struct.unpack('>%dH' % n, code)
-        else:
-            return ()
-
-
-class IdentityCMapByte(IdentityCMap):
-
-    def decode(self, code):
-        n = len(code)
-        if n:
-            return struct.unpack('>%dB' % n, code)
-        else:
-            return ()
-
-
-class UnicodeMap(CMapBase):
-
-    def __init__(self, **kwargs):
-        CMapBase.__init__(self, **kwargs)
-        self.cid2unichr = {}
-        return
-
-    def __repr__(self):
-        return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
-
-    def get_unichr(self, cid):
-        log.debug('get_unichr: %r, %r', self, cid)
-        return self.cid2unichr[cid]
-
-    def dump(self, out=sys.stdout):
-        for (k, v) in sorted(self.cid2unichr.items()):
-            out.write('cid %d = unicode %r\n' % (k, v))
-        return
-
-
-class FileCMap(CMap):
-
-    def add_code2cid(self, code, cid):
-        assert isinstance(code, str) and isinstance(cid, int),\
-            str((type(code), type(cid)))
-        d = self.code2cid
-        for c in code[:-1]:
-            c = ord(c)
-            if c in d:
-                d = d[c]
-            else:
-                t = {}
-                d[c] = t
-                d = t
-        c = ord(code[-1])
-        d[c] = cid
-        return
-
-
-class FileUnicodeMap(UnicodeMap):
-
-    def add_cid2unichr(self, cid, code):
-        assert isinstance(cid, int), str(type(cid))
-        if isinstance(code, PSLiteral):
-            # Interpret as an Adobe glyph name.
-            self.cid2unichr[cid] = name2unicode(code.name)
-        elif isinstance(code, bytes):
-            # Interpret as UTF-16BE.
-            self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore')
-        elif isinstance(code, int):
-            self.cid2unichr[cid] = chr(code)
-        else:
-            raise TypeError(code)
-        return
-
-
-class PyCMap(CMap):
-
-    def __init__(self, name, module):
-        CMap.__init__(self, CMapName=name)
-        self.code2cid = module.CODE2CID
-        if module.IS_VERTICAL:
-            self.attrs['WMode'] = 1
-        return
-
-
-class PyUnicodeMap(UnicodeMap):
-
-    def __init__(self, name, module, vertical):
-        UnicodeMap.__init__(self, CMapName=name)
-        if vertical:
-            self.cid2unichr = module.CID2UNICHR_V
-            self.attrs['WMode'] = 1
-        else:
-            self.cid2unichr = module.CID2UNICHR_H
-        return
-
-
-class CMapDB:
-
-    _cmap_cache = {}
-    _umap_cache = {}
-
-    class CMapNotFound(CMapError):
-        pass
-
-    @classmethod
-    def _load_data(cls, name):
-        name = name.replace("\0", "")
-        filename = '%s.pickle.gz' % name
-        log.info('loading: %r', name)
-        cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
-                      os.path.join(os.path.dirname(__file__), 'cmap'),)
-        for directory in cmap_paths:
-            path = os.path.join(directory, filename)
-            if os.path.exists(path):
-                gzfile = gzip.open(path)
-                try:
-                    return type(str(name), (), pickle.loads(gzfile.read()))
-                finally:
-                    gzfile.close()
-        else:
-            raise CMapDB.CMapNotFound(name)
-
-    @classmethod
-    def get_cmap(cls, name):
-        if name == 'Identity-H':
-            return IdentityCMap(WMode=0)
-        elif name == 'Identity-V':
-            return IdentityCMap(WMode=1)
-        elif name == 'OneByteIdentityH':
-            return IdentityCMapByte(WMode=0)
-        elif name == 'OneByteIdentityV':
-            return IdentityCMapByte(WMode=1)
-        try:
-            return cls._cmap_cache[name]
-        except KeyError:
-            pass
-        data = cls._load_data(name)
-        cls._cmap_cache[name] = cmap = PyCMap(name, data)
-        return cmap
-
-    @classmethod
-    def get_unicode_map(cls, name, vertical=False):
-        try:
-            return cls._umap_cache[name][vertical]
-        except KeyError:
-            pass
-        data = cls._load_data('to-unicode-%s' % name)
-        cls._umap_cache[name] = [PyUnicodeMap(name, data, v)
-                                 for v in (False, True)]
-        return cls._umap_cache[name][vertical]
-
-
-class CMapParser(PSStackParser):
-
-    def __init__(self, cmap, fp):
-        PSStackParser.__init__(self, fp)
-        self.cmap = cmap
-        # some ToUnicode maps don't have "begincmap" keyword.
-        self._in_cmap = True
-        return
-
-    def run(self):
-        try:
-            self.nextobject()
-        except PSEOF:
-            pass
-        return
-
-    KEYWORD_BEGINCMAP = KWD(b'begincmap')
-    KEYWORD_ENDCMAP = KWD(b'endcmap')
-    KEYWORD_USECMAP = KWD(b'usecmap')
-    KEYWORD_DEF = KWD(b'def')
-    KEYWORD_BEGINCODESPACERANGE = KWD(b'begincodespacerange')
-    KEYWORD_ENDCODESPACERANGE = KWD(b'endcodespacerange')
-    KEYWORD_BEGINCIDRANGE = KWD(b'begincidrange')
-    KEYWORD_ENDCIDRANGE = KWD(b'endcidrange')
-    KEYWORD_BEGINCIDCHAR = KWD(b'begincidchar')
-    KEYWORD_ENDCIDCHAR = KWD(b'endcidchar')
-    KEYWORD_BEGINBFRANGE = KWD(b'beginbfrange')
-    KEYWORD_ENDBFRANGE = KWD(b'endbfrange')
-    KEYWORD_BEGINBFCHAR = KWD(b'beginbfchar')
-    KEYWORD_ENDBFCHAR = KWD(b'endbfchar')
-    KEYWORD_BEGINNOTDEFRANGE = KWD(b'beginnotdefrange')
-    KEYWORD_ENDNOTDEFRANGE = KWD(b'endnotdefrange')
-
-    def do_keyword(self, pos, token):
-        if token is self.KEYWORD_BEGINCMAP:
-            self._in_cmap = True
-            self.popall()
-            return
-        elif token is self.KEYWORD_ENDCMAP:
-            self._in_cmap = False
-            return
-        if not self._in_cmap:
-            return
-        #
-        if token is self.KEYWORD_DEF:
-            try:
-                ((_, k), (_, v)) = self.pop(2)
-                self.cmap.set_attr(literal_name(k), v)
-            except PSSyntaxError:
-                pass
-            return
-
-        if token is self.KEYWORD_USECMAP:
-            try:
-                ((_, cmapname),) = self.pop(1)
-                self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
-            except PSSyntaxError:
-                pass
-            except CMapDB.CMapNotFound:
-                pass
-            return
-
-        if token is self.KEYWORD_BEGINCODESPACERANGE:
-            self.popall()
-            return
-        if token is self.KEYWORD_ENDCODESPACERANGE:
-            self.popall()
-            return
-
-        if token is self.KEYWORD_BEGINCIDRANGE:
-            self.popall()
-            return
-        if token is self.KEYWORD_ENDCIDRANGE:
-            objs = [obj for (__, obj) in self.popall()]
-            for (s, e, cid) in choplist(3, objs):
-                if (not isinstance(s, str) or not isinstance(e, str) or
-                   not isinstance(cid, int) or len(s) != len(e)):
-                    continue
-                sprefix = s[:-4]
-                eprefix = e[:-4]
-                if sprefix != eprefix:
-                    continue
-                svar = s[-4:]
-                evar = e[-4:]
-                s1 = nunpack(svar)
-                e1 = nunpack(evar)
-                vlen = len(svar)
-                for i in range(e1-s1+1):
-                    x = sprefix+struct.pack('>L', s1+i)[-vlen:]
-                    self.cmap.add_code2cid(x, cid+i)
-            return
-
-        if token is self.KEYWORD_BEGINCIDCHAR:
-            self.popall()
-            return
-        if token is self.KEYWORD_ENDCIDCHAR:
-            objs = [obj for (__, obj) in self.popall()]
-            for (cid, code) in choplist(2, objs):
-                if isinstance(code, str) and isinstance(cid, str):
-                    self.cmap.add_code2cid(code, nunpack(cid))
-            return
-
-        if token is self.KEYWORD_BEGINBFRANGE:
-            self.popall()
-            return
-        if token is self.KEYWORD_ENDBFRANGE:
-            objs = [obj for (__, obj) in self.popall()]
-            for (s, e, code) in choplist(3, objs):
-                if (not isinstance(s, bytes) or not isinstance(e, bytes) or
-                   len(s) != len(e)):
-                    continue
-                s1 = nunpack(s)
-                e1 = nunpack(e)
-                if isinstance(code, list):
-                    for i in range(e1-s1+1):
-                        self.cmap.add_cid2unichr(s1+i, code[i])
-                else:
-                    var = code[-4:]
-                    base = nunpack(var)
-                    prefix = code[:-4]
-                    vlen = len(var)
-                    for i in range(e1-s1+1):
-                        x = prefix+struct.pack('>L', base+i)[-vlen:]
-                        self.cmap.add_cid2unichr(s1+i, x)
-            return
-
-        if token is self.KEYWORD_BEGINBFCHAR:
-            self.popall()
-            return
-        if token is self.KEYWORD_ENDBFCHAR:
-            objs = [obj for (__, obj) in self.popall()]
-            for (cid, code) in choplist(2, objs):
-                if isinstance(cid, bytes) and isinstance(code, bytes):
-                    self.cmap.add_cid2unichr(nunpack(cid), code)
-            return
-
-        if token is self.KEYWORD_BEGINNOTDEFRANGE:
-            self.popall()
-            return
-        if token is self.KEYWORD_ENDNOTDEFRANGE:
-            self.popall()
-            return
-
-        self.push((pos, token))
-        return
-
-
-def main(argv):
-    args = argv[1:]
-    for fname in args:
-        fp = open(fname, 'rb')
-        cmap = FileUnicodeMap()
-        CMapParser(cmap, fp).run()
-        fp.close()
-        cmap.dump()
-    return
-
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv))
@@ -1,587 +0,0 @@
-import logging
-import re
-import sys
-from .pdfdevice import PDFTextDevice
-from .pdffont import PDFUnicodeNotDefined
-from .layout import LTContainer
-from .layout import LTPage
-from .layout import LTText
-from .layout import LTLine
-from .layout import LTRect
-from .layout import LTCurve
-from .layout import LTFigure
-from .layout import LTImage
-from .layout import LTChar
-from .layout import LTTextLine
-from .layout import LTTextBox
-from .layout import LTTextBoxVertical
-from .layout import LTTextGroup
-from .utils import apply_matrix_pt
-from .utils import mult_matrix
-from .utils import enc
-from .utils import bbox2str
-from . import utils
-
-
-log = logging.getLogger(__name__)
-
-
-class PDFLayoutAnalyzer(PDFTextDevice):
-    """Text device that collects drawing callbacks into layout objects.
-
-    Items are added to ``self.cur_item``; figures nest by pushing the
-    enclosing container onto ``self._stack``.  A finished page is handed to
-    :meth:`receive_layout`, which does nothing in this class.
-    """
-
-    def __init__(self, rsrcmgr, pageno=1, laparams=None):
-        PDFTextDevice.__init__(self, rsrcmgr)
-        self.pageno = pageno
-        self.laparams = laparams
-        self._stack = []
-
-    def begin_page(self, page, ctm):
-        """Create the ``LTPage`` for ``page``, sized from its media box."""
-        (left, bottom, right, top) = page.mediabox
-        (ax, ay) = apply_matrix_pt(ctm, (left, bottom))
-        (bx, by) = apply_matrix_pt(ctm, (right, top))
-        # The page box is anchored at the origin; only the extent is kept.
-        extent = (0, 0, abs(ax - bx), abs(ay - by))
-        self.cur_item = LTPage(self.pageno, extent)
-
-    def end_page(self, page):
-        """Analyze the page if requested, bump the counter and deliver it."""
-        assert not self._stack, str(len(self._stack))
-        finished = self.cur_item
-        assert isinstance(finished, LTPage), str(type(finished))
-        if self.laparams is not None:
-            finished.analyze(self.laparams)
-        self.pageno += 1
-        self.receive_layout(finished)
-
-    def begin_figure(self, name, bbox, matrix):
-        """Open a nested ``LTFigure`` and remember the enclosing container."""
-        self._stack.append(self.cur_item)
-        combined = mult_matrix(matrix, self.ctm)
-        self.cur_item = LTFigure(name, bbox, combined)
-
-    def end_figure(self, _):
-        """Close the current figure and attach it to its parent container."""
-        figure = self.cur_item
-        assert isinstance(figure, LTFigure), str(type(figure))
-        parent = self._stack.pop()
-        self.cur_item = parent
-        parent.add(figure)
-
-    def render_image(self, name, stream):
-        """Add an ``LTImage`` that spans the bounding box of the figure."""
-        holder = self.cur_item
-        assert isinstance(holder, LTFigure), str(type(holder))
-        bounds = (holder.x0, holder.y0, holder.x1, holder.y1)
-        holder.add(LTImage(name, stream, bounds))
-
-    def paint_path(self, gstate, stroke, fill, evenodd, path):
-        """Paint paths described in section 4.4 of the PDF reference manual"""
-        opcodes = ''.join(segment[0] for segment in path)
-
-        if opcodes.count('m') > 1:
-            # Several "move to" operators: paint each subpath on its own.
-            for found in re.finditer(r'm[^m]+', opcodes):
-                (begin, end) = found.span(0)
-                self.paint_path(gstate, stroke, fill, evenodd,
-                                path[begin:end])
-            return
-
-        if opcodes == 'ml':
-            # One segment; only axis-aligned segments are kept as lines.
-            (x0, y0) = apply_matrix_pt(self.ctm, path[0][1:])
-            (x1, y1) = apply_matrix_pt(self.ctm, path[1][1:])
-            if x0 == x1 or y0 == y1:
-                self.cur_item.add(
-                    LTLine(gstate.linewidth, (x0, y0), (x1, y1), stroke,
-                           fill, evenodd, gstate.scolor, gstate.ncolor))
-            return
-
-        if opcodes == 'mlllh':
-            (x0, y0) = apply_matrix_pt(self.ctm, path[0][1:])
-            (x1, y1) = apply_matrix_pt(self.ctm, path[1][1:])
-            (x2, y2) = apply_matrix_pt(self.ctm, path[2][1:])
-            (x3, y3) = apply_matrix_pt(self.ctm, path[3][1:])
-            # Axis-aligned rectangle, whichever edge is drawn first.
-            axis_aligned = (
-                (x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
-                (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0))
-            if axis_aligned:
-                self.cur_item.add(
-                    LTRect(gstate.linewidth, (x0, y0, x2, y2), stroke,
-                           fill, evenodd, gstate.scolor, gstate.ncolor))
-                return
-
-        # Everything else becomes a generic curve.
-        self.cur_item.add(
-            self._create_curve(gstate, stroke, fill, evenodd, path))
-
-    def _create_curve(self, gstate, stroke, fill, evenodd, path):
-        """Create a `LTCurve` object for the paint path operator"""
-        points = []
-        for segment in path:
-            # Operands after the opcode are consumed pairwise as points.
-            for xy in zip(segment[1::2], segment[2::2]):
-                points.append(apply_matrix_pt(self.ctm, xy))
-        return LTCurve(gstate.linewidth, points, stroke, fill, evenodd,
-                       gstate.scolor, gstate.ncolor)
-
-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
-                    graphicstate):
-        """Add an ``LTChar`` for ``cid`` and return its ``adv`` attribute."""
-        try:
-            glyph_text = font.to_unichr(cid)
-            assert isinstance(glyph_text, str), str(type(glyph_text))
-        except PDFUnicodeNotDefined:
-            glyph_text = self.handle_undefined_char(font, cid)
-        width = font.char_width(cid)
-        displacement = font.char_disp(cid)
-        char_item = LTChar(matrix, font, fontsize, scaling, rise, glyph_text,
-                           width, displacement, ncs, graphicstate)
-        self.cur_item.add(char_item)
-        return char_item.adv
-
-    def handle_undefined_char(self, font, cid):
-        """Log the unmapped character and return a ``(cid:N)`` placeholder."""
-        log.info('undefined: %r, %r', font, cid)
-        return '(cid:%d)' % cid
-
-    def receive_layout(self, ltpage):
-        """Hook called with each finished page; does nothing here."""
-        return
-
-
-class PDFPageAggregator(PDFLayoutAnalyzer):
-    """Layout analyzer that keeps the most recently delivered page."""
-
-    def __init__(self, rsrcmgr, pageno=1, laparams=None):
-        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
-                                   laparams=laparams)
-        # Stays None until the first page has been delivered.
-        self.result = None
-
-    def receive_layout(self, ltpage):
-        """Store ``ltpage``, replacing any page stored before."""
-        self.result = ltpage
-
-    def get_result(self):
-        """Return the last stored page, or None if there is none yet."""
-        return self.result
-
-
-class PDFConverter(PDFLayoutAnalyzer):
-    """Base class for converters that write the analysed layout to ``outfp``.
-
-    The constructor records in ``self.outfp_binary`` whether ``outfp``
-    expects bytes (True) or text (False).
-    """
-
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1,
-                 laparams=None):
-        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
-                                   laparams=laparams)
-        self.outfp = outfp
-        self.codec = codec
-        if hasattr(self.outfp, 'mode'):
-            # File-like objects with a mode: a 'b' in it means binary.
-            self.outfp_binary = 'b' in self.outfp.mode
-        else:
-            import io
-            if isinstance(self.outfp, io.BytesIO):
-                binary = True
-            elif isinstance(self.outfp, io.StringIO):
-                binary = False
-            else:
-                # Last resort: probe by writing text.  A text sink accepts
-                # it (and receives the probe character); a TypeError is
-                # taken to mean the sink wants bytes.
-                try:
-                    self.outfp.write("é")
-                    binary = False
-                except TypeError:
-                    binary = True
-            self.outfp_binary = binary
-
-
-class TextConverter(PDFConverter):
-    """Converter that writes the text of each page to ``outfp``.
-
-    Path painting is discarded, and images are recorded only when an
-    ``imagewriter`` is supplied.
-    """
-
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 showpageno=False, imagewriter=None):
-        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
-                              laparams=laparams)
-        self.showpageno = showpageno
-        self.imagewriter = imagewriter
-
-    def write_text(self, text):
-        """Write ``text``, encoded first when the sink is binary."""
-        prepared = utils.compatible_encode_method(text, self.codec, 'ignore')
-        self.outfp.write(prepared.encode() if self.outfp_binary else prepared)
-
-    def receive_layout(self, ltpage):
-        """Write the page text, an optional page header and a form feed."""
-        def emit(node):
-            if isinstance(node, LTContainer):
-                for member in node:
-                    emit(member)
-            elif isinstance(node, LTText):
-                self.write_text(node.get_text())
-            # Deliberately a second, independent test: a text box is handled
-            # by the chain above and then also gets a trailing newline.
-            if isinstance(node, LTTextBox):
-                self.write_text('\n')
-            elif isinstance(node, LTImage) and self.imagewriter is not None:
-                self.imagewriter.export_image(node)
-
-        if self.showpageno:
-            self.write_text('Page %s\n' % ltpage.pageid)
-        emit(ltpage)
-        self.write_text('\f')
-
-    # Some dummy functions to save memory/CPU when all that is wanted
-    # is text.  This stops all the image and drawing output from being
-    # recorded and taking up RAM.
-    def render_image(self, name, stream):
-        """Record the image only when an image writer is configured."""
-        if self.imagewriter is not None:
-            PDFConverter.render_image(self, name, stream)
-
-    def paint_path(self, gstate, stroke, fill, evenodd, path):
-        """Ignore path painting entirely."""
-        return
-
-
-class HTMLConverter(PDFConverter):
-    """Converter that writes each page as absolutely positioned HTML.
-
-    The header is written by the constructor; call :meth:`close` after the
-    last page to write the page-link footer and the closing tags.
-    ``rect_colors`` and ``text_colors`` map an element kind to a CSS colour;
-    kinds without an entry are not drawn by ``place_rect``/``place_text``.
-    """
-
-    # Palettes merged into the instance colours when ``debug`` is set.
-    RECT_COLORS = {
-        'figure': 'yellow',
-        'textline': 'magenta',
-        'textbox': 'cyan',
-        'textgroup': 'red',
-        'curve': 'black',
-        'page': 'gray',
-    }
-
-    TEXT_COLORS = {
-        'textbox': 'blue',
-        'char': 'black',
-    }
-
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
-                 pagemargin=50, imagewriter=None, debug=0, rect_colors=None,
-                 text_colors=None):
-        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
-                              laparams=laparams)
-        # Work on private copies: with ``debug`` set the class palettes are
-        # merged in below, which previously modified the caller's own dicts.
-        if text_colors is None:
-            text_colors = {'char': 'black'}
-        else:
-            text_colors = dict(text_colors)
-        if rect_colors is None:
-            rect_colors = {'curve': 'black', 'page': 'gray'}
-        else:
-            rect_colors = dict(rect_colors)
-
-        self.scale = scale
-        self.fontscale = fontscale
-        self.layoutmode = layoutmode
-        self.showpageno = showpageno
-        self.pagemargin = pagemargin
-        self.imagewriter = imagewriter
-        self.rect_colors = rect_colors
-        self.text_colors = text_colors
-        if debug:
-            self.rect_colors.update(self.RECT_COLORS)
-            self.text_colors.update(self.TEXT_COLORS)
-        # Running vertical offset; grows by each page height plus margin.
-        self._yoffset = self.pagemargin
-        # (fontname, fontsize) of the currently open <span>, or None.
-        self._font = None
-        self._fontstack = []
-        self.write_header()
-        return
-
-    def write(self, text):
-        """Write ``text``, encoded with ``self.codec`` when one is set."""
-        if self.codec:
-            text = text.encode(self.codec)
-        if sys.version_info < (3, 0):
-            text = str(text)
-        self.outfp.write(text)
-        return
-
-    def write_header(self):
-        """Write the opening tags and the Content-Type meta element."""
-        self.write('<html><head>\n')
-        if self.codec:
-            s = '<meta http-equiv="Content-Type" content="text/html; ' \
-                'charset=%s">\n' % self.codec
-        else:
-            s = '<meta http-equiv="Content-Type" content="text/html">\n'
-        self.write(s)
-        self.write('</head><body>\n')
-        return
-
-    def write_footer(self):
-        """Write links to pages 1..pageno-1 and the closing tags."""
-        page_links = ['<a href="#{}">{}</a>'.format(i, i)
-                      for i in range(1, self.pageno)]
-        s = '<div style="position:absolute; top:0px;">Page: %s</div>\n' % \
-            ', '.join(page_links)
-        self.write(s)
-        self.write('</body></html>\n')
-        return
-
-    def write_text(self, text):
-        """Write ``text`` after passing it through ``enc``."""
-        self.write(enc(text))
-        return
-
-    def place_rect(self, color, borderwidth, x, y, w, h):
-        """Draw a bordered box if ``color`` has an entry in ``rect_colors``."""
-        color = self.rect_colors.get(color)
-        if color is not None:
-            s = '<span style="position:absolute; border: %s %dpx solid; ' \
-                'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' % \
-                (color, borderwidth, x * self.scale,
-                 (self._yoffset - y) * self.scale, w * self.scale,
-                 h * self.scale)
-            self.write(s)
-        return
-
-    def place_border(self, color, borderwidth, item):
-        """Draw a box around the bounding box of ``item``."""
-        self.place_rect(color, borderwidth, item.x0, item.y1, item.width,
-                        item.height)
-        return
-
-    def place_image(self, item, borderwidth, x, y, w, h):
-        """Export the image and reference it; needs an image writer."""
-        if self.imagewriter is not None:
-            name = self.imagewriter.export_image(item)
-            s = '<img src="%s" border="%d" style="position:absolute; ' \
-                'left:%dpx; top:%dpx;" width="%d" height="%d" />\n' % \
-                (enc(name), borderwidth, x * self.scale,
-                 (self._yoffset - y) * self.scale, w * self.scale,
-                 h * self.scale)
-            self.write(s)
-        return
-
-    def place_text(self, color, text, x, y, size):
-        """Write positioned text if ``color`` is present in ``text_colors``."""
-        color = self.text_colors.get(color)
-        if color is not None:
-            s = '<span style="position:absolute; color:%s; left:%dpx; ' \
-                'top:%dpx; font-size:%dpx;">' % \
-                (color, x * self.scale, (self._yoffset - y) * self.scale,
-                 size * self.scale * self.fontscale)
-            self.write(s)
-            self.write_text(text)
-            self.write('</span>\n')
-        return
-
-    def begin_div(self, color, borderwidth, x, y, w, h, writing_mode=False):
-        """Open a positioned <div> and suspend the current font span."""
-        self._fontstack.append(self._font)
-        self._font = None
-        # NOTE(review): ``color`` is written as given, not looked up in
-        # ``rect_colors``, and the default ``writing_mode`` is rendered
-        # literally as "False" — confirm whether that is intended.
-        s = '<div style="position:absolute; border: %s %dpx solid; ' \
-            'writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; ' \
-            'height:%dpx;">' % \
-            (color, borderwidth, writing_mode, x * self.scale,
-             (self._yoffset - y) * self.scale, w * self.scale, h * self.scale)
-        self.write(s)
-        return
-
-    def end_div(self, color):
-        """Close any open font span and the <div>, restoring the old font."""
-        if self._font is not None:
-            self.write('</span>')
-        self._font = self._fontstack.pop()
-        self.write('</div>')
-        return
-
-    def put_text(self, text, fontname, fontsize):
-        """Write ``text``, opening a new font span when the font changes."""
-        font = (fontname, fontsize)
-        if font != self._font:
-            if self._font is not None:
-                self.write('</span>')
-            # Remove subset tag from fontname, see PDF Reference 5.5.3
-            fontname_without_subset_tag = fontname.split('+')[-1]
-            self.write('<span style="font-family: %s; font-size:%dpx">' %
-                       (fontname_without_subset_tag,
-                        fontsize * self.scale * self.fontscale))
-            self._font = font
-        self.write_text(text)
-        return
-
-    def put_newline(self):
-        """Write a line break."""
-        self.write('<br>')
-        return
-
-    def receive_layout(self, ltpage):
-        """Render one page, then advance the offset by the page margin."""
-        def show_group(item):
-            if isinstance(item, LTTextGroup):
-                self.place_border('textgroup', 1, item)
-                for child in item:
-                    show_group(child)
-            return
-
-        def render(item):
-            if isinstance(item, LTPage):
-                self._yoffset += item.y1
-                self.place_border('page', 1, item)
-                if self.showpageno:
-                    self.write('<div style="position:absolute; top:%dpx;">' %
-                               ((self._yoffset-item.y1)*self.scale))
-                    self.write('<a name="{}">Page {}</a></div>\n'
-                               .format(item.pageid, item.pageid))
-                for child in item:
-                    render(child)
-                if item.groups is not None:
-                    for group in item.groups:
-                        show_group(group)
-            elif isinstance(item, LTCurve):
-                self.place_border('curve', 1, item)
-            elif isinstance(item, LTFigure):
-                self.begin_div('figure', 1, item.x0, item.y1, item.width,
-                               item.height)
-                for child in item:
-                    render(child)
-                self.end_div('figure')
-            elif isinstance(item, LTImage):
-                self.place_image(item, 1, item.x0, item.y1, item.width,
-                                 item.height)
-            else:
-                if self.layoutmode == 'exact':
-                    # Exact mode: every text item is placed at its own
-                    # coordinates.
-                    if isinstance(item, LTTextLine):
-                        self.place_border('textline', 1, item)
-                        for child in item:
-                            render(child)
-                    elif isinstance(item, LTTextBox):
-                        self.place_border('textbox', 1, item)
-                        self.place_text('textbox', str(item.index+1), item.x0,
-                                        item.y1, 20)
-                        for child in item:
-                            render(child)
-                    elif isinstance(item, LTChar):
-                        self.place_border('char', 1, item)
-                        self.place_text('char', item.get_text(), item.x0,
-                                        item.y1, item.size)
-                else:
-                    # Other modes: text flows inside one <div> per text box.
-                    if isinstance(item, LTTextLine):
-                        for child in item:
-                            render(child)
-                        if self.layoutmode != 'loose':
-                            self.put_newline()
-                    elif isinstance(item, LTTextBox):
-                        self.begin_div('textbox', 1, item.x0, item.y1,
-                                       item.width, item.height,
-                                       item.get_writing_mode())
-                        for child in item:
-                            render(child)
-                        self.end_div('textbox')
-                    elif isinstance(item, LTChar):
-                        self.put_text(item.get_text(), item.fontname,
-                                      item.size)
-                    elif isinstance(item, LTText):
-                        self.write_text(item.get_text())
-            return
-        render(ltpage)
-        self._yoffset += self.pagemargin
-        return
-
-    def close(self):
-        """Finish the document by writing the footer."""
-        self.write_footer()
-        return
-
-
-class XMLConverter(PDFConverter):
-    """Converter that writes the layout tree of each page as XML.
-
-    The XML declaration and the opening ``<pages>`` tag are written by the
-    constructor; call :meth:`close` to write the closing tag.
-    """
-
-    # Control characters removed from text when ``stripcontrol`` is set.
-    CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')
-
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 imagewriter=None, stripcontrol=False):
-        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
-                              laparams=laparams)
-        self.imagewriter = imagewriter
-        self.stripcontrol = stripcontrol
-        self.write_header()
-        return
-
-    def write(self, text):
-        """Write ``text``, encoded with ``self.codec`` when one is set."""
-        if self.codec:
-            text = text.encode(self.codec)
-        self.outfp.write(text)
-        return
-
-    def write_header(self):
-        """Write the XML declaration and the opening ``<pages>`` tag."""
-        if self.codec:
-            self.write('<?xml version="1.0" encoding="%s" ?>\n' % self.codec)
-        else:
-            self.write('<?xml version="1.0" ?>\n')
-        self.write('<pages>\n')
-        return
-
-    def write_footer(self):
-        """Write the closing ``</pages>`` tag."""
-        self.write('</pages>\n')
-        return
-
-    def write_text(self, text):
-        """Write character data: optional control stripping, then ``enc``."""
-        if self.stripcontrol:
-            text = self.CONTROL.sub('', text)
-        self.write(enc(text))
-        return
-
-    def receive_layout(self, ltpage):
-        """Write one ``<page>`` element for ``ltpage`` and its contents."""
-        def show_group(item):
-            if isinstance(item, LTTextBox):
-                self.write('<textbox id="%d" bbox="%s" />\n' %
-                           (item.index, bbox2str(item.bbox)))
-            elif isinstance(item, LTTextGroup):
-                self.write('<textgroup bbox="%s">\n' % bbox2str(item.bbox))
-                for child in item:
-                    show_group(child)
-                self.write('</textgroup>\n')
-            return
-
-        def render(item):
-            # The order of these tests matters: LTLine and LTRect are tested
-            # before LTCurve, and LTChar before the generic LTText.
-            if isinstance(item, LTPage):
-                s = '<page id="%s" bbox="%s" rotate="%d">\n' % \
-                    (item.pageid, bbox2str(item.bbox), item.rotate)
-                self.write(s)
-                for child in item:
-                    render(child)
-                if item.groups is not None:
-                    self.write('<layout>\n')
-                    for group in item.groups:
-                        show_group(group)
-                    self.write('</layout>\n')
-                self.write('</page>\n')
-            elif isinstance(item, LTLine):
-                s = '<line linewidth="%d" bbox="%s" />\n' % \
-                    (item.linewidth, bbox2str(item.bbox))
-                self.write(s)
-            elif isinstance(item, LTRect):
-                s = '<rect linewidth="%d" bbox="%s" />\n' % \
-                    (item.linewidth, bbox2str(item.bbox))
-                self.write(s)
-            elif isinstance(item, LTCurve):
-                s = '<curve linewidth="%d" bbox="%s" pts="%s"/>\n' % \
-                    (item.linewidth, bbox2str(item.bbox), item.get_pts())
-                self.write(s)
-            elif isinstance(item, LTFigure):
-                s = '<figure name="%s" bbox="%s">\n' % \
-                    (item.name, bbox2str(item.bbox))
-                self.write(s)
-                for child in item:
-                    render(child)
-                self.write('</figure>\n')
-            elif isinstance(item, LTTextLine):
-                self.write('<textline bbox="%s">\n' % bbox2str(item.bbox))
-                for child in item:
-                    render(child)
-                self.write('</textline>\n')
-            elif isinstance(item, LTTextBox):
-                wmode = ''
-                if isinstance(item, LTTextBoxVertical):
-                    wmode = ' wmode="vertical"'
-                s = '<textbox id="%d" bbox="%s"%s>\n' %\
-                    (item.index, bbox2str(item.bbox), wmode)
-                self.write(s)
-                for child in item:
-                    render(child)
-                self.write('</textbox>\n')
-            elif isinstance(item, LTChar):
-                s = '<text font="%s" bbox="%s" colourspace="%s" ' \
-                    'ncolour="%s" size="%.3f">' % \
-                    (enc(item.fontname), bbox2str(item.bbox),
-                     item.ncs.name, item.graphicstate.ncolor, item.size)
-                self.write(s)
-                self.write_text(item.get_text())
-                self.write('</text>\n')
-            elif isinstance(item, LTText):
-                # Use the same write_text path as LTChar above, so this text
-                # also gets the stripcontrol/enc treatment instead of being
-                # interpolated into the element as-is.
-                self.write('<text>')
-                self.write_text(item.get_text())
-                self.write('</text>\n')
-            elif isinstance(item, LTImage):
-                if self.imagewriter is not None:
-                    name = self.imagewriter.export_image(item)
-                    self.write('<image src="%s" width="%d" height="%d" />\n' %
-                               (enc(name), item.width, item.height))
-                else:
-                    self.write('<image width="%d" height="%d" />\n' %
-                               (item.width, item.height))
-            else:
-                assert False, str(('Unhandled', item))
-            return
-        render(ltpage)
-        return
-
-    def close(self):
-        """Finish the document by writing the footer."""
-        self.write_footer()
-        return
@@ -1,112 +0,0 @@
-import logging
-import re
-
-from .glyphlist import glyphname2unicode
-from .latin_enc import ENCODING
-from .psparser import PSLiteral
-
-HEXADECIMAL = re.compile(r'[0-9a-fA-F]+')
-
-log = logging.getLogger(__name__)
-
-
-def name2unicode(name):
-    """Converts Adobe glyph names to Unicode numbers.
-
-    In contrast to the specification, this raises a KeyError instead of
-    returning an empty string when the key is unknown.
-    This way the caller must explicitly define what to do
-    when there is not a match.
-
-    Reference:
-    https://github.com/adobe-type-tools/agl-specification#2-the-mapping
-
-    :param name: glyph name; anything after the first ``.`` is ignored and
-        ``_``-separated components are converted one by one and joined.
-    :returns: the unicode character(s) the name maps to
-    :raises KeyError: if the name does not match the specification or
-        names a surrogate code point
-    """
-    name = name.split('.')[0]
-    components = name.split('_')
-
-    if len(components) > 1:
-        return ''.join(map(name2unicode, components))
-
-    if name in glyphname2unicode:
-        return glyphname2unicode.get(name)
-
-    if name.startswith('uni'):
-        # Slice off the prefix.  str.strip('uni') removes any of the
-        # characters u/n/i from BOTH ends, so it is not a prefix removal.
-        hex_digits = name[len('uni'):]
-
-        # fullmatch: a prefix-only match let names such as "uni004G"
-        # through, which then failed in int() with a ValueError.
-        if HEXADECIMAL.fullmatch(hex_digits) and len(hex_digits) % 4 == 0:
-            code_points = [int(hex_digits[i:i + 4], base=16)
-                           for i in range(0, len(hex_digits), 4)]
-            for code_point in code_points:
-                raise_key_error_for_invalid_unicode(code_point)
-            return ''.join(map(chr, code_points))
-
-    elif name.startswith('u'):
-        hex_digits = name[len('u'):]
-
-        if HEXADECIMAL.fullmatch(hex_digits) and 4 <= len(hex_digits) <= 6:
-            code_point = int(hex_digits, base=16)
-            raise_key_error_for_invalid_unicode(code_point)
-            # NOTE(review): chr() still raises ValueError above U+10FFFF.
-            return chr(code_point)
-
-    raise KeyError('Could not convert unicode name "%s" to character because '
-                   'it does not match specification' % name)
-
-
-def raise_key_error_for_invalid_unicode(unicode_digit):
-    """Reject code points in the UTF-16 surrogate range D800 through DFFF.
-
-    :param unicode_digit: code point to check
-    :raises KeyError: if the code point lies strictly between D7FF and E000
-    """
-    if not (0xD7FF < unicode_digit < 0xE000):
-        return
-    raise KeyError('Unicode digit %d is invalid because '
-                   'it is in the range D800 through DFFF' % unicode_digit)
-
-
-class EncodingDB:
-    """Character-code to unicode tables for the four encodings in ENCODING.
-
-    The tables are built once, while the class body executes.
-    """
-
-    std2unicode = {}
-    mac2unicode = {}
-    win2unicode = {}
-    pdf2unicode = {}
-    # The loop variables below end up as class attributes, so their names
-    # are left unchanged.
-    for (name, std, mac, win, pdf) in ENCODING:
-        c = name2unicode(name)
-        # A falsy code means the glyph has no slot in that encoding.
-        if std:
-            std2unicode[std] = c
-        if mac:
-            mac2unicode[mac] = c
-        if win:
-            win2unicode[win] = c
-        if pdf:
-            pdf2unicode[pdf] = c
-
-    encodings = {
-        'StandardEncoding': std2unicode,
-        'MacRomanEncoding': mac2unicode,
-        'WinAnsiEncoding': win2unicode,
-        'PDFDocEncoding': pdf2unicode,
-    }
-
-    @classmethod
-    def get_encoding(cls, name, diff=None):
-        """Return the table for ``name``, with ``diff`` applied if given.
-
-        Unknown names fall back to the standard encoding.  Without ``diff``
-        the shared class-level table itself is returned; with it, a copy
-        is modified.  In ``diff`` an int sets the next code and each
-        ``PSLiteral`` names the glyph for the current code, which then
-        advances by one.
-        """
-        shared = cls.encodings.get(name, cls.std2unicode)
-        if not diff:
-            return shared
-
-        table = shared.copy()
-        code = 0
-        for entry in diff:
-            if isinstance(entry, int):
-                code = entry
-                continue
-            if isinstance(entry, PSLiteral):
-                try:
-                    table[code] = name2unicode(entry.name)
-                except (KeyError, ValueError) as exc:
-                    # Unconvertible names are logged and skipped; the code
-                    # still advances.
-                    log.debug(str(exc))
-                code += 1
-        return table
@@ -1,46 +0,0 @@
-""" Font metrics for the Adobe core 14 fonts.
-
-Font metrics are used to compute the boundary of each character
-written with a proportional font.
-
-The following data were extracted from the AFM files:
-
-  http://www.ctan.org/tex-archive/fonts/adobe/afm/
-
-"""
-
-###  BEGIN Verbatim copy of the license part
-
-#
-# Adobe Core 35 AFM Files with 314 Glyph Entries - ReadMe
-#
-# This file and the 35 PostScript(R) AFM files it accompanies may be
-# used, copied, and distributed for any purpose and without charge,
-# with or without modification, provided that all copyright notices
-# are retained; that the AFM files are not distributed without this
-# file; that all modifications to this file or any of the AFM files
-# are prominently noted in the modified file(s); and that this
-# paragraph is not modified. Adobe Systems has no responsibility or
-# obligation to support the use of the AFM files.
-#
-
-###  END Verbatim copy of the license part
-
-# flake8: noqa
-
-FONT_METRICS = {
- 'Courier': ({'FontName': 'Courier', 'Descent': -194.0, 'FontBBox': (-6.0, -249.0, 639.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': 0.0, 'Ascent': 627.0}, {' ': 600, '!': 600, '"': 600, '#': 600, '$': 600, '%': 600, '&': 600, "'": 600, '(': 600, ')': 600, '*': 600, '+': 600, ',': 600, '-': 600, '.': 600, '/': 600, '0': 600, '1': 600, '2': 600, '3': 600, '4': 600, '5': 600, '6': 600, '7': 600, '8': 600, '9': 600, ':': 600, ';': 600, '<': 600, '=': 600, '>': 600, '?': 600, '@': 600, 'A': 600, 'B': 600, 'C': 600, 'D': 600, 'E': 600, 'F': 600, 'G': 600, 'H': 600, 'I': 600, 'J': 600, 'K': 600, 'L': 600, 'M': 600, 'N': 600, 'O': 600, 'P': 600, 'Q': 600, 'R': 600, 'S': 600, 'T': 600, 'U': 600, 'V': 600, 'W': 600, 'X': 600, 'Y': 600, 'Z': 600, '[': 600, '\\': 600, ']': 600, '^': 600, '_': 600, '`': 600, 'a': 600, 'b': 600, 'c': 600, 'd': 600, 'e': 600, 'f': 600, 'g': 600, 'h': 600, 'i': 600, 'j': 600, 'k': 600, 'l': 600, 'm': 600, 'n': 600, 'o': 600, 'p': 600, 'q': 600, 'r': 600, 's': 600, 't': 600, 'u': 600, 'v': 600, 'w': 600, 'x': 600, 'y': 600, 'z': 600, '{': 600, '|': 600, '}': 600, '~': 600, '\xa1': 600, '\xa2': 600, '\xa3': 600, '\xa4': 600, '\xa5': 600, '\xa6': 600, '\xa7': 600, '\xa8': 600, '\xa9': 600, '\xaa': 600, '\xab': 600, '\xac': 600, '\xae': 600, '\xaf': 600, '\xb0': 600, '\xb1': 600, '\xb2': 600, '\xb3': 600, '\xb4': 600, '\xb5': 600, '\xb6': 600, '\xb7': 600, '\xb8': 600, '\xb9': 600, '\xba': 600, '\xbb': 600, '\xbc': 600, '\xbd': 600, '\xbe': 600, '\xbf': 600, '\xc0': 600, '\xc1': 600, '\xc2': 600, '\xc3': 600, '\xc4': 600, '\xc5': 600, '\xc6': 600, '\xc7': 600, '\xc8': 600, '\xc9': 600, '\xca': 600, '\xcb': 600, '\xcc': 600, '\xcd': 600, '\xce': 600, '\xcf': 600, '\xd0': 600, '\xd1': 600, '\xd2': 600, '\xd3': 600, '\xd4': 600, '\xd5': 600, '\xd6': 600, '\xd7': 600, '\xd8': 600, '\xd9': 600, '\xda': 600, '\xdb': 600, '\xdc': 600, '\xdd': 600, '\xde': 600, '\xdf': 600, 
'\xe0': 600, '\xe1': 600, '\xe2': 600, '\xe3': 600, '\xe4': 600, '\xe5': 600, '\xe6': 600, '\xe7': 600, '\xe8': 600, '\xe9': 600, '\xea': 600, '\xeb': 600, '\xec': 600, '\xed': 600, '\xee': 600, '\xef': 600, '\xf0': 600, '\xf1': 600, '\xf2': 600, '\xf3': 600, '\xf4': 600, '\xf5': 600, '\xf6': 600, '\xf7': 600, '\xf8': 600, '\xf9': 600, '\xfa': 600, '\xfb': 600, '\xfc': 600, '\xfd': 600, '\xfe': 600, '\xff': 600, '\u0100': 600, '\u0101': 600, '\u0102': 600, '\u0103': 600, '\u0104': 600, '\u0105': 600, '\u0106': 600, '\u0107': 600, '\u010c': 600, '\u010d': 600, '\u010e': 600, '\u010f': 600, '\u0110': 600, '\u0111': 600, '\u0112': 600, '\u0113': 600, '\u0116': 600, '\u0117': 600, '\u0118': 600, '\u0119': 600, '\u011a': 600, '\u011b': 600, '\u011e': 600, '\u011f': 600, '\u0122': 600, '\u0123': 600, '\u012a': 600, '\u012b': 600, '\u012e': 600, '\u012f': 600, '\u0130': 600, '\u0131': 600, '\u0136': 600, '\u0137': 600, '\u0139': 600, '\u013a': 600, '\u013b': 600, '\u013c': 600, '\u013d': 600, '\u013e': 600, '\u0141': 600, '\u0142': 600, '\u0143': 600, '\u0144': 600, '\u0145': 600, '\u0146': 600, '\u0147': 600, '\u0148': 600, '\u014c': 600, '\u014d': 600, '\u0150': 600, '\u0151': 600, '\u0152': 600, '\u0153': 600, '\u0154': 600, '\u0155': 600, '\u0156': 600, '\u0157': 600, '\u0158': 600, '\u0159': 600, '\u015a': 600, '\u015b': 600, '\u015e': 600, '\u015f': 600, '\u0160': 600, '\u0161': 600, '\u0162': 600, '\u0163': 600, '\u0164': 600, '\u0165': 600, '\u016a': 600, '\u016b': 600, '\u016e': 600, '\u016f': 600, '\u0170': 600, '\u0171': 600, '\u0172': 600, '\u0173': 600, '\u0178': 600, '\u0179': 600, '\u017a': 600, '\u017b': 600, '\u017c': 600, '\u017d': 600, '\u017e': 600, '\u0192': 600, '\u0218': 600, '\u0219': 600, '\u02c6': 600, '\u02c7': 600, '\u02d8': 600, '\u02d9': 600, '\u02da': 600, '\u02db': 600, '\u02dc': 600, '\u02dd': 600, '\u2013': 600, '\u2014': 600, '\u2018': 600, '\u2019': 600, '\u201a': 600, '\u201c': 600, '\u201d': 600, '\u201e': 600, '\u2020': 600, 
'\u2021': 600, '\u2022': 600, '\u2026': 600, '\u2030': 600, '\u2039': 600, '\u203a': 600, '\u2044': 600, '\u2122': 600, '\u2202': 600, '\u2206': 600, '\u2211': 600, '\u2212': 600, '\u221a': 600, '\u2260': 600, '\u2264': 600, '\u2265': 600, '\u25ca': 600, '\uf6c3': 600, '\ufb01': 600, '\ufb02': 600}),
- 'Courier-Bold': ({'FontName': 'Courier-Bold', 'Descent': -194.0, 'FontBBox': (-88.0, -249.0, 697.0, 811.0), 'FontWeight': 'Bold', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': 0.0, 'Ascent': 627.0}, {' ': 600, '!': 600, '"': 600, '#': 600, '$': 600, '%': 600, '&': 600, "'": 600, '(': 600, ')': 600, '*': 600, '+': 600, ',': 600, '-': 600, '.': 600, '/': 600, '0': 600, '1': 600, '2': 600, '3': 600, '4': 600, '5': 600, '6': 600, '7': 600, '8': 600, '9': 600, ':': 600, ';': 600, '<': 600, '=': 600, '>': 600, '?': 600, '@': 600, 'A': 600, 'B': 600, 'C': 600, 'D': 600, 'E': 600, 'F': 600, 'G': 600, 'H': 600, 'I': 600, 'J': 600, 'K': 600, 'L': 600, 'M': 600, 'N': 600, 'O': 600, 'P': 600, 'Q': 600, 'R': 600, 'S': 600, 'T': 600, 'U': 600, 'V': 600, 'W': 600, 'X': 600, 'Y': 600, 'Z': 600, '[': 600, '\\': 600, ']': 600, '^': 600, '_': 600, '`': 600, 'a': 600, 'b': 600, 'c': 600, 'd': 600, 'e': 600, 'f': 600, 'g': 600, 'h': 600, 'i': 600, 'j': 600, 'k': 600, 'l': 600, 'm': 600, 'n': 600, 'o': 600, 'p': 600, 'q': 600, 'r': 600, 's': 600, 't': 600, 'u': 600, 'v': 600, 'w': 600, 'x': 600, 'y': 600, 'z': 600, '{': 600, '|': 600, '}': 600, '~': 600, '\xa1': 600, '\xa2': 600, '\xa3': 600, '\xa4': 600, '\xa5': 600, '\xa6': 600, '\xa7': 600, '\xa8': 600, '\xa9': 600, '\xaa': 600, '\xab': 600, '\xac': 600, '\xae': 600, '\xaf': 600, '\xb0': 600, '\xb1': 600, '\xb2': 600, '\xb3': 600, '\xb4': 600, '\xb5': 600, '\xb6': 600, '\xb7': 600, '\xb8': 600, '\xb9': 600, '\xba': 600, '\xbb': 600, '\xbc': 600, '\xbd': 600, '\xbe': 600, '\xbf': 600, '\xc0': 600, '\xc1': 600, '\xc2': 600, '\xc3': 600, '\xc4': 600, '\xc5': 600, '\xc6': 600, '\xc7': 600, '\xc8': 600, '\xc9': 600, '\xca': 600, '\xcb': 600, '\xcc': 600, '\xcd': 600, '\xce': 600, '\xcf': 600, '\xd0': 600, '\xd1': 600, '\xd2': 600, '\xd3': 600, '\xd4': 600, '\xd5': 600, '\xd6': 600, '\xd7': 600, '\xd8': 600, '\xd9': 600, '\xda': 600, '\xdb': 600, '\xdc': 600, '\xdd': 600, '\xde': 600, '\xdf': 
600, '\xe0': 600, '\xe1': 600, '\xe2': 600, '\xe3': 600, '\xe4': 600, '\xe5': 600, '\xe6': 600, '\xe7': 600, '\xe8': 600, '\xe9': 600, '\xea': 600, '\xeb': 600, '\xec': 600, '\xed': 600, '\xee': 600, '\xef': 600, '\xf0': 600, '\xf1': 600, '\xf2': 600, '\xf3': 600, '\xf4': 600, '\xf5': 600, '\xf6': 600, '\xf7': 600, '\xf8': 600, '\xf9': 600, '\xfa': 600, '\xfb': 600, '\xfc': 600, '\xfd': 600, '\xfe': 600, '\xff': 600, '\u0100': 600, '\u0101': 600, '\u0102': 600, '\u0103': 600, '\u0104': 600, '\u0105': 600, '\u0106': 600, '\u0107': 600, '\u010c': 600, '\u010d': 600, '\u010e': 600, '\u010f': 600, '\u0110': 600, '\u0111': 600, '\u0112': 600, '\u0113': 600, '\u0116': 600, '\u0117': 600, '\u0118': 600, '\u0119': 600, '\u011a': 600, '\u011b': 600, '\u011e': 600, '\u011f': 600, '\u0122': 600, '\u0123': 600, '\u012a': 600, '\u012b': 600, '\u012e': 600, '\u012f': 600, '\u0130': 600, '\u0131': 600, '\u0136': 600, '\u0137': 600, '\u0139': 600, '\u013a': 600, '\u013b': 600, '\u013c': 600, '\u013d': 600, '\u013e': 600, '\u0141': 600, '\u0142': 600, '\u0143': 600, '\u0144': 600, '\u0145': 600, '\u0146': 600, '\u0147': 600, '\u0148': 600, '\u014c': 600, '\u014d': 600, '\u0150': 600, '\u0151': 600, '\u0152': 600, '\u0153': 600, '\u0154': 600, '\u0155': 600, '\u0156': 600, '\u0157': 600, '\u0158': 600, '\u0159': 600, '\u015a': 600, '\u015b': 600, '\u015e': 600, '\u015f': 600, '\u0160': 600, '\u0161': 600, '\u0162': 600, '\u0163': 600, '\u0164': 600, '\u0165': 600, '\u016a': 600, '\u016b': 600, '\u016e': 600, '\u016f': 600, '\u0170': 600, '\u0171': 600, '\u0172': 600, '\u0173': 600, '\u0178': 600, '\u0179': 600, '\u017a': 600, '\u017b': 600, '\u017c': 600, '\u017d': 600, '\u017e': 600, '\u0192': 600, '\u0218': 600, '\u0219': 600, '\u02c6': 600, '\u02c7': 600, '\u02d8': 600, '\u02d9': 600, '\u02da': 600, '\u02db': 600, '\u02dc': 600, '\u02dd': 600, '\u2013': 600, '\u2014': 600, '\u2018': 600, '\u2019': 600, '\u201a': 600, '\u201c': 600, '\u201d': 600, '\u201e': 600, '\u2020': 600, 
'\u2021': 600, '\u2022': 600, '\u2026': 600, '\u2030': 600, '\u2039': 600, '\u203a': 600, '\u2044': 600, '\u2122': 600, '\u2202': 600, '\u2206': 600, '\u2211': 600, '\u2212': 600, '\u221a': 600, '\u2260': 600, '\u2264': 600, '\u2265': 600, '\u25ca': 600, '\uf6c3': 600, '\ufb01': 600, '\ufb02': 600}),
- 'Courier-BoldOblique': ({'FontName': 'Courier-BoldOblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 758.0, 811.0), 'FontWeight': 'Bold', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {' ': 600, '!': 600, '"': 600, '#': 600, '$': 600, '%': 600, '&': 600, "'": 600, '(': 600, ')': 600, '*': 600, '+': 600, ',': 600, '-': 600, '.': 600, '/': 600, '0': 600, '1': 600, '2': 600, '3': 600, '4': 600, '5': 600, '6': 600, '7': 600, '8': 600, '9': 600, ':': 600, ';': 600, '<': 600, '=': 600, '>': 600, '?': 600, '@': 600, 'A': 600, 'B': 600, 'C': 600, 'D': 600, 'E': 600, 'F': 600, 'G': 600, 'H': 600, 'I': 600, 'J': 600, 'K': 600, 'L': 600, 'M': 600, 'N': 600, 'O': 600, 'P': 600, 'Q': 600, 'R': 600, 'S': 600, 'T': 600, 'U': 600, 'V': 600, 'W': 600, 'X': 600, 'Y': 600, 'Z': 600, '[': 600, '\\': 600, ']': 600, '^': 600, '_': 600, '`': 600, 'a': 600, 'b': 600, 'c': 600, 'd': 600, 'e': 600, 'f': 600, 'g': 600, 'h': 600, 'i': 600, 'j': 600, 'k': 600, 'l': 600, 'm': 600, 'n': 600, 'o': 600, 'p': 600, 'q': 600, 'r': 600, 's': 600, 't': 600, 'u': 600, 'v': 600, 'w': 600, 'x': 600, 'y': 600, 'z': 600, '{': 600, '|': 600, '}': 600, '~': 600, '\xa1': 600, '\xa2': 600, '\xa3': 600, '\xa4': 600, '\xa5': 600, '\xa6': 600, '\xa7': 600, '\xa8': 600, '\xa9': 600, '\xaa': 600, '\xab': 600, '\xac': 600, '\xae': 600, '\xaf': 600, '\xb0': 600, '\xb1': 600, '\xb2': 600, '\xb3': 600, '\xb4': 600, '\xb5': 600, '\xb6': 600, '\xb7': 600, '\xb8': 600, '\xb9': 600, '\xba': 600, '\xbb': 600, '\xbc': 600, '\xbd': 600, '\xbe': 600, '\xbf': 600, '\xc0': 600, '\xc1': 600, '\xc2': 600, '\xc3': 600, '\xc4': 600, '\xc5': 600, '\xc6': 600, '\xc7': 600, '\xc8': 600, '\xc9': 600, '\xca': 600, '\xcb': 600, '\xcc': 600, '\xcd': 600, '\xce': 600, '\xcf': 600, '\xd0': 600, '\xd1': 600, '\xd2': 600, '\xd3': 600, '\xd4': 600, '\xd5': 600, '\xd6': 600, '\xd7': 600, '\xd8': 600, '\xd9': 600, '\xda': 600, '\xdb': 600, '\xdc': 600, '\xdd': 600, 
'\xde': 600, '\xdf': 600, '\xe0': 600, '\xe1': 600, '\xe2': 600, '\xe3': 600, '\xe4': 600, '\xe5': 600, '\xe6': 600, '\xe7': 600, '\xe8': 600, '\xe9': 600, '\xea': 600, '\xeb': 600, '\xec': 600, '\xed': 600, '\xee': 600, '\xef': 600, '\xf0': 600, '\xf1': 600, '\xf2': 600, '\xf3': 600, '\xf4': 600, '\xf5': 600, '\xf6': 600, '\xf7': 600, '\xf8': 600, '\xf9': 600, '\xfa': 600, '\xfb': 600, '\xfc': 600, '\xfd': 600, '\xfe': 600, '\xff': 600, '\u0100': 600, '\u0101': 600, '\u0102': 600, '\u0103': 600, '\u0104': 600, '\u0105': 600, '\u0106': 600, '\u0107': 600, '\u010c': 600, '\u010d': 600, '\u010e': 600, '\u010f': 600, '\u0110': 600, '\u0111': 600, '\u0112': 600, '\u0113': 600, '\u0116': 600, '\u0117': 600, '\u0118': 600, '\u0119': 600, '\u011a': 600, '\u011b': 600, '\u011e': 600, '\u011f': 600, '\u0122': 600, '\u0123': 600, '\u012a': 600, '\u012b': 600, '\u012e': 600, '\u012f': 600, '\u0130': 600, '\u0131': 600, '\u0136': 600, '\u0137': 600, '\u0139': 600, '\u013a': 600, '\u013b': 600, '\u013c': 600, '\u013d': 600, '\u013e': 600, '\u0141': 600, '\u0142': 600, '\u0143': 600, '\u0144': 600, '\u0145': 600, '\u0146': 600, '\u0147': 600, '\u0148': 600, '\u014c': 600, '\u014d': 600, '\u0150': 600, '\u0151': 600, '\u0152': 600, '\u0153': 600, '\u0154': 600, '\u0155': 600, '\u0156': 600, '\u0157': 600, '\u0158': 600, '\u0159': 600, '\u015a': 600, '\u015b': 600, '\u015e': 600, '\u015f': 600, '\u0160': 600, '\u0161': 600, '\u0162': 600, '\u0163': 600, '\u0164': 600, '\u0165': 600, '\u016a': 600, '\u016b': 600, '\u016e': 600, '\u016f': 600, '\u0170': 600, '\u0171': 600, '\u0172': 600, '\u0173': 600, '\u0178': 600, '\u0179': 600, '\u017a': 600, '\u017b': 600, '\u017c': 600, '\u017d': 600, '\u017e': 600, '\u0192': 600, '\u0218': 600, '\u0219': 600, '\u02c6': 600, '\u02c7': 600, '\u02d8': 600, '\u02d9': 600, '\u02da': 600, '\u02db': 600, '\u02dc': 600, '\u02dd': 600, '\u2013': 600, '\u2014': 600, '\u2018': 600, '\u2019': 600, '\u201a': 600, '\u201c': 600, '\u201d': 600, '\u201e': 
600, '\u2020': 600, '\u2021': 600, '\u2022': 600, '\u2026': 600, '\u2030': 600, '\u2039': 600, '\u203a': 600, '\u2044': 600, '\u2122': 600, '\u2202': 600, '\u2206': 600, '\u2211': 600, '\u2212': 600, '\u221a': 600, '\u2260': 600, '\u2264': 600, '\u2265': 600, '\u25ca': 600, '\uf6c3': 600, '\ufb01': 600, '\ufb02': 600}),
- 'Courier-Oblique': ({'FontName': 'Courier-Oblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 749.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {' ': 600, '!': 600, '"': 600, '#': 600, '$': 600, '%': 600, '&': 600, "'": 600, '(': 600, ')': 600, '*': 600, '+': 600, ',': 600, '-': 600, '.': 600, '/': 600, '0': 600, '1': 600, '2': 600, '3': 600, '4': 600, '5': 600, '6': 600, '7': 600, '8': 600, '9': 600, ':': 600, ';': 600, '<': 600, '=': 600, '>': 600, '?': 600, '@': 600, 'A': 600, 'B': 600, 'C': 600, 'D': 600, 'E': 600, 'F': 600, 'G': 600, 'H': 600, 'I': 600, 'J': 600, 'K': 600, 'L': 600, 'M': 600, 'N': 600, 'O': 600, 'P': 600, 'Q': 600, 'R': 600, 'S': 600, 'T': 600, 'U': 600, 'V': 600, 'W': 600, 'X': 600, 'Y': 600, 'Z': 600, '[': 600, '\\': 600, ']': 600, '^': 600, '_': 600, '`': 600, 'a': 600, 'b': 600, 'c': 600, 'd': 600, 'e': 600, 'f': 600, 'g': 600, 'h': 600, 'i': 600, 'j': 600, 'k': 600, 'l': 600, 'm': 600, 'n': 600, 'o': 600, 'p': 600, 'q': 600, 'r': 600, 's': 600, 't': 600, 'u': 600, 'v': 600, 'w': 600, 'x': 600, 'y': 600, 'z': 600, '{': 600, '|': 600, '}': 600, '~': 600, '\xa1': 600, '\xa2': 600, '\xa3': 600, '\xa4': 600, '\xa5': 600, '\xa6': 600, '\xa7': 600, '\xa8': 600, '\xa9': 600, '\xaa': 600, '\xab': 600, '\xac': 600, '\xae': 600, '\xaf': 600, '\xb0': 600, '\xb1': 600, '\xb2': 600, '\xb3': 600, '\xb4': 600, '\xb5': 600, '\xb6': 600, '\xb7': 600, '\xb8': 600, '\xb9': 600, '\xba': 600, '\xbb': 600, '\xbc': 600, '\xbd': 600, '\xbe': 600, '\xbf': 600, '\xc0': 600, '\xc1': 600, '\xc2': 600, '\xc3': 600, '\xc4': 600, '\xc5': 600, '\xc6': 600, '\xc7': 600, '\xc8': 600, '\xc9': 600, '\xca': 600, '\xcb': 600, '\xcc': 600, '\xcd': 600, '\xce': 600, '\xcf': 600, '\xd0': 600, '\xd1': 600, '\xd2': 600, '\xd3': 600, '\xd4': 600, '\xd5': 600, '\xd6': 600, '\xd7': 600, '\xd8': 600, '\xd9': 600, '\xda': 600, '\xdb': 600, '\xdc': 600, '\xdd': 600, '\xde': 
600, '\xdf': 600, '\xe0': 600, '\xe1': 600, '\xe2': 600, '\xe3': 600, '\xe4': 600, '\xe5': 600, '\xe6': 600, '\xe7': 600, '\xe8': 600, '\xe9': 600, '\xea': 600, '\xeb': 600, '\xec': 600, '\xed': 600, '\xee': 600, '\xef': 600, '\xf0': 600, '\xf1': 600, '\xf2': 600, '\xf3': 600, '\xf4': 600, '\xf5': 600, '\xf6': 600, '\xf7': 600, '\xf8': 600, '\xf9': 600, '\xfa': 600, '\xfb': 600, '\xfc': 600, '\xfd': 600, '\xfe': 600, '\xff': 600, '\u0100': 600, '\u0101': 600, '\u0102': 600, '\u0103': 600, '\u0104': 600, '\u0105': 600, '\u0106': 600, '\u0107': 600, '\u010c': 600, '\u010d': 600, '\u010e': 600, '\u010f': 600, '\u0110': 600, '\u0111': 600, '\u0112': 600, '\u0113': 600, '\u0116': 600, '\u0117': 600, '\u0118': 600, '\u0119': 600, '\u011a': 600, '\u011b': 600, '\u011e': 600, '\u011f': 600, '\u0122': 600, '\u0123': 600, '\u012a': 600, '\u012b': 600, '\u012e': 600, '\u012f': 600, '\u0130': 600, '\u0131': 600, '\u0136': 600, '\u0137': 600, '\u0139': 600, '\u013a': 600, '\u013b': 600, '\u013c': 600, '\u013d': 600, '\u013e': 600, '\u0141': 600, '\u0142': 600, '\u0143': 600, '\u0144': 600, '\u0145': 600, '\u0146': 600, '\u0147': 600, '\u0148': 600, '\u014c': 600, '\u014d': 600, '\u0150': 600, '\u0151': 600, '\u0152': 600, '\u0153': 600, '\u0154': 600, '\u0155': 600, '\u0156': 600, '\u0157': 600, '\u0158': 600, '\u0159': 600, '\u015a': 600, '\u015b': 600, '\u015e': 600, '\u015f': 600, '\u0160': 600, '\u0161': 600, '\u0162': 600, '\u0163': 600, '\u0164': 600, '\u0165': 600, '\u016a': 600, '\u016b': 600, '\u016e': 600, '\u016f': 600, '\u0170': 600, '\u0171': 600, '\u0172': 600, '\u0173': 600, '\u0178': 600, '\u0179': 600, '\u017a': 600, '\u017b': 600, '\u017c': 600, '\u017d': 600, '\u017e': 600, '\u0192': 600, '\u0218': 600, '\u0219': 600, '\u02c6': 600, '\u02c7': 600, '\u02d8': 600, '\u02d9': 600, '\u02da': 600, '\u02db': 600, '\u02dc': 600, '\u02dd': 600, '\u2013': 600, '\u2014': 600, '\u2018': 600, '\u2019': 600, '\u201a': 600, '\u201c': 600, '\u201d': 600, '\u201e': 600, 
'\u2020': 600, '\u2021': 600, '\u2022': 600, '\u2026': 600, '\u2030': 600, '\u2039': 600, '\u203a': 600, '\u2044': 600, '\u2122': 600, '\u2202': 600, '\u2206': 600, '\u2211': 600, '\u2212': 600, '\u221a': 600, '\u2260': 600, '\u2264': 600, '\u2265': 600, '\u25ca': 600, '\uf6c3': 600, '\ufb01': 600, '\ufb02': 600}),
- 'Helvetica': ({'FontName': 'Helvetica', 'Descent': -207.0, 'FontBBox': (-166.0, -225.0, 1000.0, 931.0), 'FontWeight': 'Medium', 'CapHeight': 718.0, 'FontFamily': 'Helvetica', 'Flags': 0, 'XHeight': 523.0, 'ItalicAngle': 0.0, 'Ascent': 718.0}, {' ': 278, '!': 278, '"': 355, '#': 556, '$': 556, '%': 889, '&': 667, "'": 191, '(': 333, ')': 333, '*': 389, '+': 584, ',': 278, '-': 333, '.': 278, '/': 278, '0': 556, '1': 556, '2': 556, '3': 556, '4': 556, '5': 556, '6': 556, '7': 556, '8': 556, '9': 556, ':': 278, ';': 278, '<': 584, '=': 584, '>': 584, '?': 556, '@': 1015, 'A': 667, 'B': 667, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 722, 'I': 278, 'J': 500, 'K': 667, 'L': 556, 'M': 833, 'N': 722, 'O': 778, 'P': 667, 'Q': 778, 'R': 722, 'S': 667, 'T': 611, 'U': 722, 'V': 667, 'W': 944, 'X': 667, 'Y': 667, 'Z': 611, '[': 278, '\\': 278, ']': 278, '^': 469, '_': 556, '`': 333, 'a': 556, 'b': 556, 'c': 500, 'd': 556, 'e': 556, 'f': 278, 'g': 556, 'h': 556, 'i': 222, 'j': 222, 'k': 500, 'l': 222, 'm': 833, 'n': 556, 'o': 556, 'p': 556, 'q': 556, 'r': 333, 's': 500, 't': 278, 'u': 556, 'v': 500, 'w': 722, 'x': 500, 'y': 500, 'z': 500, '{': 334, '|': 260, '}': 334, '~': 584, '\xa1': 333, '\xa2': 556, '\xa3': 556, '\xa4': 556, '\xa5': 556, '\xa6': 260, '\xa7': 556, '\xa8': 333, '\xa9': 737, '\xaa': 370, '\xab': 556, '\xac': 584, '\xae': 737, '\xaf': 333, '\xb0': 400, '\xb1': 584, '\xb2': 333, '\xb3': 333, '\xb4': 333, '\xb5': 556, '\xb6': 537, '\xb7': 278, '\xb8': 333, '\xb9': 333, '\xba': 365, '\xbb': 556, '\xbc': 834, '\xbd': 834, '\xbe': 834, '\xbf': 611, '\xc0': 667, '\xc1': 667, '\xc2': 667, '\xc3': 667, '\xc4': 667, '\xc5': 667, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 278, '\xcd': 278, '\xce': 278, '\xcf': 278, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 584, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 667, '\xde': 667, '\xdf': 
611, '\xe0': 556, '\xe1': 556, '\xe2': 556, '\xe3': 556, '\xe4': 556, '\xe5': 556, '\xe6': 889, '\xe7': 500, '\xe8': 556, '\xe9': 556, '\xea': 556, '\xeb': 556, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 556, '\xf1': 556, '\xf2': 556, '\xf3': 556, '\xf4': 556, '\xf5': 556, '\xf6': 556, '\xf7': 584, '\xf8': 611, '\xf9': 556, '\xfa': 556, '\xfb': 556, '\xfc': 556, '\xfd': 500, '\xfe': 556, '\xff': 500, '\u0100': 667, '\u0101': 556, '\u0102': 667, '\u0103': 556, '\u0104': 667, '\u0105': 556, '\u0106': 722, '\u0107': 500, '\u010c': 722, '\u010d': 500, '\u010e': 722, '\u010f': 643, '\u0110': 722, '\u0111': 556, '\u0112': 667, '\u0113': 556, '\u0116': 667, '\u0117': 556, '\u0118': 667, '\u0119': 556, '\u011a': 667, '\u011b': 556, '\u011e': 778, '\u011f': 556, '\u0122': 778, '\u0123': 556, '\u012a': 278, '\u012b': 278, '\u012e': 278, '\u012f': 222, '\u0130': 278, '\u0131': 278, '\u0136': 667, '\u0137': 500, '\u0139': 556, '\u013a': 222, '\u013b': 556, '\u013c': 222, '\u013d': 556, '\u013e': 299, '\u0141': 556, '\u0142': 222, '\u0143': 722, '\u0144': 556, '\u0145': 722, '\u0146': 556, '\u0147': 722, '\u0148': 556, '\u014c': 778, '\u014d': 556, '\u0150': 778, '\u0151': 556, '\u0152': 1000, '\u0153': 944, '\u0154': 722, '\u0155': 333, '\u0156': 722, '\u0157': 333, '\u0158': 722, '\u0159': 333, '\u015a': 667, '\u015b': 500, '\u015e': 667, '\u015f': 500, '\u0160': 667, '\u0161': 500, '\u0162': 611, '\u0163': 278, '\u0164': 611, '\u0165': 317, '\u016a': 722, '\u016b': 556, '\u016e': 722, '\u016f': 556, '\u0170': 722, '\u0171': 556, '\u0172': 722, '\u0173': 556, '\u0178': 667, '\u0179': 611, '\u017a': 500, '\u017b': 611, '\u017c': 500, '\u017d': 611, '\u017e': 500, '\u0192': 556, '\u0218': 667, '\u0219': 500, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 556, '\u2014': 1000, '\u2018': 222, '\u2019': 222, '\u201a': 222, '\u201c': 333, '\u201d': 333, '\u201e': 333, '\u2020': 556, 
'\u2021': 556, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 476, '\u2206': 612, '\u2211': 600, '\u2212': 584, '\u221a': 453, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 471, '\uf6c3': 250, '\ufb01': 500, '\ufb02': 500}),
- 'Helvetica-Bold': ({'FontName': 'Helvetica-Bold', 'Descent': -207.0, 'FontBBox': (-170.0, -228.0, 1003.0, 962.0), 'FontWeight': 'Bold', 'CapHeight': 718.0, 'FontFamily': 'Helvetica', 'Flags': 0, 'XHeight': 532.0, 'ItalicAngle': 0.0, 'Ascent': 718.0}, {' ': 278, '!': 333, '"': 474, '#': 556, '$': 556, '%': 889, '&': 722, "'": 238, '(': 333, ')': 333, '*': 389, '+': 584, ',': 278, '-': 333, '.': 278, '/': 278, '0': 556, '1': 556, '2': 556, '3': 556, '4': 556, '5': 556, '6': 556, '7': 556, '8': 556, '9': 556, ':': 333, ';': 333, '<': 584, '=': 584, '>': 584, '?': 611, '@': 975, 'A': 722, 'B': 722, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 722, 'I': 278, 'J': 556, 'K': 722, 'L': 611, 'M': 833, 'N': 722, 'O': 778, 'P': 667, 'Q': 778, 'R': 722, 'S': 667, 'T': 611, 'U': 722, 'V': 667, 'W': 944, 'X': 667, 'Y': 667, 'Z': 611, '[': 333, '\\': 278, ']': 333, '^': 584, '_': 556, '`': 333, 'a': 556, 'b': 611, 'c': 556, 'd': 611, 'e': 556, 'f': 333, 'g': 611, 'h': 611, 'i': 278, 'j': 278, 'k': 556, 'l': 278, 'm': 889, 'n': 611, 'o': 611, 'p': 611, 'q': 611, 'r': 389, 's': 556, 't': 333, 'u': 611, 'v': 556, 'w': 778, 'x': 556, 'y': 556, 'z': 500, '{': 389, '|': 280, '}': 389, '~': 584, '\xa1': 333, '\xa2': 556, '\xa3': 556, '\xa4': 556, '\xa5': 556, '\xa6': 280, '\xa7': 556, '\xa8': 333, '\xa9': 737, '\xaa': 370, '\xab': 556, '\xac': 584, '\xae': 737, '\xaf': 333, '\xb0': 400, '\xb1': 584, '\xb2': 333, '\xb3': 333, '\xb4': 333, '\xb5': 611, '\xb6': 556, '\xb7': 278, '\xb8': 333, '\xb9': 333, '\xba': 365, '\xbb': 556, '\xbc': 834, '\xbd': 834, '\xbe': 834, '\xbf': 611, '\xc0': 722, '\xc1': 722, '\xc2': 722, '\xc3': 722, '\xc4': 722, '\xc5': 722, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 278, '\xcd': 278, '\xce': 278, '\xcf': 278, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 584, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 667, '\xde': 667, 
'\xdf': 611, '\xe0': 556, '\xe1': 556, '\xe2': 556, '\xe3': 556, '\xe4': 556, '\xe5': 556, '\xe6': 889, '\xe7': 556, '\xe8': 556, '\xe9': 556, '\xea': 556, '\xeb': 556, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 611, '\xf1': 611, '\xf2': 611, '\xf3': 611, '\xf4': 611, '\xf5': 611, '\xf6': 611, '\xf7': 584, '\xf8': 611, '\xf9': 611, '\xfa': 611, '\xfb': 611, '\xfc': 611, '\xfd': 556, '\xfe': 611, '\xff': 556, '\u0100': 722, '\u0101': 556, '\u0102': 722, '\u0103': 556, '\u0104': 722, '\u0105': 556, '\u0106': 722, '\u0107': 556, '\u010c': 722, '\u010d': 556, '\u010e': 722, '\u010f': 743, '\u0110': 722, '\u0111': 611, '\u0112': 667, '\u0113': 556, '\u0116': 667, '\u0117': 556, '\u0118': 667, '\u0119': 556, '\u011a': 667, '\u011b': 556, '\u011e': 778, '\u011f': 611, '\u0122': 778, '\u0123': 611, '\u012a': 278, '\u012b': 278, '\u012e': 278, '\u012f': 278, '\u0130': 278, '\u0131': 278, '\u0136': 722, '\u0137': 556, '\u0139': 611, '\u013a': 278, '\u013b': 611, '\u013c': 278, '\u013d': 611, '\u013e': 400, '\u0141': 611, '\u0142': 278, '\u0143': 722, '\u0144': 611, '\u0145': 722, '\u0146': 611, '\u0147': 722, '\u0148': 611, '\u014c': 778, '\u014d': 611, '\u0150': 778, '\u0151': 611, '\u0152': 1000, '\u0153': 944, '\u0154': 722, '\u0155': 389, '\u0156': 722, '\u0157': 389, '\u0158': 722, '\u0159': 389, '\u015a': 667, '\u015b': 556, '\u015e': 667, '\u015f': 556, '\u0160': 667, '\u0161': 556, '\u0162': 611, '\u0163': 333, '\u0164': 611, '\u0165': 389, '\u016a': 722, '\u016b': 611, '\u016e': 722, '\u016f': 611, '\u0170': 722, '\u0171': 611, '\u0172': 722, '\u0173': 611, '\u0178': 667, '\u0179': 611, '\u017a': 500, '\u017b': 611, '\u017c': 500, '\u017d': 611, '\u017e': 500, '\u0192': 556, '\u0218': 667, '\u0219': 556, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 556, '\u2014': 1000, '\u2018': 278, '\u2019': 278, '\u201a': 278, '\u201c': 500, '\u201d': 500, '\u201e': 500, 
'\u2020': 556, '\u2021': 556, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 494, '\u2206': 612, '\u2211': 600, '\u2212': 584, '\u221a': 549, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 494, '\uf6c3': 250, '\ufb01': 611, '\ufb02': 611}),
- 'Helvetica-BoldOblique': ({'FontName': 'Helvetica-BoldOblique', 'Descent': -207.0, 'FontBBox': (-175.0, -228.0, 1114.0, 962.0), 'FontWeight': 'Bold', 'CapHeight': 718.0, 'FontFamily': 'Helvetica', 'Flags': 0, 'XHeight': 532.0, 'ItalicAngle': -12.0, 'Ascent': 718.0}, {' ': 278, '!': 333, '"': 474, '#': 556, '$': 556, '%': 889, '&': 722, "'": 238, '(': 333, ')': 333, '*': 389, '+': 584, ',': 278, '-': 333, '.': 278, '/': 278, '0': 556, '1': 556, '2': 556, '3': 556, '4': 556, '5': 556, '6': 556, '7': 556, '8': 556, '9': 556, ':': 333, ';': 333, '<': 584, '=': 584, '>': 584, '?': 611, '@': 975, 'A': 722, 'B': 722, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 722, 'I': 278, 'J': 556, 'K': 722, 'L': 611, 'M': 833, 'N': 722, 'O': 778, 'P': 667, 'Q': 778, 'R': 722, 'S': 667, 'T': 611, 'U': 722, 'V': 667, 'W': 944, 'X': 667, 'Y': 667, 'Z': 611, '[': 333, '\\': 278, ']': 333, '^': 584, '_': 556, '`': 333, 'a': 556, 'b': 611, 'c': 556, 'd': 611, 'e': 556, 'f': 333, 'g': 611, 'h': 611, 'i': 278, 'j': 278, 'k': 556, 'l': 278, 'm': 889, 'n': 611, 'o': 611, 'p': 611, 'q': 611, 'r': 389, 's': 556, 't': 333, 'u': 611, 'v': 556, 'w': 778, 'x': 556, 'y': 556, 'z': 500, '{': 389, '|': 280, '}': 389, '~': 584, '\xa1': 333, '\xa2': 556, '\xa3': 556, '\xa4': 556, '\xa5': 556, '\xa6': 280, '\xa7': 556, '\xa8': 333, '\xa9': 737, '\xaa': 370, '\xab': 556, '\xac': 584, '\xae': 737, '\xaf': 333, '\xb0': 400, '\xb1': 584, '\xb2': 333, '\xb3': 333, '\xb4': 333, '\xb5': 611, '\xb6': 556, '\xb7': 278, '\xb8': 333, '\xb9': 333, '\xba': 365, '\xbb': 556, '\xbc': 834, '\xbd': 834, '\xbe': 834, '\xbf': 611, '\xc0': 722, '\xc1': 722, '\xc2': 722, '\xc3': 722, '\xc4': 722, '\xc5': 722, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 278, '\xcd': 278, '\xce': 278, '\xcf': 278, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 584, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 
667, '\xde': 667, '\xdf': 611, '\xe0': 556, '\xe1': 556, '\xe2': 556, '\xe3': 556, '\xe4': 556, '\xe5': 556, '\xe6': 889, '\xe7': 556, '\xe8': 556, '\xe9': 556, '\xea': 556, '\xeb': 556, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 611, '\xf1': 611, '\xf2': 611, '\xf3': 611, '\xf4': 611, '\xf5': 611, '\xf6': 611, '\xf7': 584, '\xf8': 611, '\xf9': 611, '\xfa': 611, '\xfb': 611, '\xfc': 611, '\xfd': 556, '\xfe': 611, '\xff': 556, '\u0100': 722, '\u0101': 556, '\u0102': 722, '\u0103': 556, '\u0104': 722, '\u0105': 556, '\u0106': 722, '\u0107': 556, '\u010c': 722, '\u010d': 556, '\u010e': 722, '\u010f': 743, '\u0110': 722, '\u0111': 611, '\u0112': 667, '\u0113': 556, '\u0116': 667, '\u0117': 556, '\u0118': 667, '\u0119': 556, '\u011a': 667, '\u011b': 556, '\u011e': 778, '\u011f': 611, '\u0122': 778, '\u0123': 611, '\u012a': 278, '\u012b': 278, '\u012e': 278, '\u012f': 278, '\u0130': 278, '\u0131': 278, '\u0136': 722, '\u0137': 556, '\u0139': 611, '\u013a': 278, '\u013b': 611, '\u013c': 278, '\u013d': 611, '\u013e': 400, '\u0141': 611, '\u0142': 278, '\u0143': 722, '\u0144': 611, '\u0145': 722, '\u0146': 611, '\u0147': 722, '\u0148': 611, '\u014c': 778, '\u014d': 611, '\u0150': 778, '\u0151': 611, '\u0152': 1000, '\u0153': 944, '\u0154': 722, '\u0155': 389, '\u0156': 722, '\u0157': 389, '\u0158': 722, '\u0159': 389, '\u015a': 667, '\u015b': 556, '\u015e': 667, '\u015f': 556, '\u0160': 667, '\u0161': 556, '\u0162': 611, '\u0163': 333, '\u0164': 611, '\u0165': 389, '\u016a': 722, '\u016b': 611, '\u016e': 722, '\u016f': 611, '\u0170': 722, '\u0171': 611, '\u0172': 722, '\u0173': 611, '\u0178': 667, '\u0179': 611, '\u017a': 500, '\u017b': 611, '\u017c': 500, '\u017d': 611, '\u017e': 500, '\u0192': 556, '\u0218': 667, '\u0219': 556, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 556, '\u2014': 1000, '\u2018': 278, '\u2019': 278, '\u201a': 278, '\u201c': 500, '\u201d': 500, 
'\u201e': 500, '\u2020': 556, '\u2021': 556, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 494, '\u2206': 612, '\u2211': 600, '\u2212': 584, '\u221a': 549, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 494, '\uf6c3': 250, '\ufb01': 611, '\ufb02': 611}),
- 'Helvetica-Oblique': ({'FontName': 'Helvetica-Oblique', 'Descent': -207.0, 'FontBBox': (-171.0, -225.0, 1116.0, 931.0), 'FontWeight': 'Medium', 'CapHeight': 718.0, 'FontFamily': 'Helvetica', 'Flags': 0, 'XHeight': 523.0, 'ItalicAngle': -12.0, 'Ascent': 718.0}, {' ': 278, '!': 278, '"': 355, '#': 556, '$': 556, '%': 889, '&': 667, "'": 191, '(': 333, ')': 333, '*': 389, '+': 584, ',': 278, '-': 333, '.': 278, '/': 278, '0': 556, '1': 556, '2': 556, '3': 556, '4': 556, '5': 556, '6': 556, '7': 556, '8': 556, '9': 556, ':': 278, ';': 278, '<': 584, '=': 584, '>': 584, '?': 556, '@': 1015, 'A': 667, 'B': 667, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 722, 'I': 278, 'J': 500, 'K': 667, 'L': 556, 'M': 833, 'N': 722, 'O': 778, 'P': 667, 'Q': 778, 'R': 722, 'S': 667, 'T': 611, 'U': 722, 'V': 667, 'W': 944, 'X': 667, 'Y': 667, 'Z': 611, '[': 278, '\\': 278, ']': 278, '^': 469, '_': 556, '`': 333, 'a': 556, 'b': 556, 'c': 500, 'd': 556, 'e': 556, 'f': 278, 'g': 556, 'h': 556, 'i': 222, 'j': 222, 'k': 500, 'l': 222, 'm': 833, 'n': 556, 'o': 556, 'p': 556, 'q': 556, 'r': 333, 's': 500, 't': 278, 'u': 556, 'v': 500, 'w': 722, 'x': 500, 'y': 500, 'z': 500, '{': 334, '|': 260, '}': 334, '~': 584, '\xa1': 333, '\xa2': 556, '\xa3': 556, '\xa4': 556, '\xa5': 556, '\xa6': 260, '\xa7': 556, '\xa8': 333, '\xa9': 737, '\xaa': 370, '\xab': 556, '\xac': 584, '\xae': 737, '\xaf': 333, '\xb0': 400, '\xb1': 584, '\xb2': 333, '\xb3': 333, '\xb4': 333, '\xb5': 556, '\xb6': 537, '\xb7': 278, '\xb8': 333, '\xb9': 333, '\xba': 365, '\xbb': 556, '\xbc': 834, '\xbd': 834, '\xbe': 834, '\xbf': 611, '\xc0': 667, '\xc1': 667, '\xc2': 667, '\xc3': 667, '\xc4': 667, '\xc5': 667, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 278, '\xcd': 278, '\xce': 278, '\xcf': 278, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 584, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 667, 
'\xde': 667, '\xdf': 611, '\xe0': 556, '\xe1': 556, '\xe2': 556, '\xe3': 556, '\xe4': 556, '\xe5': 556, '\xe6': 889, '\xe7': 500, '\xe8': 556, '\xe9': 556, '\xea': 556, '\xeb': 556, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 556, '\xf1': 556, '\xf2': 556, '\xf3': 556, '\xf4': 556, '\xf5': 556, '\xf6': 556, '\xf7': 584, '\xf8': 611, '\xf9': 556, '\xfa': 556, '\xfb': 556, '\xfc': 556, '\xfd': 500, '\xfe': 556, '\xff': 500, '\u0100': 667, '\u0101': 556, '\u0102': 667, '\u0103': 556, '\u0104': 667, '\u0105': 556, '\u0106': 722, '\u0107': 500, '\u010c': 722, '\u010d': 500, '\u010e': 722, '\u010f': 643, '\u0110': 722, '\u0111': 556, '\u0112': 667, '\u0113': 556, '\u0116': 667, '\u0117': 556, '\u0118': 667, '\u0119': 556, '\u011a': 667, '\u011b': 556, '\u011e': 778, '\u011f': 556, '\u0122': 778, '\u0123': 556, '\u012a': 278, '\u012b': 278, '\u012e': 278, '\u012f': 222, '\u0130': 278, '\u0131': 278, '\u0136': 667, '\u0137': 500, '\u0139': 556, '\u013a': 222, '\u013b': 556, '\u013c': 222, '\u013d': 556, '\u013e': 299, '\u0141': 556, '\u0142': 222, '\u0143': 722, '\u0144': 556, '\u0145': 722, '\u0146': 556, '\u0147': 722, '\u0148': 556, '\u014c': 778, '\u014d': 556, '\u0150': 778, '\u0151': 556, '\u0152': 1000, '\u0153': 944, '\u0154': 722, '\u0155': 333, '\u0156': 722, '\u0157': 333, '\u0158': 722, '\u0159': 333, '\u015a': 667, '\u015b': 500, '\u015e': 667, '\u015f': 500, '\u0160': 667, '\u0161': 500, '\u0162': 611, '\u0163': 278, '\u0164': 611, '\u0165': 317, '\u016a': 722, '\u016b': 556, '\u016e': 722, '\u016f': 556, '\u0170': 722, '\u0171': 556, '\u0172': 722, '\u0173': 556, '\u0178': 667, '\u0179': 611, '\u017a': 500, '\u017b': 611, '\u017c': 500, '\u017d': 611, '\u017e': 500, '\u0192': 556, '\u0218': 667, '\u0219': 500, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 556, '\u2014': 1000, '\u2018': 222, '\u2019': 222, '\u201a': 222, '\u201c': 333, '\u201d': 333, '\u201e': 
333, '\u2020': 556, '\u2021': 556, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 476, '\u2206': 612, '\u2211': 600, '\u2212': 584, '\u221a': 453, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 471, '\uf6c3': 250, '\ufb01': 500, '\ufb02': 500}),
- 'Symbol': ({'FontName': 'Symbol', 'FontBBox': (-180.0, -293.0, 1090.0, 1010.0), 'FontWeight': 'Medium', 'FontFamily': 'Symbol', 'Flags': 0, 'ItalicAngle': 0.0}, {' ': 250, '!': 333, '#': 500, '%': 833, '&': 778, '(': 333, ')': 333, '+': 549, ',': 250, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 278, ';': 278, '<': 549, '=': 549, '>': 549, '?': 444, '[': 333, ']': 333, '_': 500, '{': 480, '|': 200, '}': 480, '\xac': 713, '\xb0': 400, '\xb1': 549, '\xb5': 576, '\xd7': 549, '\xf7': 549, '\u0192': 500, '\u0391': 722, '\u0392': 667, '\u0393': 603, '\u0395': 611, '\u0396': 611, '\u0397': 722, '\u0398': 741, '\u0399': 333, '\u039a': 722, '\u039b': 686, '\u039c': 889, '\u039d': 722, '\u039e': 645, '\u039f': 722, '\u03a0': 768, '\u03a1': 556, '\u03a3': 592, '\u03a4': 611, '\u03a5': 690, '\u03a6': 763, '\u03a7': 722, '\u03a8': 795, '\u03b1': 631, '\u03b2': 549, '\u03b3': 411, '\u03b4': 494, '\u03b5': 439, '\u03b6': 494, '\u03b7': 603, '\u03b8': 521, '\u03b9': 329, '\u03ba': 549, '\u03bb': 549, '\u03bd': 521, '\u03be': 493, '\u03bf': 549, '\u03c0': 549, '\u03c1': 549, '\u03c2': 439, '\u03c3': 603, '\u03c4': 439, '\u03c5': 576, '\u03c6': 521, '\u03c7': 549, '\u03c8': 686, '\u03c9': 686, '\u03d1': 631, '\u03d2': 620, '\u03d5': 603, '\u03d6': 713, '\u2022': 460, '\u2026': 1000, '\u2032': 247, '\u2033': 411, '\u2044': 167, '\u20ac': 750, '\u2111': 686, '\u2118': 987, '\u211c': 795, '\u2126': 768, '\u2135': 823, '\u2190': 987, '\u2191': 603, '\u2192': 987, '\u2193': 603, '\u2194': 1042, '\u21b5': 658, '\u21d0': 987, '\u21d1': 603, '\u21d2': 987, '\u21d3': 603, '\u21d4': 1042, '\u2200': 713, '\u2202': 494, '\u2203': 549, '\u2205': 823, '\u2206': 612, '\u2207': 713, '\u2208': 713, '\u2209': 713, '\u220b': 439, '\u220f': 823, '\u2211': 713, '\u2212': 549, '\u2217': 500, '\u221a': 549, '\u221d': 713, '\u221e': 713, '\u2220': 768, '\u2227': 603, '\u2228': 603, '\u2229': 768, '\u222a': 768, '\u222b': 274, 
'\u2234': 863, '\u223c': 549, '\u2245': 549, '\u2248': 549, '\u2260': 549, '\u2261': 549, '\u2264': 549, '\u2265': 549, '\u2282': 713, '\u2283': 713, '\u2284': 713, '\u2286': 713, '\u2287': 713, '\u2295': 768, '\u2297': 768, '\u22a5': 658, '\u22c5': 250, '\u2320': 686, '\u2321': 686, '\u2329': 329, '\u232a': 329, '\u25ca': 494, '\u2660': 753, '\u2663': 753, '\u2665': 753, '\u2666': 753, '\uf6d9': 790, '\uf6da': 790, '\uf6db': 890, '\uf8e5': 500, '\uf8e6': 603, '\uf8e7': 1000, '\uf8e8': 790, '\uf8e9': 790, '\uf8ea': 786, '\uf8eb': 384, '\uf8ec': 384, '\uf8ed': 384, '\uf8ee': 384, '\uf8ef': 384, '\uf8f0': 384, '\uf8f1': 494, '\uf8f2': 494, '\uf8f3': 494, '\uf8f4': 494, '\uf8f5': 686, '\uf8f6': 384, '\uf8f7': 384, '\uf8f8': 384, '\uf8f9': 384, '\uf8fa': 384, '\uf8fb': 384, '\uf8fc': 494, '\uf8fd': 494, '\uf8fe': 494, '\uf8ff': 790}),
- 'Times-Bold': ({'FontName': 'Times-Bold', 'Descent': -217.0, 'FontBBox': (-168.0, -218.0, 1000.0, 935.0), 'FontWeight': 'Bold', 'CapHeight': 676.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 461.0, 'ItalicAngle': 0.0, 'Ascent': 683.0}, {' ': 250, '!': 333, '"': 555, '#': 500, '$': 500, '%': 1000, '&': 833, "'": 278, '(': 333, ')': 333, '*': 500, '+': 570, ',': 250, '-': 333, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 333, ';': 333, '<': 570, '=': 570, '>': 570, '?': 500, '@': 930, 'A': 722, 'B': 667, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 778, 'I': 389, 'J': 500, 'K': 778, 'L': 667, 'M': 944, 'N': 722, 'O': 778, 'P': 611, 'Q': 778, 'R': 722, 'S': 556, 'T': 667, 'U': 722, 'V': 722, 'W': 1000, 'X': 722, 'Y': 722, 'Z': 667, '[': 333, '\\': 278, ']': 333, '^': 581, '_': 500, '`': 333, 'a': 500, 'b': 556, 'c': 444, 'd': 556, 'e': 444, 'f': 333, 'g': 500, 'h': 556, 'i': 278, 'j': 333, 'k': 556, 'l': 278, 'm': 833, 'n': 556, 'o': 500, 'p': 556, 'q': 556, 'r': 444, 's': 389, 't': 333, 'u': 556, 'v': 500, 'w': 722, 'x': 500, 'y': 500, 'z': 444, '{': 394, '|': 220, '}': 394, '~': 520, '\xa1': 333, '\xa2': 500, '\xa3': 500, '\xa4': 500, '\xa5': 500, '\xa6': 220, '\xa7': 500, '\xa8': 333, '\xa9': 747, '\xaa': 300, '\xab': 500, '\xac': 570, '\xae': 747, '\xaf': 333, '\xb0': 400, '\xb1': 570, '\xb2': 300, '\xb3': 300, '\xb4': 333, '\xb5': 556, '\xb6': 540, '\xb7': 250, '\xb8': 333, '\xb9': 300, '\xba': 330, '\xbb': 500, '\xbc': 750, '\xbd': 750, '\xbe': 750, '\xbf': 500, '\xc0': 722, '\xc1': 722, '\xc2': 722, '\xc3': 722, '\xc4': 722, '\xc5': 722, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 389, '\xcd': 389, '\xce': 389, '\xcf': 389, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 570, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 722, '\xde': 611, '\xdf': 
556, '\xe0': 500, '\xe1': 500, '\xe2': 500, '\xe3': 500, '\xe4': 500, '\xe5': 500, '\xe6': 722, '\xe7': 444, '\xe8': 444, '\xe9': 444, '\xea': 444, '\xeb': 444, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 500, '\xf1': 556, '\xf2': 500, '\xf3': 500, '\xf4': 500, '\xf5': 500, '\xf6': 500, '\xf7': 570, '\xf8': 500, '\xf9': 556, '\xfa': 556, '\xfb': 556, '\xfc': 556, '\xfd': 500, '\xfe': 556, '\xff': 500, '\u0100': 722, '\u0101': 500, '\u0102': 722, '\u0103': 500, '\u0104': 722, '\u0105': 500, '\u0106': 722, '\u0107': 444, '\u010c': 722, '\u010d': 444, '\u010e': 722, '\u010f': 672, '\u0110': 722, '\u0111': 556, '\u0112': 667, '\u0113': 444, '\u0116': 667, '\u0117': 444, '\u0118': 667, '\u0119': 444, '\u011a': 667, '\u011b': 444, '\u011e': 778, '\u011f': 500, '\u0122': 778, '\u0123': 500, '\u012a': 389, '\u012b': 278, '\u012e': 389, '\u012f': 278, '\u0130': 389, '\u0131': 278, '\u0136': 778, '\u0137': 556, '\u0139': 667, '\u013a': 278, '\u013b': 667, '\u013c': 278, '\u013d': 667, '\u013e': 394, '\u0141': 667, '\u0142': 278, '\u0143': 722, '\u0144': 556, '\u0145': 722, '\u0146': 556, '\u0147': 722, '\u0148': 556, '\u014c': 778, '\u014d': 500, '\u0150': 778, '\u0151': 500, '\u0152': 1000, '\u0153': 722, '\u0154': 722, '\u0155': 444, '\u0156': 722, '\u0157': 444, '\u0158': 722, '\u0159': 444, '\u015a': 556, '\u015b': 389, '\u015e': 556, '\u015f': 389, '\u0160': 556, '\u0161': 389, '\u0162': 667, '\u0163': 333, '\u0164': 667, '\u0165': 416, '\u016a': 722, '\u016b': 556, '\u016e': 722, '\u016f': 556, '\u0170': 722, '\u0171': 556, '\u0172': 722, '\u0173': 556, '\u0178': 722, '\u0179': 667, '\u017a': 444, '\u017b': 667, '\u017c': 444, '\u017d': 667, '\u017e': 444, '\u0192': 500, '\u0218': 556, '\u0219': 389, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 500, '\u2014': 1000, '\u2018': 333, '\u2019': 333, '\u201a': 333, '\u201c': 500, '\u201d': 500, '\u201e': 500, '\u2020': 500, 
'\u2021': 500, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 494, '\u2206': 612, '\u2211': 600, '\u2212': 570, '\u221a': 549, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 494, '\uf6c3': 250, '\ufb01': 556, '\ufb02': 556}),
- 'Times-BoldItalic': ({'FontName': 'Times-BoldItalic', 'Descent': -217.0, 'FontBBox': (-200.0, -218.0, 996.0, 921.0), 'FontWeight': 'Bold', 'CapHeight': 669.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 462.0, 'ItalicAngle': -15.0, 'Ascent': 683.0}, {' ': 250, '!': 389, '"': 555, '#': 500, '$': 500, '%': 833, '&': 778, "'": 278, '(': 333, ')': 333, '*': 500, '+': 570, ',': 250, '-': 333, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 333, ';': 333, '<': 570, '=': 570, '>': 570, '?': 500, '@': 832, 'A': 667, 'B': 667, 'C': 667, 'D': 722, 'E': 667, 'F': 667, 'G': 722, 'H': 778, 'I': 389, 'J': 500, 'K': 667, 'L': 611, 'M': 889, 'N': 722, 'O': 722, 'P': 611, 'Q': 722, 'R': 667, 'S': 556, 'T': 611, 'U': 722, 'V': 667, 'W': 889, 'X': 667, 'Y': 611, 'Z': 611, '[': 333, '\\': 278, ']': 333, '^': 570, '_': 500, '`': 333, 'a': 500, 'b': 500, 'c': 444, 'd': 500, 'e': 444, 'f': 333, 'g': 500, 'h': 556, 'i': 278, 'j': 278, 'k': 500, 'l': 278, 'm': 778, 'n': 556, 'o': 500, 'p': 500, 'q': 500, 'r': 389, 's': 389, 't': 278, 'u': 556, 'v': 444, 'w': 667, 'x': 500, 'y': 444, 'z': 389, '{': 348, '|': 220, '}': 348, '~': 570, '\xa1': 389, '\xa2': 500, '\xa3': 500, '\xa4': 500, '\xa5': 500, '\xa6': 220, '\xa7': 500, '\xa8': 333, '\xa9': 747, '\xaa': 266, '\xab': 500, '\xac': 606, '\xae': 747, '\xaf': 333, '\xb0': 400, '\xb1': 570, '\xb2': 300, '\xb3': 300, '\xb4': 333, '\xb5': 576, '\xb6': 500, '\xb7': 250, '\xb8': 333, '\xb9': 300, '\xba': 300, '\xbb': 500, '\xbc': 750, '\xbd': 750, '\xbe': 750, '\xbf': 500, '\xc0': 667, '\xc1': 667, '\xc2': 667, '\xc3': 667, '\xc4': 667, '\xc5': 667, '\xc6': 944, '\xc7': 667, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 389, '\xcd': 389, '\xce': 389, '\xcf': 389, '\xd0': 722, '\xd1': 722, '\xd2': 722, '\xd3': 722, '\xd4': 722, '\xd5': 722, '\xd6': 722, '\xd7': 570, '\xd8': 722, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 611, '\xde': 611, 
'\xdf': 500, '\xe0': 500, '\xe1': 500, '\xe2': 500, '\xe3': 500, '\xe4': 500, '\xe5': 500, '\xe6': 722, '\xe7': 444, '\xe8': 444, '\xe9': 444, '\xea': 444, '\xeb': 444, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 500, '\xf1': 556, '\xf2': 500, '\xf3': 500, '\xf4': 500, '\xf5': 500, '\xf6': 500, '\xf7': 570, '\xf8': 500, '\xf9': 556, '\xfa': 556, '\xfb': 556, '\xfc': 556, '\xfd': 444, '\xfe': 500, '\xff': 444, '\u0100': 667, '\u0101': 500, '\u0102': 667, '\u0103': 500, '\u0104': 667, '\u0105': 500, '\u0106': 667, '\u0107': 444, '\u010c': 667, '\u010d': 444, '\u010e': 722, '\u010f': 608, '\u0110': 722, '\u0111': 500, '\u0112': 667, '\u0113': 444, '\u0116': 667, '\u0117': 444, '\u0118': 667, '\u0119': 444, '\u011a': 667, '\u011b': 444, '\u011e': 722, '\u011f': 500, '\u0122': 722, '\u0123': 500, '\u012a': 389, '\u012b': 278, '\u012e': 389, '\u012f': 278, '\u0130': 389, '\u0131': 278, '\u0136': 667, '\u0137': 500, '\u0139': 611, '\u013a': 278, '\u013b': 611, '\u013c': 278, '\u013d': 611, '\u013e': 382, '\u0141': 611, '\u0142': 278, '\u0143': 722, '\u0144': 556, '\u0145': 722, '\u0146': 556, '\u0147': 722, '\u0148': 556, '\u014c': 722, '\u014d': 500, '\u0150': 722, '\u0151': 500, '\u0152': 944, '\u0153': 722, '\u0154': 667, '\u0155': 389, '\u0156': 667, '\u0157': 389, '\u0158': 667, '\u0159': 389, '\u015a': 556, '\u015b': 389, '\u015e': 556, '\u015f': 389, '\u0160': 556, '\u0161': 389, '\u0162': 611, '\u0163': 278, '\u0164': 611, '\u0165': 366, '\u016a': 722, '\u016b': 556, '\u016e': 722, '\u016f': 556, '\u0170': 722, '\u0171': 556, '\u0172': 722, '\u0173': 556, '\u0178': 611, '\u0179': 611, '\u017a': 389, '\u017b': 611, '\u017c': 389, '\u017d': 611, '\u017e': 389, '\u0192': 500, '\u0218': 556, '\u0219': 389, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 500, '\u2014': 1000, '\u2018': 333, '\u2019': 333, '\u201a': 333, '\u201c': 500, '\u201d': 500, '\u201e': 500, '\u2020': 
500, '\u2021': 500, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 494, '\u2206': 612, '\u2211': 600, '\u2212': 606, '\u221a': 549, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 494, '\uf6c3': 250, '\ufb01': 556, '\ufb02': 556}),
- 'Times-Italic': ({'FontName': 'Times-Italic', 'Descent': -217.0, 'FontBBox': (-169.0, -217.0, 1010.0, 883.0), 'FontWeight': 'Medium', 'CapHeight': 653.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 441.0, 'ItalicAngle': -15.5, 'Ascent': 683.0}, {' ': 250, '!': 333, '"': 420, '#': 500, '$': 500, '%': 833, '&': 778, "'": 214, '(': 333, ')': 333, '*': 500, '+': 675, ',': 250, '-': 333, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 333, ';': 333, '<': 675, '=': 675, '>': 675, '?': 500, '@': 920, 'A': 611, 'B': 611, 'C': 667, 'D': 722, 'E': 611, 'F': 611, 'G': 722, 'H': 722, 'I': 333, 'J': 444, 'K': 667, 'L': 556, 'M': 833, 'N': 667, 'O': 722, 'P': 611, 'Q': 722, 'R': 611, 'S': 500, 'T': 556, 'U': 722, 'V': 611, 'W': 833, 'X': 611, 'Y': 556, 'Z': 556, '[': 389, '\\': 278, ']': 389, '^': 422, '_': 500, '`': 333, 'a': 500, 'b': 500, 'c': 444, 'd': 500, 'e': 444, 'f': 278, 'g': 500, 'h': 500, 'i': 278, 'j': 278, 'k': 444, 'l': 278, 'm': 722, 'n': 500, 'o': 500, 'p': 500, 'q': 500, 'r': 389, 's': 389, 't': 278, 'u': 500, 'v': 444, 'w': 667, 'x': 444, 'y': 444, 'z': 389, '{': 400, '|': 275, '}': 400, '~': 541, '\xa1': 389, '\xa2': 500, '\xa3': 500, '\xa4': 500, '\xa5': 500, '\xa6': 275, '\xa7': 500, '\xa8': 333, '\xa9': 760, '\xaa': 276, '\xab': 500, '\xac': 675, '\xae': 760, '\xaf': 333, '\xb0': 400, '\xb1': 675, '\xb2': 300, '\xb3': 300, '\xb4': 333, '\xb5': 500, '\xb6': 523, '\xb7': 250, '\xb8': 333, '\xb9': 300, '\xba': 310, '\xbb': 500, '\xbc': 750, '\xbd': 750, '\xbe': 750, '\xbf': 500, '\xc0': 611, '\xc1': 611, '\xc2': 611, '\xc3': 611, '\xc4': 611, '\xc5': 611, '\xc6': 889, '\xc7': 667, '\xc8': 611, '\xc9': 611, '\xca': 611, '\xcb': 611, '\xcc': 333, '\xcd': 333, '\xce': 333, '\xcf': 333, '\xd0': 722, '\xd1': 667, '\xd2': 722, '\xd3': 722, '\xd4': 722, '\xd5': 722, '\xd6': 722, '\xd7': 675, '\xd8': 722, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 556, '\xde': 611, 
'\xdf': 500, '\xe0': 500, '\xe1': 500, '\xe2': 500, '\xe3': 500, '\xe4': 500, '\xe5': 500, '\xe6': 667, '\xe7': 444, '\xe8': 444, '\xe9': 444, '\xea': 444, '\xeb': 444, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 500, '\xf1': 500, '\xf2': 500, '\xf3': 500, '\xf4': 500, '\xf5': 500, '\xf6': 500, '\xf7': 675, '\xf8': 500, '\xf9': 500, '\xfa': 500, '\xfb': 500, '\xfc': 500, '\xfd': 444, '\xfe': 500, '\xff': 444, '\u0100': 611, '\u0101': 500, '\u0102': 611, '\u0103': 500, '\u0104': 611, '\u0105': 500, '\u0106': 667, '\u0107': 444, '\u010c': 667, '\u010d': 444, '\u010e': 722, '\u010f': 544, '\u0110': 722, '\u0111': 500, '\u0112': 611, '\u0113': 444, '\u0116': 611, '\u0117': 444, '\u0118': 611, '\u0119': 444, '\u011a': 611, '\u011b': 444, '\u011e': 722, '\u011f': 500, '\u0122': 722, '\u0123': 500, '\u012a': 333, '\u012b': 278, '\u012e': 333, '\u012f': 278, '\u0130': 333, '\u0131': 278, '\u0136': 667, '\u0137': 444, '\u0139': 556, '\u013a': 278, '\u013b': 556, '\u013c': 278, '\u013d': 611, '\u013e': 300, '\u0141': 556, '\u0142': 278, '\u0143': 667, '\u0144': 500, '\u0145': 667, '\u0146': 500, '\u0147': 667, '\u0148': 500, '\u014c': 722, '\u014d': 500, '\u0150': 722, '\u0151': 500, '\u0152': 944, '\u0153': 667, '\u0154': 611, '\u0155': 389, '\u0156': 611, '\u0157': 389, '\u0158': 611, '\u0159': 389, '\u015a': 500, '\u015b': 389, '\u015e': 500, '\u015f': 389, '\u0160': 500, '\u0161': 389, '\u0162': 556, '\u0163': 278, '\u0164': 556, '\u0165': 300, '\u016a': 722, '\u016b': 500, '\u016e': 722, '\u016f': 500, '\u0170': 722, '\u0171': 500, '\u0172': 722, '\u0173': 500, '\u0178': 556, '\u0179': 556, '\u017a': 389, '\u017b': 556, '\u017c': 389, '\u017d': 556, '\u017e': 389, '\u0192': 500, '\u0218': 500, '\u0219': 389, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 500, '\u2014': 889, '\u2018': 333, '\u2019': 333, '\u201a': 333, '\u201c': 556, '\u201d': 556, '\u201e': 556, '\u2020': 
500, '\u2021': 500, '\u2022': 350, '\u2026': 889, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 980, '\u2202': 476, '\u2206': 612, '\u2211': 600, '\u2212': 675, '\u221a': 453, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 471, '\uf6c3': 250, '\ufb01': 500, '\ufb02': 500}),
- 'Times-Roman': ({'FontName': 'Times-Roman', 'Descent': -217.0, 'FontBBox': (-168.0, -218.0, 1000.0, 898.0), 'FontWeight': 'Roman', 'CapHeight': 662.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 450.0, 'ItalicAngle': 0.0, 'Ascent': 683.0}, {' ': 250, '!': 333, '"': 408, '#': 500, '$': 500, '%': 833, '&': 778, "'": 180, '(': 333, ')': 333, '*': 500, '+': 564, ',': 250, '-': 333, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 278, ';': 278, '<': 564, '=': 564, '>': 564, '?': 444, '@': 921, 'A': 722, 'B': 667, 'C': 667, 'D': 722, 'E': 611, 'F': 556, 'G': 722, 'H': 722, 'I': 333, 'J': 389, 'K': 722, 'L': 611, 'M': 889, 'N': 722, 'O': 722, 'P': 556, 'Q': 722, 'R': 667, 'S': 556, 'T': 611, 'U': 722, 'V': 722, 'W': 944, 'X': 722, 'Y': 722, 'Z': 611, '[': 333, '\\': 278, ']': 333, '^': 469, '_': 500, '`': 333, 'a': 444, 'b': 500, 'c': 444, 'd': 500, 'e': 444, 'f': 333, 'g': 500, 'h': 500, 'i': 278, 'j': 278, 'k': 500, 'l': 278, 'm': 778, 'n': 500, 'o': 500, 'p': 500, 'q': 500, 'r': 333, 's': 389, 't': 278, 'u': 500, 'v': 500, 'w': 722, 'x': 500, 'y': 500, 'z': 444, '{': 480, '|': 200, '}': 480, '~': 541, '\xa1': 333, '\xa2': 500, '\xa3': 500, '\xa4': 500, '\xa5': 500, '\xa6': 200, '\xa7': 500, '\xa8': 333, '\xa9': 760, '\xaa': 276, '\xab': 500, '\xac': 564, '\xae': 760, '\xaf': 333, '\xb0': 400, '\xb1': 564, '\xb2': 300, '\xb3': 300, '\xb4': 333, '\xb5': 500, '\xb6': 453, '\xb7': 250, '\xb8': 333, '\xb9': 300, '\xba': 310, '\xbb': 500, '\xbc': 750, '\xbd': 750, '\xbe': 750, '\xbf': 444, '\xc0': 722, '\xc1': 722, '\xc2': 722, '\xc3': 722, '\xc4': 722, '\xc5': 722, '\xc6': 889, '\xc7': 667, '\xc8': 611, '\xc9': 611, '\xca': 611, '\xcb': 611, '\xcc': 333, '\xcd': 333, '\xce': 333, '\xcf': 333, '\xd0': 722, '\xd1': 722, '\xd2': 722, '\xd3': 722, '\xd4': 722, '\xd5': 722, '\xd6': 722, '\xd7': 564, '\xd8': 722, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 722, '\xde': 556, '\xdf': 
500, '\xe0': 444, '\xe1': 444, '\xe2': 444, '\xe3': 444, '\xe4': 444, '\xe5': 444, '\xe6': 667, '\xe7': 444, '\xe8': 444, '\xe9': 444, '\xea': 444, '\xeb': 444, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 500, '\xf1': 500, '\xf2': 500, '\xf3': 500, '\xf4': 500, '\xf5': 500, '\xf6': 500, '\xf7': 564, '\xf8': 500, '\xf9': 500, '\xfa': 500, '\xfb': 500, '\xfc': 500, '\xfd': 500, '\xfe': 500, '\xff': 500, '\u0100': 722, '\u0101': 444, '\u0102': 722, '\u0103': 444, '\u0104': 722, '\u0105': 444, '\u0106': 667, '\u0107': 444, '\u010c': 667, '\u010d': 444, '\u010e': 722, '\u010f': 588, '\u0110': 722, '\u0111': 500, '\u0112': 611, '\u0113': 444, '\u0116': 611, '\u0117': 444, '\u0118': 611, '\u0119': 444, '\u011a': 611, '\u011b': 444, '\u011e': 722, '\u011f': 500, '\u0122': 722, '\u0123': 500, '\u012a': 333, '\u012b': 278, '\u012e': 333, '\u012f': 278, '\u0130': 333, '\u0131': 278, '\u0136': 722, '\u0137': 500, '\u0139': 611, '\u013a': 278, '\u013b': 611, '\u013c': 278, '\u013d': 611, '\u013e': 344, '\u0141': 611, '\u0142': 278, '\u0143': 722, '\u0144': 500, '\u0145': 722, '\u0146': 500, '\u0147': 722, '\u0148': 500, '\u014c': 722, '\u014d': 500, '\u0150': 722, '\u0151': 500, '\u0152': 889, '\u0153': 722, '\u0154': 667, '\u0155': 333, '\u0156': 667, '\u0157': 333, '\u0158': 667, '\u0159': 333, '\u015a': 556, '\u015b': 389, '\u015e': 556, '\u015f': 389, '\u0160': 556, '\u0161': 389, '\u0162': 611, '\u0163': 278, '\u0164': 611, '\u0165': 326, '\u016a': 722, '\u016b': 500, '\u016e': 722, '\u016f': 500, '\u0170': 722, '\u0171': 500, '\u0172': 722, '\u0173': 500, '\u0178': 722, '\u0179': 611, '\u017a': 444, '\u017b': 611, '\u017c': 444, '\u017d': 611, '\u017e': 444, '\u0192': 500, '\u0218': 556, '\u0219': 389, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 500, '\u2014': 1000, '\u2018': 333, '\u2019': 333, '\u201a': 333, '\u201c': 444, '\u201d': 444, '\u201e': 444, '\u2020': 500, 
'\u2021': 500, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 980, '\u2202': 476, '\u2206': 612, '\u2211': 600, '\u2212': 564, '\u221a': 453, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 471, '\uf6c3': 250, '\ufb01': 556, '\ufb02': 556}),
- 'ZapfDingbats': ({'FontName': 'ZapfDingbats', 'FontBBox': (-1.0, -143.0, 981.0, 820.0), 'FontWeight': 'Medium', 'FontFamily': 'ITC', 'Flags': 0, 'ItalicAngle': 0.0}, {'\x01': 974, '\x02': 961, '\x03': 980, '\x04': 719, '\x05': 789, '\x06': 494, '\x07': 552, '\x08': 537, '\t': 577, '\n': 692, '\x0b': 960, '\x0c': 939, '\r': 549, '\x0e': 855, '\x0f': 911, '\x10': 933, '\x11': 945, '\x12': 974, '\x13': 755, '\x14': 846, '\x15': 762, '\x16': 761, '\x17': 571, '\x18': 677, '\x19': 763, '\x1a': 760, '\x1b': 759, '\x1c': 754, '\x1d': 786, '\x1e': 788, '\x1f': 788, ' ': 790, '!': 793, '"': 794, '#': 816, '$': 823, '%': 789, '&': 841, "'": 823, '(': 833, ')': 816, '*': 831, '+': 923, ',': 744, '-': 723, '.': 749, '/': 790, '0': 792, '1': 695, '2': 776, '3': 768, '4': 792, '5': 759, '6': 707, '7': 708, '8': 682, '9': 701, ':': 826, ';': 815, '<': 789, '=': 789, '>': 707, '?': 687, '@': 696, 'A': 689, 'B': 786, 'C': 787, 'D': 713, 'E': 791, 'F': 785, 'G': 791, 'H': 873, 'I': 761, 'J': 762, 'K': 759, 'L': 892, 'M': 892, 'N': 788, 'O': 784, 'Q': 438, 'R': 138, 'S': 277, 'T': 415, 'U': 509, 'V': 410, 'W': 234, 'X': 234, 'Y': 390, 'Z': 390, '[': 276, '\\': 276, ']': 317, '^': 317, '_': 334, '`': 334, 'a': 392, 'b': 392, 'c': 668, 'd': 668, 'e': 732, 'f': 544, 'g': 544, 'h': 910, 'i': 911, 'j': 667, 'k': 760, 'l': 760, 'm': 626, 'n': 694, 'o': 595, 'p': 776, 'u': 690, 'v': 791, 'w': 790, 'x': 788, 'y': 788, 'z': 788, '{': 788, '|': 788, '}': 788, '~': 788, '\x7f': 788, '\x80': 788, '\x81': 788, '\x82': 788, '\x83': 788, '\x84': 788, '\x85': 788, '\x86': 788, '\x87': 788, '\x88': 788, '\x89': 788, '\x8a': 788, '\x8b': 788, '\x8c': 788, '\x8d': 788, '\x8e': 788, '\x8f': 788, '\x90': 788, '\x91': 788, '\x92': 788, '\x93': 788, '\x94': 788, '\x95': 788, '\x96': 788, '\x97': 788, '\x98': 788, '\x99': 788, '\x9a': 788, '\x9b': 788, '\x9c': 788, '\x9d': 788, '\x9e': 788, '\x9f': 788, '\xa0': 894, '\xa1': 838, '\xa2': 924, '\xa3': 1016, '\xa4': 458, '\xa5': 924, '\xa6': 918, '\xa7': 
927, '\xa8': 928, '\xa9': 928, '\xaa': 834, '\xab': 873, '\xac': 828, '\xad': 924, '\xae': 917, '\xaf': 930, '\xb0': 931, '\xb1': 463, '\xb2': 883, '\xb3': 836, '\xb4': 867, '\xb5': 696, '\xb6': 874, '\xb7': 760, '\xb8': 946, '\xb9': 865, '\xba': 967, '\xbb': 831, '\xbc': 873, '\xbd': 927, '\xbe': 970, '\xbf': 918, '\xc0': 748, '\xc1': 836, '\xc2': 771, '\xc3': 888, '\xc4': 748, '\xc5': 771, '\xc6': 888, '\xc7': 867, '\xc8': 696, '\xc9': 874, '\xca': 974, '\xcb': 762, '\xcc': 759, '\xcd': 509, '\xce': 410}),
-}
@@ -1,151 +0,0 @@
-"""Functions that can be used for the most common use-cases for pdfminer.six"""
-
-import logging
-import sys
-from io import StringIO
-
-from .converter import XMLConverter, HTMLConverter, TextConverter, \
-    PDFPageAggregator
-from .image import ImageWriter
-from .layout import LAParams
-from .pdfdevice import TagExtractor
-from .pdfinterp import PDFResourceManager, PDFPageInterpreter
-from .pdfpage import PDFPage
-from .utils import open_filename
-
-
-def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
-                       laparams=None, maxpages=0, page_numbers=None,
-                       password="", scale=1.0, rotation=0, layoutmode='normal',
-                       output_dir=None, strip_control=False, debug=False,
-                       disable_caching=False, **kwargs):
-    """Parses text from inf-file and writes to outfp file-like object.
-
-    Takes loads of optional arguments but the defaults are somewhat sane.
-    Beware laparams: Including an empty LAParams is not the same as passing
-    None!
-
-    :param inf: a file-like object to read PDF structure from, such as a
-        file handler (using the builtin `open()` function) or a `BytesIO`.
-    :param outfp: a file-like object to write the text to. For the 'xml',
-        'html' and 'tag' output types, `sys.stdout` is replaced by
-        `sys.stdout.buffer` before the converter is created.
-    :param output_type: May be 'text', 'xml', 'html', 'tag'. Only 'text' works
-        properly.
-    :param codec: Text decoding codec; handed to the selected converter.
-    :param laparams: An LAParams object from pdfminer.layout. Default is None
-        but may not layout correctly.
-    :param maxpages: How many pages to stop parsing after
-    :param page_numbers: zero-indexed page numbers to operate on.
-    :param password: For encrypted PDFs, the password to decrypt.
-    :param scale: Scale factor; only passed to the 'html' converter.
-    :param rotation: Value added to each page's `rotate` attribute (the sum
-        is taken modulo 360).
-    :param layoutmode: Default is 'normal', see
-        pdfminer.converter.HTMLConverter; only passed to the 'html' converter.
-    :param output_dir: If given, creates an ImageWriter for extracted images.
-    :param strip_control: Passed to the 'xml' converter as `stripcontrol`;
-        not used by the other output types.
-    :param debug: If true, sets the root logger level to DEBUG.
-    :param disable_caching: If true, the resource manager and the page
-        iterator are created with caching turned off.
-    :param kwargs: Accepted so that callers may pass extra keyword arguments;
-        nothing in this function reads them.
-    :return: nothing, acting as it does on two streams. Use StringIO to get
-        strings.
-    :raises ValueError: if output_type is not one of the supported values.
-    """
-    # Reject unknown output types up front. Without this check no converter
-    # is created and the function fails later with an UnboundLocalError on
-    # `device`, after side effects such as creating the ImageWriter.
-    if output_type not in ('text', 'xml', 'html', 'tag'):
-        raise ValueError(
-            "output_type must be 'text', 'xml', 'html' or 'tag', got %r"
-            % (output_type,))
-
-    if debug:
-        logging.getLogger().setLevel(logging.DEBUG)
-
-    imagewriter = None
-    if output_dir:
-        imagewriter = ImageWriter(output_dir)
-
-    rsrcmgr = PDFResourceManager(caching=not disable_caching)
-
-    # The text converter is built before the stdout swap below, so it keeps
-    # the text-mode stream it was given.
-    if output_type == 'text':
-        device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
-                               imagewriter=imagewriter)
-
-    # The remaining converters receive the underlying binary buffer when the
-    # caller passed sys.stdout itself.
-    if outfp == sys.stdout:
-        outfp = sys.stdout.buffer
-
-    if output_type == 'xml':
-        device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
-                              imagewriter=imagewriter,
-                              stripcontrol=strip_control)
-    elif output_type == 'html':
-        device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
-                               layoutmode=layoutmode, laparams=laparams,
-                               imagewriter=imagewriter)
-    elif output_type == 'tag':
-        device = TagExtractor(rsrcmgr, outfp, codec=codec)
-
-    interpreter = PDFPageInterpreter(rsrcmgr, device)
-    for page in PDFPage.get_pages(inf,
-                                  page_numbers,
-                                  maxpages=maxpages,
-                                  password=password,
-                                  caching=not disable_caching):
-        # Apply the caller's extra rotation on top of the page's own.
-        page.rotate = (page.rotate + rotation) % 360
-        interpreter.process_page(page)
-
-    device.close()
-
-
-def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
-                 caching=True, codec='utf-8', laparams=None):
-    """Run every selected page of a PDF through a TextConverter and return
-    the accumulated text.
-
-    :param pdf_file: Either a file path or a file-like object for the PDF file
-        to be worked on.
-    :param password: For encrypted PDFs, the password to decrypt.
-    :param page_numbers: List of zero-indexed page numbers to extract.
-    :param maxpages: The maximum number of pages to parse
-    :param caching: If resources should be cached
-    :param codec: Text decoding codec
-    :param laparams: An LAParams object from pdfminer.layout. When None, a
-        default-constructed LAParams() is used instead.
-    :return: a string containing all of the text extracted.
-    """
-    layout_params = LAParams() if laparams is None else laparams
-
-    with open_filename(pdf_file, "rb") as pdf_fp:
-        # The converter writes into an in-memory buffer that is read back
-        # once all pages have been processed.
-        with StringIO() as text_buffer:
-            resource_manager = PDFResourceManager(caching=caching)
-            converter = TextConverter(resource_manager, text_buffer,
-                                      codec=codec, laparams=layout_params)
-            page_interpreter = PDFPageInterpreter(resource_manager, converter)
-
-            selected_pages = PDFPage.get_pages(pdf_fp, page_numbers,
-                                               maxpages=maxpages,
-                                               password=password,
-                                               caching=caching)
-            for pdf_page in selected_pages:
-                page_interpreter.process_page(pdf_page)
-
-            return text_buffer.getvalue()
-
-
-def extract_pages(pdf_file, password='', page_numbers=None, maxpages=0,
-                  caching=True, laparams=None):
-    """Extract and yield LTPage objects, one per processed page.
-
-    This is a generator: the PDF is opened on first iteration and each page
-    is interpreted only when the next item is requested.
-
-    :param pdf_file: Either a file path or a file-like object for the PDF file
-        to be worked on.
-    :param password: For encrypted PDFs, the password to decrypt.
-    :param page_numbers: List of zero-indexed page numbers to extract.
-    :param maxpages: The maximum number of pages to parse
-    :param caching: If resources should be cached
-    :param laparams: An LAParams object from pdfminer.layout. When None, a
-        default-constructed LAParams() is used instead.
-    :return: yields the aggregator's `get_result()` value after each page.
-    """
-    layout_params = laparams if laparams is not None else LAParams()
-
-    with open_filename(pdf_file, "rb") as pdf_fp:
-        rsrcmgr = PDFResourceManager(caching=caching)
-        aggregator = PDFPageAggregator(rsrcmgr, laparams=layout_params)
-        page_interpreter = PDFPageInterpreter(rsrcmgr, aggregator)
-        selected_pages = PDFPage.get_pages(pdf_fp, page_numbers,
-                                           maxpages=maxpages,
-                                           password=password,
-                                           caching=caching)
-        for pdf_page in selected_pages:
-            page_interpreter.process_page(pdf_page)
-            # The aggregator holds the layout of the page just processed.
-            yield aggregator.get_result()
@@ -1,165 +0,0 @@
-import os
-import os.path
-import struct
-from io import BytesIO
-
-from .jbig2 import JBIG2StreamReader, JBIG2StreamWriter
-from .pdfcolor import LITERAL_DEVICE_CMYK
-from .pdfcolor import LITERAL_DEVICE_GRAY
-from .pdfcolor import LITERAL_DEVICE_RGB
-from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE
-
-
-def align32(x):
-    return ((x+3)//4)*4
-
-
-class BMPWriter:
-    def __init__(self, fp, bits, width, height):
-        self.fp = fp
-        self.bits = bits
-        self.width = width
-        self.height = height
-        if bits == 1:
-            ncols = 2
-        elif bits == 8:
-            ncols = 256
-        elif bits == 24:
-            ncols = 0
-        else:
-            raise ValueError(bits)
-        self.linesize = align32((self.width*self.bits+7)//8)
-        self.datasize = self.linesize * self.height
-        headersize = 14+40+ncols*4
-        info = struct.pack('<IiiHHIIIIII', 40, self.width, self.height,
-                           1, self.bits, 0, self.datasize, 0, 0, ncols, 0)
-        assert len(info) == 40, str(len(info))
-        header = struct.pack('<ccIHHI', b'B', b'M',
-                             headersize+self.datasize, 0, 0, headersize)
-        assert len(header) == 14, str(len(header))
-        self.fp.write(header)
-        self.fp.write(info)
-        if ncols == 2:
-            # B&W color table
-            for i in (0, 255):
-                self.fp.write(struct.pack('BBBx', i, i, i))
-        elif ncols == 256:
-            # grayscale color table
-            for i in range(256):
-                self.fp.write(struct.pack('BBBx', i, i, i))
-        self.pos0 = self.fp.tell()
-        self.pos1 = self.pos0 + self.datasize
-        return
-
-    def write_line(self, y, data):
-        self.fp.seek(self.pos1 - (y+1)*self.linesize)
-        self.fp.write(data)
-        return
-
-
-class ImageWriter:
-    """Write image to a file
-
-    Supports various image types: JPEG, JBIG2 and bitmaps
-    """
-
-    def __init__(self, outdir):
-        self.outdir = outdir
-        if not os.path.exists(self.outdir):
-            os.makedirs(self.outdir)
-        return
-
-    def export_image(self, image):
-        (width, height) = image.srcsize
-
-        is_jbig2 = self.is_jbig2_image(image)
-        ext = self._get_image_extension(image, width, height, is_jbig2)
-        name, path = self._create_unique_image_name(self.outdir,
-                                                    image.name, ext)
-
-        fp = open(path, 'wb')
-        if ext == '.jpg':
-            raw_data = image.stream.get_rawdata()
-            if LITERAL_DEVICE_CMYK in image.colorspace:
-                from PIL import Image
-                from PIL import ImageChops
-                ifp = BytesIO(raw_data)
-                i = Image.open(ifp)
-                i = ImageChops.invert(i)
-                i = i.convert('RGB')
-                i.save(fp, 'JPEG')
-            else:
-                fp.write(raw_data)
-        elif is_jbig2:
-            input_stream = BytesIO()
-            input_stream.write(image.stream.get_data())
-            input_stream.seek(0)
-            reader = JBIG2StreamReader(input_stream)
-            segments = reader.get_segments()
-
-            writer = JBIG2StreamWriter(fp)
-            writer.write_file(segments)
-        elif image.bits == 1:
-            bmp = BMPWriter(fp, 1, width, height)
-            data = image.stream.get_data()
-            i = 0
-            width = (width+7)//8
-            for y in range(height):
-                bmp.write_line(y, data[i:i+width])
-                i += width
-        elif image.bits == 8 and LITERAL_DEVICE_RGB in image.colorspace:
-            bmp = BMPWriter(fp, 24, width, height)
-            data = image.stream.get_data()
-            i = 0
-            width = width*3
-            for y in range(height):
-                bmp.write_line(y, data[i:i+width])
-                i += width
-        elif image.bits == 8 and LITERAL_DEVICE_GRAY in image.colorspace:
-            bmp = BMPWriter(fp, 8, width, height)
-            data = image.stream.get_data()
-            i = 0
-            for y in range(height):
-                bmp.write_line(y, data[i:i+width])
-                i += width
-        else:
-            fp.write(image.stream.get_data())
-        fp.close()
-        return name
-
-    @staticmethod
-    def is_jbig2_image(image):
-        filters = image.stream.get_filters()
-        is_jbig2 = False
-        for filter_name, params in filters:
-            if filter_name in LITERALS_JBIG2_DECODE:
-                is_jbig2 = True
-                break
-        return is_jbig2
-
-    @staticmethod
-    def _get_image_extension(image, width, height, is_jbig2):
-        filters = image.stream.get_filters()
-        if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
-            ext = '.jpg'
-        elif is_jbig2:
-            ext = '.jb2'
-        elif (image.bits == 1 or
-              image.bits == 8 and
-              (LITERAL_DEVICE_RGB in image.colorspace or
-               LITERAL_DEVICE_GRAY in image.colorspace)):
-            ext = '.%dx%d.bmp' % (width, height)
-        else:
-            ext = '.%d.%dx%d.img' % (image.bits, width, height)
-        return ext
-
-    @staticmethod
-    def _create_unique_image_name(dirname, image_name, ext):
-        name = image_name + ext
-        path = os.path.join(dirname, name)
-        img_index = 0
-        while os.path.exists(path):
-            name = '%s.%d%s' % (image_name, img_index, ext)
-            path = os.path.join(dirname, name)
-            img_index += 1
-        return name, path
@@ -1,321 +0,0 @@
-import math
-import os
-from struct import pack, unpack, calcsize
-
-# segment structure base
-SEG_STRUCT = [
-    (">L", "number"),
-    (">B", "flags"),
-    (">B", "retention_flags"),
-    (">B", "page_assoc"),
-    (">L", "data_length"),
-]
-
-# segment header literals
-HEADER_FLAG_DEFERRED = 0b10000000
-HEADER_FLAG_PAGE_ASSOC_LONG = 0b01000000
-
-SEG_TYPE_MASK = 0b00111111
-
-REF_COUNT_SHORT_MASK = 0b11100000
-REF_COUNT_LONG_MASK = 0x1fffffff
-REF_COUNT_LONG = 7
-
-DATA_LEN_UNKNOWN = 0xffffffff
-
-# segment types
-SEG_TYPE_IMMEDIATE_GEN_REGION = 38
-SEG_TYPE_END_OF_PAGE = 49
-SEG_TYPE_END_OF_FILE = 50
-
-# file literals
-FILE_HEADER_ID = b'\x97\x4A\x42\x32\x0D\x0A\x1A\x0A'
-FILE_HEAD_FLAG_SEQUENTIAL = 0b00000001
-FILE_HEAD_FLAG_PAGES_UNKNOWN = 0b00000010
-
-
-def bit_set(bit_pos, value):
-    return bool((value >> bit_pos) & 1)
-
-
-def check_flag(flag, value):
-    return bool(flag & value)
-
-
-def masked_value(mask, value):
-    for bit_pos in range(0, 31):
-        if bit_set(bit_pos, mask):
-            return (value & mask) >> bit_pos
-
-    raise Exception("Invalid mask or value")
-
-
-def mask_value(mask, value):
-    for bit_pos in range(0, 31):
-        if bit_set(bit_pos, mask):
-            return (value & (mask >> bit_pos)) << bit_pos
-
-    raise Exception("Invalid mask or value")
-
-
-class JBIG2StreamReader:
-    """Read segments from a JBIG2 byte stream"""
-
-    def __init__(self, stream):
-        self.stream = stream
-
-    def get_segments(self):
-        segments = []
-        while not self.is_eof():
-            segment = {}
-            for field_format, name in SEG_STRUCT:
-                field_len = calcsize(field_format)
-                field = self.stream.read(field_len)
-                if len(field) < field_len:
-                    segment["_error"] = True
-                    break
-                value = unpack(field_format, field)
-                if len(value) == 1:
-                    [value] = value
-                parser = getattr(self, "parse_%s" % name, None)
-                if callable(parser):
-                    value = parser(segment, value, field)
-                segment[name] = value
-
-            if not segment.get("_error"):
-                segments.append(segment)
-        return segments
-
-    def is_eof(self):
-        if self.stream.read(1) == b'':
-            return True
-        else:
-            self.stream.seek(-1, os.SEEK_CUR)
-            return False
-
-    def parse_flags(self, segment, flags, field):
-        return {
-            "deferred": check_flag(HEADER_FLAG_DEFERRED, flags),
-            "page_assoc_long": check_flag(HEADER_FLAG_PAGE_ASSOC_LONG, flags),
-            "type": masked_value(SEG_TYPE_MASK, flags)
-        }
-
-    def parse_retention_flags(self, segment, flags, field):
-        ref_count = masked_value(REF_COUNT_SHORT_MASK, flags)
-        retain_segments = []
-        ref_segments = []
-
-        if ref_count < REF_COUNT_LONG:
-            for bit_pos in range(5):
-                retain_segments.append(bit_set(bit_pos, flags))
-        else:
-            field += self.stream.read(3)
-            [ref_count] = unpack(">L", field)
-            ref_count = masked_value(REF_COUNT_LONG_MASK, ref_count)
-            ret_bytes_count = int(math.ceil((ref_count + 1) / 8))
-            for ret_byte_index in range(ret_bytes_count):
-                [ret_byte] = unpack(">B", self.stream.read(1))
-                for bit_pos in range(7):
-                    retain_segments.append(bit_set(bit_pos, ret_byte))
-
-        seg_num = segment["number"]
-        if seg_num <= 256:
-            ref_format = ">B"
-        elif seg_num <= 65536:
-            ref_format = ">I"
-        else:
-            ref_format = ">L"
-
-        ref_size = calcsize(ref_format)
-
-        for ref_index in range(ref_count):
-            ref = self.stream.read(ref_size)
-            [ref] = unpack(ref_format, ref)
-            ref_segments.append(ref)
-
-        return {
-            "ref_count": ref_count,
-            "retain_segments": retain_segments,
-            "ref_segments": ref_segments,
-        }
-
-    def parse_page_assoc(self, segment, page, field):
-        if segment["flags"]["page_assoc_long"]:
-            field += self.stream.read(3)
-            [page] = unpack(">L", field)
-        return page
-
-    def parse_data_length(self, segment, length, field):
-        if length:
-            if (segment["flags"]["type"] == SEG_TYPE_IMMEDIATE_GEN_REGION) \
-                    and (length == DATA_LEN_UNKNOWN):
-
-                raise NotImplementedError(
-                    "Working with unknown segment length "
-                    "is not implemented yet"
-                )
-            else:
-                segment["raw_data"] = self.stream.read(length)
-
-        return length
-
-
-class JBIG2StreamWriter:
-    """Write JBIG2 segments to a file in JBIG2 format"""
-
-    def __init__(self, stream):
-        self.stream = stream
-
-    def write_segments(self, segments, fix_last_page=True):
-        data_len = 0
-        current_page = None
-        seg_num = None
-
-        for segment in segments:
-            data = self.encode_segment(segment)
-            self.stream.write(data)
-            data_len += len(data)
-
-            seg_num = segment["number"]
-
-            if fix_last_page:
-                seg_page = segment.get("page_assoc")
-
-                if segment["flags"]["type"] == SEG_TYPE_END_OF_PAGE:
-                    current_page = None
-                elif seg_page:
-                    current_page = seg_page
-
-        if fix_last_page and current_page and (seg_num is not None):
-            segment = self.get_eop_segment(seg_num + 1, current_page)
-            data = self.encode_segment(segment)
-            self.stream.write(data)
-            data_len += len(data)
-
-        return data_len
-
-    def write_file(self, segments, fix_last_page=True):
-        header = FILE_HEADER_ID
-        header_flags = FILE_HEAD_FLAG_SEQUENTIAL | FILE_HEAD_FLAG_PAGES_UNKNOWN
-        header += pack(">B", header_flags)
-        self.stream.write(header)
-        data_len = len(header)
-
-        data_len += self.write_segments(segments, fix_last_page)
-
-        seg_num = 0
-        for segment in segments:
-            seg_num = segment["number"]
-
-        eof_segment = self.get_eof_segment(seg_num + 1)
-        data = self.encode_segment(eof_segment)
-
-        self.stream.write(data)
-        data_len += len(data)
-
-        return data_len
-
-    def encode_segment(self, segment):
-        data = b''
-        for field_format, name in SEG_STRUCT:
-            value = segment.get(name)
-            encoder = getattr(self, "encode_%s" % name, None)
-            if callable(encoder):
-                field = encoder(value, segment)
-            else:
-                field = pack(field_format, value)
-            data += field
-        return data
-
-    def encode_flags(self, value, segment):
-        flags = 0
-        if value.get("deferred"):
-            flags |= HEADER_FLAG_DEFERRED
-
-        if "page_assoc_long" in value:
-            flags |= HEADER_FLAG_PAGE_ASSOC_LONG \
-                if value["page_assoc_long"] else flags
-        else:
-            flags |= HEADER_FLAG_PAGE_ASSOC_LONG \
-                if segment.get("page", 0) > 255 else flags
-
-        flags |= mask_value(SEG_TYPE_MASK, value["type"])
-
-        return pack(">B", flags)
-
-    def encode_retention_flags(self, value, segment):
-        flags = []
-        flags_format = ">B"
-        ref_count = value["ref_count"]
-        retain_segments = value.get("retain_segments", [])
-
-        if ref_count <= 4:
-            flags_byte = mask_value(REF_COUNT_SHORT_MASK, ref_count)
-            for ref_index, ref_retain in enumerate(retain_segments):
-                flags_byte |= 1 << ref_index
-            flags.append(flags_byte)
-        else:
-            bytes_count = math.ceil((ref_count + 1) / 8)
-            flags_format = ">L" + ("B" * bytes_count)
-            flags_dword = mask_value(
-                REF_COUNT_SHORT_MASK,
-                REF_COUNT_LONG
-            ) << 24
-            flags.append(flags_dword)
-
-            for byte_index in range(bytes_count):
-                ret_byte = 0
-                ret_part = retain_segments[byte_index * 8:byte_index * 8 + 8]
-                for bit_pos, ret_seg in enumerate(ret_part):
-                    ret_byte |= 1 << bit_pos if ret_seg else ret_byte
-
-                flags.append(ret_byte)
-
-        ref_segments = value.get("ref_segments", [])
-
-        seg_num = segment["number"]
-        if seg_num <= 256:
-            ref_format = "B"
-        elif seg_num <= 65536:
-            ref_format = "I"
-        else:
-            ref_format = "L"
-
-        for ref in ref_segments:
-            flags_format += ref_format
-            flags.append(ref)
-
-        return pack(flags_format, *flags)
-
-    def encode_data_length(self, value, segment):
-        data = pack(">L", value)
-        data += segment["raw_data"]
-        return data
-
-    def get_eop_segment(self, seg_number, page_number):
-        return {
-            'data_length': 0,
-            'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_PAGE},
-            'number': seg_number,
-            'page_assoc': page_number,
-            'raw_data': b'',
-            'retention_flags': {
-                'ref_count': 0,
-                'ref_segments': [],
-                'retain_segments': []
-            }
-        }
-
-    def get_eof_segment(self, seg_number):
-        return {
-            'data_length': 0,
-            'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_FILE},
-            'number': seg_number,
-            'page_assoc': 0,
-            'raw_data': b'',
-            'retention_flags': {
-                'ref_count': 0,
-                'ref_segments': [],
-                'retain_segments': []
-            }
-        }
@@ -1,242 +0,0 @@
-""" Standard encoding tables used in PDF.
-
-This table is extracted from PDF Reference Manual 1.6, pp.925
-  "D.1 Latin Character Set and Encodings"
-
-"""
-
-ENCODING = [
-  # (name, std, mac, win, pdf)
-  ('A', 65, 65, 65, 65),
-  ('AE', 225, 174, 198, 198),
-  ('Aacute', None, 231, 193, 193),
-  ('Acircumflex', None, 229, 194, 194),
-  ('Adieresis', None, 128, 196, 196),
-  ('Agrave', None, 203, 192, 192),
-  ('Aring', None, 129, 197, 197),
-  ('Atilde', None, 204, 195, 195),
-  ('B', 66, 66, 66, 66),
-  ('C', 67, 67, 67, 67),
-  ('Ccedilla', None, 130, 199, 199),
-  ('D', 68, 68, 68, 68),
-  ('E', 69, 69, 69, 69),
-  ('Eacute', None, 131, 201, 201),
-  ('Ecircumflex', None, 230, 202, 202),
-  ('Edieresis', None, 232, 203, 203),
-  ('Egrave', None, 233, 200, 200),
-  ('Eth', None, None, 208, 208),
-  ('Euro', None, None, 128, 160),
-  ('F', 70, 70, 70, 70),
-  ('G', 71, 71, 71, 71),
-  ('H', 72, 72, 72, 72),
-  ('I', 73, 73, 73, 73),
-  ('Iacute', None, 234, 205, 205),
-  ('Icircumflex', None, 235, 206, 206),
-  ('Idieresis', None, 236, 207, 207),
-  ('Igrave', None, 237, 204, 204),
-  ('J', 74, 74, 74, 74),
-  ('K', 75, 75, 75, 75),
-  ('L', 76, 76, 76, 76),
-  ('Lslash', 232, None, None, 149),
-  ('M', 77, 77, 77, 77),
-  ('N', 78, 78, 78, 78),
-  ('Ntilde', None, 132, 209, 209),
-  ('O', 79, 79, 79, 79),
-  ('OE', 234, 206, 140, 150),
-  ('Oacute', None, 238, 211, 211),
-  ('Ocircumflex', None, 239, 212, 212),
-  ('Odieresis', None, 133, 214, 214),
-  ('Ograve', None, 241, 210, 210),
-  ('Oslash', 233, 175, 216, 216),
-  ('Otilde', None, 205, 213, 213),
-  ('P', 80, 80, 80, 80),
-  ('Q', 81, 81, 81, 81),
-  ('R', 82, 82, 82, 82),
-  ('S', 83, 83, 83, 83),
-  ('Scaron', None, None, 138, 151),
-  ('T', 84, 84, 84, 84),
-  ('Thorn', None, None, 222, 222),
-  ('U', 85, 85, 85, 85),
-  ('Uacute', None, 242, 218, 218),
-  ('Ucircumflex', None, 243, 219, 219),
-  ('Udieresis', None, 134, 220, 220),
-  ('Ugrave', None, 244, 217, 217),
-  ('V', 86, 86, 86, 86),
-  ('W', 87, 87, 87, 87),
-  ('X', 88, 88, 88, 88),
-  ('Y', 89, 89, 89, 89),
-  ('Yacute', None, None, 221, 221),
-  ('Ydieresis', None, 217, 159, 152),
-  ('Z', 90, 90, 90, 90),
-  ('Zcaron', None, None, 142, 153),
-  ('a', 97, 97, 97, 97),
-  ('aacute', None, 135, 225, 225),
-  ('acircumflex', None, 137, 226, 226),
-  ('acute', 194, 171, 180, 180),
-  ('adieresis', None, 138, 228, 228),
-  ('ae', 241, 190, 230, 230),
-  ('agrave', None, 136, 224, 224),
-  ('ampersand', 38, 38, 38, 38),
-  ('aring', None, 140, 229, 229),
-  ('asciicircum', 94, 94, 94, 94),
-  ('asciitilde', 126, 126, 126, 126),
-  ('asterisk', 42, 42, 42, 42),
-  ('at', 64, 64, 64, 64),
-  ('atilde', None, 139, 227, 227),
-  ('b', 98, 98, 98, 98),
-  ('backslash', 92, 92, 92, 92),
-  ('bar', 124, 124, 124, 124),
-  ('braceleft', 123, 123, 123, 123),
-  ('braceright', 125, 125, 125, 125),
-  ('bracketleft', 91, 91, 91, 91),
-  ('bracketright', 93, 93, 93, 93),
-  ('breve', 198, 249, None, 24),
-  ('brokenbar', None, None, 166, 166),
-  ('bullet', 183, 165, 149, 128),
-  ('c', 99, 99, 99, 99),
-  ('caron', 207, 255, None, 25),
-  ('ccedilla', None, 141, 231, 231),
-  ('cedilla', 203, 252, 184, 184),
-  ('cent', 162, 162, 162, 162),
-  ('circumflex', 195, 246, 136, 26),
-  ('colon', 58, 58, 58, 58),
-  ('comma', 44, 44, 44, 44),
-  ('copyright', None, 169, 169, 169),
-  ('currency', 168, 219, 164, 164),
-  ('d', 100, 100, 100, 100),
-  ('dagger', 178, 160, 134, 129),
-  ('daggerdbl', 179, 224, 135, 130),
-  ('degree', None, 161, 176, 176),
-  ('dieresis', 200, 172, 168, 168),
-  ('divide', None, 214, 247, 247),
-  ('dollar', 36, 36, 36, 36),
-  ('dotaccent', 199, 250, None, 27),
-  ('dotlessi', 245, 245, None, 154),
-  ('e', 101, 101, 101, 101),
-  ('eacute', None, 142, 233, 233),
-  ('ecircumflex', None, 144, 234, 234),
-  ('edieresis', None, 145, 235, 235),
-  ('egrave', None, 143, 232, 232),
-  ('eight', 56, 56, 56, 56),
-  ('ellipsis', 188, 201, 133, 131),
-  ('emdash', 208, 209, 151, 132),
-  ('endash', 177, 208, 150, 133),
-  ('equal', 61, 61, 61, 61),
-  ('eth', None, None, 240, 240),
-  ('exclam', 33, 33, 33, 33),
-  ('exclamdown', 161, 193, 161, 161),
-  ('f', 102, 102, 102, 102),
-  ('fi', 174, 222, None, 147),
-  ('five', 53, 53, 53, 53),
-  ('fl', 175, 223, None, 148),
-  ('florin', 166, 196, 131, 134),
-  ('four', 52, 52, 52, 52),
-  ('fraction', 164, 218, None, 135),
-  ('g', 103, 103, 103, 103),
-  ('germandbls', 251, 167, 223, 223),
-  ('grave', 193, 96, 96, 96),
-  ('greater', 62, 62, 62, 62),
-  ('guillemotleft', 171, 199, 171, 171),
-  ('guillemotright', 187, 200, 187, 187),
-  ('guilsinglleft', 172, 220, 139, 136),
-  ('guilsinglright', 173, 221, 155, 137),
-  ('h', 104, 104, 104, 104),
-  ('hungarumlaut', 205, 253, None, 28),
-  ('hyphen', 45, 45, 45, 45),
-  ('i', 105, 105, 105, 105),
-  ('iacute', None, 146, 237, 237),
-  ('icircumflex', None, 148, 238, 238),
-  ('idieresis', None, 149, 239, 239),
-  ('igrave', None, 147, 236, 236),
-  ('j', 106, 106, 106, 106),
-  ('k', 107, 107, 107, 107),
-  ('l', 108, 108, 108, 108),
-  ('less', 60, 60, 60, 60),
-  ('logicalnot', None, 194, 172, 172),
-  ('lslash', 248, None, None, 155),
-  ('m', 109, 109, 109, 109),
-  ('macron', 197, 248, 175, 175),
-  ('minus', None, None, None, 138),
-  ('mu', None, 181, 181, 181),
-  ('multiply', None, None, 215, 215),
-  ('n', 110, 110, 110, 110),
-  ('nbspace', None, 202, 160, None),
-  ('nine', 57, 57, 57, 57),
-  ('ntilde', None, 150, 241, 241),
-  ('numbersign', 35, 35, 35, 35),
-  ('o', 111, 111, 111, 111),
-  ('oacute', None, 151, 243, 243),
-  ('ocircumflex', None, 153, 244, 244),
-  ('odieresis', None, 154, 246, 246),
-  ('oe', 250, 207, 156, 156),
-  ('ogonek', 206, 254, None, 29),
-  ('ograve', None, 152, 242, 242),
-  ('one', 49, 49, 49, 49),
-  ('onehalf', None, None, 189, 189),
-  ('onequarter', None, None, 188, 188),
-  ('onesuperior', None, None, 185, 185),
-  ('ordfeminine', 227, 187, 170, 170),
-  ('ordmasculine', 235, 188, 186, 186),
-  ('oslash', 249, 191, 248, 248),
-  ('otilde', None, 155, 245, 245),
-  ('p', 112, 112, 112, 112),
-  ('paragraph', 182, 166, 182, 182),
-  ('parenleft', 40, 40, 40, 40),
-  ('parenright', 41, 41, 41, 41),
-  ('percent', 37, 37, 37, 37),
-  ('period', 46, 46, 46, 46),
-  ('periodcentered', 180, 225, 183, 183),
-  ('perthousand', 189, 228, 137, 139),
-  ('plus', 43, 43, 43, 43),
-  ('plusminus', None, 177, 177, 177),
-  ('q', 113, 113, 113, 113),
-  ('question', 63, 63, 63, 63),
-  ('questiondown', 191, 192, 191, 191),
-  ('quotedbl', 34, 34, 34, 34),
-  ('quotedblbase', 185, 227, 132, 140),
-  ('quotedblleft', 170, 210, 147, 141),
-  ('quotedblright', 186, 211, 148, 142),
-  ('quoteleft', 96, 212, 145, 143),
-  ('quoteright', 39, 213, 146, 144),
-  ('quotesinglbase', 184, 226, 130, 145),
-  ('quotesingle', 169, 39, 39, 39),
-  ('r', 114, 114, 114, 114),
-  ('registered', None, 168, 174, 174),
-  ('ring', 202, 251, None, 30),
-  ('s', 115, 115, 115, 115),
-  ('scaron', None, None, 154, 157),
-  ('section', 167, 164, 167, 167),
-  ('semicolon', 59, 59, 59, 59),
-  ('seven', 55, 55, 55, 55),
-  ('six', 54, 54, 54, 54),
-  ('slash', 47, 47, 47, 47),
-  ('space', 32, 32, 32, 32),
-  ('space', None, 202, 160, None),
-  ('space', None, 202, 173, None),
-  ('sterling', 163, 163, 163, 163),
-  ('t', 116, 116, 116, 116),
-  ('thorn', None, None, 254, 254),
-  ('three', 51, 51, 51, 51),
-  ('threequarters', None, None, 190, 190),
-  ('threesuperior', None, None, 179, 179),
-  ('tilde', 196, 247, 152, 31),
-  ('trademark', None, 170, 153, 146),
-  ('two', 50, 50, 50, 50),
-  ('twosuperior', None, None, 178, 178),
-  ('u', 117, 117, 117, 117),
-  ('uacute', None, 156, 250, 250),
-  ('ucircumflex', None, 158, 251, 251),
-  ('udieresis', None, 159, 252, 252),
-  ('ugrave', None, 157, 249, 249),
-  ('underscore', 95, 95, 95, 95),
-  ('v', 118, 118, 118, 118),
-  ('w', 119, 119, 119, 119),
-  ('x', 120, 120, 120, 120),
-  ('y', 121, 121, 121, 121),
-  ('yacute', None, None, 253, 253),
-  ('ydieresis', None, 216, 255, 255),
-  ('yen', 165, 180, 165, 165),
-  ('z', 122, 122, 122, 122),
-  ('zcaron', None, None, 158, 158),
-  ('zero', 48, 48, 48, 48),
-]
@@ -1,866 +0,0 @@
-import heapq
-import logging
-
-from .utils import INF
-from .utils import Plane
-from .utils import apply_matrix_pt
-from .utils import bbox2str
-from .utils import fsplit
-from .utils import get_bound
-from .utils import matrix2str
-from .utils import uniq
-
-logger = logging.getLogger(__name__)
-
-
-class IndexAssigner:
-
-    def __init__(self, index=0):
-        self.index = index
-        return
-
-    def run(self, obj):
-        if isinstance(obj, LTTextBox):
-            obj.index = self.index
-            self.index += 1
-        elif isinstance(obj, LTTextGroup):
-            for x in obj:
-                self.run(x)
-        return
-
-
-class LAParams:
-    """Parameters for layout analysis
-
-    :param line_overlap: If two characters have more overlap than this they
-        are considered to be on the same line. The overlap is specified
-        relative to the minimum height of both characters.
-    :param char_margin: If two characters are closer together than this
-        margin they are considered part of the same line. The margin is
-        specified relative to the width of the character.
-    :param word_margin: If two characters on the same line are further apart
-        than this margin then they are considered to be two separate words, and
-        an intermediate space will be added for readability. The margin is
-        specified relative to the width of the character.
-    :param line_margin: If two lines are are close together they are
-        considered to be part of the same paragraph. The margin is
-        specified relative to the height of a line.
-    :param boxes_flow: Specifies how much a horizontal and vertical position
-        of a text matters when determining the order of text boxes. The value
-        should be within the range of -1.0 (only horizontal position
-        matters) to +1.0 (only vertical position matters). You can also pass
-        `None` to disable advanced layout analysis, and instead return text
-        based on the position of the bottom left corner of the text box.
-    :param detect_vertical: If vertical text should be considered during
-        layout analysis
-    :param all_texts: If layout analysis should be performed on text in
-        figures.
-    """
-
-    def __init__(self,
-                 line_overlap=0.5,
-                 char_margin=2.0,
-                 line_margin=0.5,
-                 word_margin=0.1,
-                 boxes_flow=0.5,
-                 detect_vertical=False,
-                 all_texts=False):
-        self.line_overlap = line_overlap
-        self.char_margin = char_margin
-        self.line_margin = line_margin
-        self.word_margin = word_margin
-        self.boxes_flow = boxes_flow
-        self.detect_vertical = detect_vertical
-        self.all_texts = all_texts
-
-        self._validate()
-        return
-
-    def _validate(self):
-        if self.boxes_flow is not None:
-            boxes_flow_err_msg = ("LAParam boxes_flow should be None, or a "
-                                  "number between -1 and +1")
-            if not (isinstance(self.boxes_flow, int) or
-                    isinstance(self.boxes_flow, float)):
-                raise TypeError(boxes_flow_err_msg)
-            if not -1 <= self.boxes_flow <= 1:
-                raise ValueError(boxes_flow_err_msg)
-
-    def __repr__(self):
-        return '<LAParams: char_margin=%.1f, line_margin=%.1f, ' \
-               'word_margin=%.1f all_texts=%r>' % \
-               (self.char_margin, self.line_margin, self.word_margin,
-                self.all_texts)
-
-
-class LTItem:
-    """Interface for things that can be analyzed"""
-
-    def analyze(self, laparams):
-        """Perform the layout analysis."""
-        return
-
-
-class LTText:
-    """Interface for things that have text"""
-
-    def __repr__(self):
-        return ('<%s %r>' %
-                (self.__class__.__name__, self.get_text()))
-
-    def get_text(self):
-        """Text contained in this object"""
-        raise NotImplementedError
-
-
-class LTComponent(LTItem):
-    """Object with a bounding box"""
-
-    def __init__(self, bbox):
-        LTItem.__init__(self)
-        self.set_bbox(bbox)
-        return
-
-    def __repr__(self):
-        return ('<%s %s>' %
-                (self.__class__.__name__, bbox2str(self.bbox)))
-
-    # Disable comparison.
-    def __lt__(self, _):
-        raise ValueError
-
-    def __le__(self, _):
-        raise ValueError
-
-    def __gt__(self, _):
-        raise ValueError
-
-    def __ge__(self, _):
-        raise ValueError
-
-    def set_bbox(self, bbox):
-        (x0, y0, x1, y1) = bbox
-        self.x0 = x0
-        self.y0 = y0
-        self.x1 = x1
-        self.y1 = y1
-        self.width = x1-x0
-        self.height = y1-y0
-        self.bbox = bbox
-        return
-
-    def is_empty(self):
-        return self.width <= 0 or self.height <= 0
-
-    def is_hoverlap(self, obj):
-        assert isinstance(obj, LTComponent), str(type(obj))
-        return obj.x0 <= self.x1 and self.x0 <= obj.x1
-
-    def hdistance(self, obj):
-        assert isinstance(obj, LTComponent), str(type(obj))
-        if self.is_hoverlap(obj):
-            return 0
-        else:
-            return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
-
-    def hoverlap(self, obj):
-        assert isinstance(obj, LTComponent), str(type(obj))
-        if self.is_hoverlap(obj):
-            return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
-        else:
-            return 0
-
-    def is_voverlap(self, obj):
-        assert isinstance(obj, LTComponent), str(type(obj))
-        return obj.y0 <= self.y1 and self.y0 <= obj.y1
-
-    def vdistance(self, obj):
-        assert isinstance(obj, LTComponent), str(type(obj))
-        if self.is_voverlap(obj):
-            return 0
-        else:
-            return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
-
-    def voverlap(self, obj):
-        assert isinstance(obj, LTComponent), str(type(obj))
-        if self.is_voverlap(obj):
-            return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
-        else:
-            return 0
-
-
-class LTCurve(LTComponent):
-    """A generic Bezier curve"""
-
-    def __init__(self, linewidth, pts, stroke=False, fill=False, evenodd=False,
-                 stroking_color=None, non_stroking_color=None):
-        LTComponent.__init__(self, get_bound(pts))
-        self.pts = pts
-        self.linewidth = linewidth
-        self.stroke = stroke
-        self.fill = fill
-        self.evenodd = evenodd
-        self.stroking_color = stroking_color
-        self.non_stroking_color = non_stroking_color
-        return
-
-    def get_pts(self):
-        return ','.join('%.3f,%.3f' % p for p in self.pts)
-
-
-class LTLine(LTCurve):
-    """A single straight line.
-
-    Could be used for separating text or figures.
-    """
-
-    def __init__(self, linewidth, p0, p1, stroke=False, fill=False,
-                 evenodd=False, stroking_color=None, non_stroking_color=None):
-        LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd,
-                         stroking_color, non_stroking_color)
-        return
-
-
-class LTRect(LTCurve):
-    """A rectangle.
-
-    Could be used for framing another pictures or figures.
-    """
-
-    def __init__(self, linewidth, bbox, stroke=False, fill=False,
-                 evenodd=False, stroking_color=None,  non_stroking_color=None):
-        (x0, y0, x1, y1) = bbox
-        LTCurve.__init__(self, linewidth,
-                         [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke,
-                         fill, evenodd, stroking_color, non_stroking_color)
-        return
-
-
-class LTImage(LTComponent):
-    """An image object.
-
-    Embedded images can be in JPEG, Bitmap or JBIG2.
-    """
-
-    def __init__(self, name, stream, bbox):
-        LTComponent.__init__(self, bbox)
-        self.name = name
-        self.stream = stream
-        self.srcsize = (stream.get_any(('W', 'Width')),
-                        stream.get_any(('H', 'Height')))
-        self.imagemask = stream.get_any(('IM', 'ImageMask'))
-        self.bits = stream.get_any(('BPC', 'BitsPerComponent'), 1)
-        self.colorspace = stream.get_any(('CS', 'ColorSpace'))
-        if not isinstance(self.colorspace, list):
-            self.colorspace = [self.colorspace]
-        return
-
-    def __repr__(self):
-        return ('<%s(%s) %s %r>' %
-                (self.__class__.__name__, self.name,
-                 bbox2str(self.bbox), self.srcsize))
-
-
-class LTAnno(LTItem, LTText):
-    """Actual letter in the text as a Unicode string.
-
-    Note that, while a LTChar object has actual boundaries, LTAnno objects does
-    not, as these are "virtual" characters, inserted by a layout analyzer
-    according to the relationship between two characters (e.g. a space).
-    """
-
-    def __init__(self, text):
-        self._text = text
-        return
-
-    def get_text(self):
-        return self._text
-
-
-class LTChar(LTComponent, LTText):
-    """Actual letter in the text as a Unicode string."""
-
-    def __init__(self, matrix, font, fontsize, scaling, rise,
-                 text, textwidth, textdisp, ncs, graphicstate):
-        LTText.__init__(self)
-        self._text = text
-        self.matrix = matrix
-        self.fontname = font.fontname
-        self.ncs = ncs
-        self.graphicstate = graphicstate
-        self.adv = textwidth * fontsize * scaling
-        # compute the boundary rectangle.
-        if font.is_vertical():
-            # vertical
-            (vx, vy) = textdisp
-            if vx is None:
-                vx = fontsize * 0.5
-            else:
-                vx = vx * fontsize * .001
-            vy = (1000 - vy) * fontsize * .001
-            bbox_lower_left = (-vx, vy + rise + self.adv)
-            bbox_upper_right = (-vx + fontsize, vy + rise)
-        else:
-            # horizontal
-            descent = font.get_descent() * fontsize
-            bbox_lower_left = (0, descent + rise)
-            bbox_upper_right = (self.adv, descent + rise + fontsize)
-        (a, b, c, d, e, f) = self.matrix
-        self.upright = (0 < a*d*scaling and b*c <= 0)
-        (x0, y0) = apply_matrix_pt(self.matrix, bbox_lower_left)
-        (x1, y1) = apply_matrix_pt(self.matrix, bbox_upper_right)
-        if x1 < x0:
-            (x0, x1) = (x1, x0)
-        if y1 < y0:
-            (y0, y1) = (y1, y0)
-        LTComponent.__init__(self, (x0, y0, x1, y1))
-        if font.is_vertical():
-            self.size = self.width
-        else:
-            self.size = self.height
-        return
-
-    def __repr__(self):
-        return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
-                (self.__class__.__name__, bbox2str(self.bbox),
-                 matrix2str(self.matrix), self.fontname, self.adv,
-                 self.get_text()))
-
-    def get_text(self):
-        return self._text
-
-    def is_compatible(self, obj):
-        """Returns True if two characters can coexist in the same line."""
-        return True
-
-
-class LTContainer(LTComponent):
-    """Object that can be extended and analyzed"""
-
-    def __init__(self, bbox):
-        LTComponent.__init__(self, bbox)
-        self._objs = []
-        return
-
-    def __iter__(self):
-        return iter(self._objs)
-
-    def __len__(self):
-        return len(self._objs)
-
-    def add(self, obj):
-        self._objs.append(obj)
-        return
-
-    def extend(self, objs):
-        for obj in objs:
-            self.add(obj)
-        return
-
-    def analyze(self, laparams):
-        for obj in self._objs:
-            obj.analyze(laparams)
-        return
-
-
-class LTExpandableContainer(LTContainer):
-    def __init__(self):
-        LTContainer.__init__(self, (+INF, +INF, -INF, -INF))
-        return
-
-    def add(self, obj):
-        LTContainer.add(self, obj)
-        self.set_bbox((min(self.x0, obj.x0), min(self.y0, obj.y0),
-                       max(self.x1, obj.x1), max(self.y1, obj.y1)))
-        return
-
-
-class LTTextContainer(LTExpandableContainer, LTText):
-    def __init__(self):
-        LTText.__init__(self)
-        LTExpandableContainer.__init__(self)
-        return
-
-    def get_text(self):
-        return ''.join(obj.get_text() for obj in self
-                       if isinstance(obj, LTText))
-
-
-class LTTextLine(LTTextContainer):
-    """Contains a list of LTChar objects that represent a single text line.
-
-    The characters are aligned either horizontally or vertically, depending on
-    the text's writing mode.
-    """
-
-    def __init__(self, word_margin):
-        LTTextContainer.__init__(self)
-        self.word_margin = word_margin
-        return
-
-    def __repr__(self):
-        return ('<%s %s %r>' %
-                (self.__class__.__name__, bbox2str(self.bbox),
-                 self.get_text()))
-
-    def analyze(self, laparams):
-        LTTextContainer.analyze(self, laparams)
-        LTContainer.add(self, LTAnno('\n'))
-        return
-
-    def find_neighbors(self, plane, ratio):
-        raise NotImplementedError
-
-
-class LTTextLineHorizontal(LTTextLine):
-    def __init__(self, word_margin):
-        LTTextLine.__init__(self, word_margin)
-        self._x1 = +INF
-        return
-
-    def add(self, obj):
-        if isinstance(obj, LTChar) and self.word_margin:
-            margin = self.word_margin * max(obj.width, obj.height)
-            if self._x1 < obj.x0 - margin:
-                LTContainer.add(self, LTAnno(' '))
-        self._x1 = obj.x1
-        LTTextLine.add(self, obj)
-        return
-
-    def find_neighbors(self, plane, ratio):
-        """
-        Finds neighboring LTTextLineHorizontals in the plane.
-
-        Returns a list of other LTTestLineHorizontals in the plane which are
-        close to self. "Close" can be controlled by ratio. The returned objects
-        will be the same height as self, and also either left-, right-, or
-        centrally-aligned.
-        """
-        d = ratio * self.height
-        objs = plane.find((self.x0, self.y0 - d, self.x1, self.y1 + d))
-        return [obj for obj in objs
-                if (isinstance(obj, LTTextLineHorizontal) and
-                    self._is_same_height_as(obj, tolerance=d) and
-                    (self._is_left_aligned_with(obj, tolerance=d) or
-                     self._is_right_aligned_with(obj, tolerance=d) or
-                     self._is_centrally_aligned_with(obj, tolerance=d)))]
-
-    def _is_left_aligned_with(self, other, tolerance=0):
-        """
-        Whether the left-hand edge of `other` is within `tolerance`.
-        """
-        return abs(other.x0 - self.x0) <= tolerance
-
-    def _is_right_aligned_with(self, other, tolerance=0):
-        """
-        Whether the right-hand edge of `other` is within `tolerance`.
-        """
-        return abs(other.x1 - self.x1) <= tolerance
-
-    def _is_centrally_aligned_with(self, other, tolerance=0):
-        """
-        Whether the horizontal center of `other` is within `tolerance`.
-        """
-        return abs(
-            (other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance
-
-    def _is_same_height_as(self, other, tolerance):
-        return abs(other.height - self.height) <= tolerance
-
-
-class LTTextLineVertical(LTTextLine):
-    def __init__(self, word_margin):
-        LTTextLine.__init__(self, word_margin)
-        self._y0 = -INF
-        return
-
-    def add(self, obj):
-        if isinstance(obj, LTChar) and self.word_margin:
-            margin = self.word_margin * max(obj.width, obj.height)
-            if obj.y1 + margin < self._y0:
-                LTContainer.add(self, LTAnno(' '))
-        self._y0 = obj.y0
-        LTTextLine.add(self, obj)
-        return
-
-    def find_neighbors(self, plane, ratio):
-        """
-        Finds neighboring LTTextLineVerticals in the plane.
-
-        Returns a list of other LTTextLineVerticals in the plane which are
-        close to self. "Close" can be controlled by ratio. The returned objects
-        will be the same width as self, and also either upper-, lower-, or
-        centrally-aligned.
-        """
-        d = ratio * self.width
-        objs = plane.find((self.x0 - d, self.y0, self.x1 + d, self.y1))
-        return [obj for obj in objs
-                if (isinstance(obj, LTTextLineVertical) and
-                    self._is_same_width_as(obj, tolerance=d) and
-                    (self._is_lower_aligned_with(obj, tolerance=d) or
-                     self._is_upper_aligned_with(obj, tolerance=d) or
-                     self._is_centrally_aligned_with(obj, tolerance=d)))]
-
-    def _is_lower_aligned_with(self, other, tolerance=0):
-        """
-        Whether the lower edge of `other` is within `tolerance`.
-        """
-        return abs(other.y0 - self.y0) <= tolerance
-
-    def _is_upper_aligned_with(self, other, tolerance=0):
-        """
-        Whether the upper edge of `other` is within `tolerance`.
-        """
-        return abs(other.y1 - self.y1) <= tolerance
-
-    def _is_centrally_aligned_with(self, other, tolerance=0):
-        """
-        Whether the vertical center of `other` is within `tolerance`.
-        """
-        return abs(
-            (other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance
-
-    def _is_same_width_as(self, other, tolerance):
-        return abs(other.width - self.width) <= tolerance
-
-
-class LTTextBox(LTTextContainer):
-    """Represents a group of text chunks in a rectangular area.
-
-    Note that this box is created by geometric analysis and does not
-    necessarily represents a logical boundary of the text. It contains a list
-    of LTTextLine objects.
-    """
-
-    def __init__(self):
-        LTTextContainer.__init__(self)
-        self.index = -1
-        return
-
-    def __repr__(self):
-        return ('<%s(%s) %s %r>' %
-                (self.__class__.__name__,
-                 self.index, bbox2str(self.bbox), self.get_text()))
-
-
-class LTTextBoxHorizontal(LTTextBox):
-    def analyze(self, laparams):
-        LTTextBox.analyze(self, laparams)
-        self._objs.sort(key=lambda obj: -obj.y1)
-        return
-
-    def get_writing_mode(self):
-        return 'lr-tb'
-
-
-class LTTextBoxVertical(LTTextBox):
-    def analyze(self, laparams):
-        LTTextBox.analyze(self, laparams)
-        self._objs.sort(key=lambda obj: -obj.x1)
-        return
-
-    def get_writing_mode(self):
-        return 'tb-rl'
-
-
-class LTTextGroup(LTTextContainer):
-    def __init__(self, objs):
-        LTTextContainer.__init__(self)
-        self.extend(objs)
-        return
-
-
-class LTTextGroupLRTB(LTTextGroup):
-    def analyze(self, laparams):
-        LTTextGroup.analyze(self, laparams)
-        # reorder the objects from top-left to bottom-right.
-        self._objs.sort(
-            key=lambda obj: (1 - laparams.boxes_flow) * obj.x0
-            - (1 + laparams.boxes_flow) * (obj.y0 + obj.y1))
-        return
-
-
-class LTTextGroupTBRL(LTTextGroup):
-    def analyze(self, laparams):
-        LTTextGroup.analyze(self, laparams)
-        # reorder the objects from top-right to bottom-left.
-        self._objs.sort(
-            key=lambda obj: - (1 + laparams.boxes_flow) * (obj.x0 + obj.x1)
-                            - (1 - laparams.boxes_flow) * obj.y1)
-        return
-
-
-class LTLayoutContainer(LTContainer):
-    def __init__(self, bbox):
-        LTContainer.__init__(self, bbox)
-        self.groups = None
-        return
-
-    # group_objects: group text object to textlines.
-    def group_objects(self, laparams, objs):
-        obj0 = None
-        line = None
-        for obj1 in objs:
-            if obj0 is not None:
-                # halign: obj0 and obj1 is horizontally aligned.
-                #
-                #   +------+ - - -
-                #   | obj0 | - - +------+   -
-                #   |      |     | obj1 |   | (line_overlap)
-                #   +------+ - - |      |   -
-                #          - - - +------+
-                #
-                #          |<--->|
-                #        (char_margin)
-                halign = \
-                    obj0.is_compatible(obj1) \
-                    and obj0.is_voverlap(obj1) \
-                    and min(obj0.height, obj1.height) * laparams.line_overlap \
-                    < obj0.voverlap(obj1) \
-                    and obj0.hdistance(obj1) \
-                    < max(obj0.width, obj1.width) * laparams.char_margin
-
-                # valign: obj0 and obj1 is vertically aligned.
-                #
-                #   +------+
-                #   | obj0 |
-                #   |      |
-                #   +------+ - - -
-                #     |    |     | (char_margin)
-                #     +------+ - -
-                #     | obj1 |
-                #     |      |
-                #     +------+
-                #
-                #     |<-->|
-                #   (line_overlap)
-                valign = \
-                    laparams.detect_vertical \
-                    and obj0.is_compatible(obj1) \
-                    and obj0.is_hoverlap(obj1) \
-                    and min(obj0.width, obj1.width) * laparams.line_overlap \
-                    < obj0.hoverlap(obj1) \
-                    and obj0.vdistance(obj1) \
-                    < max(obj0.height, obj1.height) * laparams.char_margin
-
-                if ((halign and isinstance(line, LTTextLineHorizontal)) or
-                        (valign and isinstance(line, LTTextLineVertical))):
-
-                    line.add(obj1)
-                elif line is not None:
-                    yield line
-                    line = None
-                else:
-                    if valign and not halign:
-                        line = LTTextLineVertical(laparams.word_margin)
-                        line.add(obj0)
-                        line.add(obj1)
-                    elif halign and not valign:
-                        line = LTTextLineHorizontal(laparams.word_margin)
-                        line.add(obj0)
-                        line.add(obj1)
-                    else:
-                        line = LTTextLineHorizontal(laparams.word_margin)
-                        line.add(obj0)
-                        yield line
-                        line = None
-            obj0 = obj1
-        if line is None:
-            line = LTTextLineHorizontal(laparams.word_margin)
-            line.add(obj0)
-        yield line
-        return
-
-    def group_textlines(self, laparams, lines):
-        """Group neighboring lines to textboxes"""
-        plane = Plane(self.bbox)
-        plane.extend(lines)
-        boxes = {}
-        for line in lines:
-            neighbors = line.find_neighbors(plane, laparams.line_margin)
-            members = [line]
-            for obj1 in neighbors:
-                members.append(obj1)
-                if obj1 in boxes:
-                    members.extend(boxes.pop(obj1))
-            if isinstance(line, LTTextLineHorizontal):
-                box = LTTextBoxHorizontal()
-            else:
-                box = LTTextBoxVertical()
-            for obj in uniq(members):
-                box.add(obj)
-                boxes[obj] = box
-        done = set()
-        for line in lines:
-            if line not in boxes:
-                continue
-            box = boxes[line]
-            if box in done:
-                continue
-            done.add(box)
-            if not box.is_empty():
-                yield box
-        return
-
-    def group_textboxes(self, laparams, boxes):
-        """Group textboxes hierarchically.
-
-        Get pair-wise distances, via dist func defined below, and then merge
-        from the closest textbox pair. Once obj1 and obj2 are merged /
-        grouped, the resulting group is considered as a new object, and its
-        distances to other objects & groups are added to the process queue.
-
-        For performance reason, pair-wise distances and object pair info are
-        maintained in a heap of (idx, dist, id(obj1), id(obj2), obj1, obj2)
-        tuples. It ensures quick access to the smallest element. Note that
-        since comparison operators, e.g., __lt__, are disabled for
-        LTComponent, id(obj) has to appear before obj in element tuples.
-
-        :param laparams: LAParams object.
-        :param boxes: All textbox objects to be grouped.
-        :return: a list that has only one element, the final top level textbox.
-        """
-
-        def dist(obj1, obj2):
-            """A distance function between two TextBoxes.
-
-            Consider the bounding rectangle for obj1 and obj2.
-            Return its area less the areas of obj1 and obj2,
-            shown as 'www' below. This value may be negative.
-                    +------+..........+ (x1, y1)
-                    | obj1 |wwwwwwwwww:
-                    +------+www+------+
-                    :wwwwwwwwww| obj2 |
-            (x0, y0) +..........+------+
-            """
-            x0 = min(obj1.x0, obj2.x0)
-            y0 = min(obj1.y0, obj2.y0)
-            x1 = max(obj1.x1, obj2.x1)
-            y1 = max(obj1.y1, obj2.y1)
-            return (x1 - x0) * (y1 - y0) \
-                - obj1.width*obj1.height - obj2.width*obj2.height
-
-        def isany(obj1, obj2):
-            """Check if there's any other object between obj1 and obj2."""
-            x0 = min(obj1.x0, obj2.x0)
-            y0 = min(obj1.y0, obj2.y0)
-            x1 = max(obj1.x1, obj2.x1)
-            y1 = max(obj1.y1, obj2.y1)
-            objs = set(plane.find((x0, y0, x1, y1)))
-            return objs.difference((obj1, obj2))
-
-        dists = []
-        for i in range(len(boxes)):
-            obj1 = boxes[i]
-            for j in range(i+1, len(boxes)):
-                obj2 = boxes[j]
-                dists.append((False, dist(obj1, obj2), id(obj1), id(obj2),
-                              obj1, obj2))
-        heapq.heapify(dists)
-
-        plane = Plane(self.bbox)
-        plane.extend(boxes)
-        done = set()
-        while len(dists) > 0:
-            (skip_isany, d, id1, id2, obj1, obj2) = heapq.heappop(dists)
-            # Skip objects that are already merged
-            if (id1 not in done) and (id2 not in done):
-                if skip_isany and isany(obj1, obj2):
-                    heapq.heappush(dists, (True, d, id1, id2, obj1, obj2))
-                    continue
-                if isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or \
-                        isinstance(obj2, (LTTextBoxVertical, LTTextGroupTBRL)):
-                    group = LTTextGroupTBRL([obj1, obj2])
-                else:
-                    group = LTTextGroupLRTB([obj1, obj2])
-                plane.remove(obj1)
-                plane.remove(obj2)
-                done.update([id1, id2])
-
-                for other in plane:
-                    heapq.heappush(dists, (False, dist(group, other),
-                                           id(group), id(other), group, other))
-                plane.add(group)
-        return list(plane)
-
-    def analyze(self, laparams):
-        # textobjs is a list of LTChar objects, i.e.
-        # it has all the individual characters in the page.
-        (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar),
-                                       self)
-        for obj in otherobjs:
-            obj.analyze(laparams)
-        if not textobjs:
-            return
-        textlines = list(self.group_objects(laparams, textobjs))
-        (empties, textlines) = fsplit(lambda obj: obj.is_empty(), textlines)
-        for obj in empties:
-            obj.analyze(laparams)
-        textboxes = list(self.group_textlines(laparams, textlines))
-        if laparams.boxes_flow is None:
-            for textbox in textboxes:
-                textbox.analyze(laparams)
-
-            def getkey(box):
-                if isinstance(box, LTTextBoxVertical):
-                    return (0, -box.x1, -box.y0)
-                else:
-                    return (1, -box.y0, box.x0)
-            textboxes.sort(key=getkey)
-        else:
-            self.groups = self.group_textboxes(laparams, textboxes)
-            assigner = IndexAssigner()
-            for group in self.groups:
-                group.analyze(laparams)
-                assigner.run(group)
-            textboxes.sort(key=lambda box: box.index)
-        self._objs = textboxes + otherobjs + empties
-        return
-
-
-class LTFigure(LTLayoutContainer):
-    """Represents an area used by PDF Form objects.
-
-    PDF Forms can be used to present figures or pictures by embedding yet
-    another PDF document within a page. Note that LTFigure objects can appear
-    recursively.
-    """
-
-    def __init__(self, name, bbox, matrix):
-        self.name = name
-        self.matrix = matrix
-        (x, y, w, h) = bbox
-        bounds = ((x, y), (x + w, y), (x, y + h), (x + w, y + h))
-        bbox = get_bound(apply_matrix_pt(matrix, (p, q)) for (p, q) in bounds)
-        LTLayoutContainer.__init__(self, bbox)
-        return
-
-    def __repr__(self):
-        return ('<%s(%s) %s matrix=%s>' %
-                (self.__class__.__name__, self.name,
-                 bbox2str(self.bbox), matrix2str(self.matrix)))
-
-    def analyze(self, laparams):
-        if not laparams.all_texts:
-            return
-        LTLayoutContainer.analyze(self, laparams)
-        return
-
-
-class LTPage(LTLayoutContainer):
-    """Represents an entire page.
-
-    May contain child objects like LTTextBox, LTFigure, LTImage, LTRect,
-    LTCurve and LTLine.
-    """
-
-    def __init__(self, pageid, bbox, rotate=0):
-        LTLayoutContainer.__init__(self, bbox)
-        self.pageid = pageid
-        self.rotate = rotate
-        return
-
-    def __repr__(self):
-        return ('<%s(%r) %s rotate=%r>' %
-                (self.__class__.__name__, self.pageid,
-                 bbox2str(self.bbox), self.rotate))
@@ -1,99 +0,0 @@
-from io import BytesIO
-import logging
-
-
-logger = logging.getLogger(__name__)
-
-
-class CorruptDataError(Exception):
-    pass
-
-
-class LZWDecoder:
-
-    def __init__(self, fp):
-        self.fp = fp
-        self.buff = 0
-        self.bpos = 8
-        self.nbits = 9
-        self.table = None
-        self.prevbuf = None
-        return
-
-    def readbits(self, bits):
-        v = 0
-        while 1:
-            # the number of remaining bits we can get from the current buffer.
-            r = 8-self.bpos
-            if bits <= r:
-                # |-----8-bits-----|
-                # |-bpos-|-bits-|  |
-                # |      |----r----|
-                v = (v << bits) | ((self.buff >> (r-bits)) & ((1 << bits)-1))
-                self.bpos += bits
-                break
-            else:
-                # |-----8-bits-----|
-                # |-bpos-|---bits----...
-                # |      |----r----|
-                v = (v << r) | (self.buff & ((1 << r)-1))
-                bits -= r
-                x = self.fp.read(1)
-                if not x:
-                    raise EOFError
-                self.buff = ord(x)
-                self.bpos = 0
-        return v
-
-    def feed(self, code):
-        x = b''
-        if code == 256:
-            self.table = [bytes((c,)) for c in range(256)]  # 0-255
-            self.table.append(None)  # 256
-            self.table.append(None)  # 257
-            self.prevbuf = b''
-            self.nbits = 9
-        elif code == 257:
-            pass
-        elif not self.prevbuf:
-            x = self.prevbuf = self.table[code]
-        else:
-            if code < len(self.table):
-                x = self.table[code]
-                self.table.append(self.prevbuf+x[:1])
-            elif code == len(self.table):
-                self.table.append(self.prevbuf+self.prevbuf[:1])
-                x = self.table[code]
-            else:
-                raise CorruptDataError
-            table_length = len(self.table)
-            if table_length == 511:
-                self.nbits = 10
-            elif table_length == 1023:
-                self.nbits = 11
-            elif table_length == 2047:
-                self.nbits = 12
-            self.prevbuf = x
-        return x
-
-    def run(self):
-        while 1:
-            try:
-                code = self.readbits(self.nbits)
-            except EOFError:
-                break
-            try:
-                x = self.feed(code)
-            except CorruptDataError:
-                # just ignore corrupt data and stop yielding there
-                break
-            yield x
-            logger.debug('nbits=%d, code=%d, output=%r, table=%r'
-                         % (self.nbits, code, x, self.table[258:]))
-        return
-
-
-def lzwdecode(data):
-    fp = BytesIO(data)
-    s = LZWDecoder(fp).run()
-    return b''.join(s)
@@ -1,35 +0,0 @@
-import collections
-from .psparser import LIT
-
-
-LITERAL_DEVICE_GRAY = LIT('DeviceGray')
-LITERAL_DEVICE_RGB = LIT('DeviceRGB')
-LITERAL_DEVICE_CMYK = LIT('DeviceCMYK')
-
-
-class PDFColorSpace:
-
-    def __init__(self, name, ncomponents):
-        self.name = name
-        self.ncomponents = ncomponents
-        return
-
-    def __repr__(self):
-        return '<PDFColorSpace: %s, ncomponents=%d>' % \
-               (self.name, self.ncomponents)
-
-
-PREDEFINED_COLORSPACE = collections.OrderedDict()
-
-for (name, n) in [
-    ('DeviceGray', 1),  # default value first
-    ('CalRGB', 3),
-    ('CalGray', 1),
-    ('Lab', 3),
-    ('DeviceRGB', 3),
-    ('DeviceCMYK', 4),
-    ('Separation', 1),
-    ('Indexed', 1),
-    ('Pattern', 1),
-]:
-    PREDEFINED_COLORSPACE[name] = PDFColorSpace(name, n)
@@ -1,193 +0,0 @@
-from . import utils
-from .pdffont import PDFUnicodeNotDefined
-
-
-class PDFDevice:
-    """Translate the output of PDFPageInterpreter to the output that is needed
-    """
-
-    def __init__(self, rsrcmgr):
-        self.rsrcmgr = rsrcmgr
-        self.ctm = None
-        return
-
-    def __repr__(self):
-        return '<PDFDevice>'
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.close()
-
-    def close(self):
-        return
-
-    def set_ctm(self, ctm):
-        self.ctm = ctm
-        return
-
-    def begin_tag(self, tag, props=None):
-        return
-
-    def end_tag(self):
-        return
-
-    def do_tag(self, tag, props=None):
-        return
-
-    def begin_page(self, page, ctm):
-        return
-
-    def end_page(self, page):
-        return
-
-    def begin_figure(self, name, bbox, matrix):
-        return
-
-    def end_figure(self, name):
-        return
-
-    def paint_path(self, graphicstate, stroke, fill, evenodd, path):
-        return
-
-    def render_image(self, name, stream):
-        return
-
-    def render_string(self, textstate, seq, ncs, graphicstate):
-        return
-
-
-class PDFTextDevice(PDFDevice):
-
-    def render_string(self, textstate, seq, ncs, graphicstate):
-        matrix = utils.mult_matrix(textstate.matrix, self.ctm)
-        font = textstate.font
-        fontsize = textstate.fontsize
-        scaling = textstate.scaling * .01
-        charspace = textstate.charspace * scaling
-        wordspace = textstate.wordspace * scaling
-        rise = textstate.rise
-        if font.is_multibyte():
-            wordspace = 0
-        dxscale = .001 * fontsize * scaling
-        if font.is_vertical():
-            textstate.linematrix = self.render_string_vertical(
-                seq, matrix, textstate.linematrix, font, fontsize,
-                scaling, charspace, wordspace, rise, dxscale, ncs,
-                graphicstate)
-        else:
-            textstate.linematrix = self.render_string_horizontal(
-                seq, matrix, textstate.linematrix, font, fontsize,
-                scaling, charspace, wordspace, rise, dxscale, ncs,
-                graphicstate)
-        return
-
-    def render_string_horizontal(self, seq, matrix, pos,
-                                 font, fontsize, scaling, charspace, wordspace,
-                                 rise, dxscale, ncs, graphicstate):
-        (x, y) = pos
-        needcharspace = False
-        for obj in seq:
-            if utils.isnumber(obj):
-                x -= obj*dxscale
-                needcharspace = True
-            else:
-                for cid in font.decode(obj):
-                    if needcharspace:
-                        x += charspace
-                    x += self.render_char(
-                        utils.translate_matrix(matrix, (x, y)), font,
-                        fontsize, scaling, rise, cid, ncs, graphicstate)
-                    if cid == 32 and wordspace:
-                        x += wordspace
-                    needcharspace = True
-        return (x, y)
-
-    def render_string_vertical(self, seq, matrix, pos,
-                               font, fontsize, scaling, charspace, wordspace,
-                               rise, dxscale, ncs, graphicstate):
-        (x, y) = pos
-        needcharspace = False
-        for obj in seq:
-            if utils.isnumber(obj):
-                y -= obj*dxscale
-                needcharspace = True
-            else:
-                for cid in font.decode(obj):
-                    if needcharspace:
-                        y += charspace
-                    y += self.render_char(
-                        utils.translate_matrix(matrix, (x, y)), font, fontsize,
-                        scaling, rise, cid, ncs, graphicstate)
-                    if cid == 32 and wordspace:
-                        y += wordspace
-                    needcharspace = True
-        return (x, y)
-
-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
-                    graphicstate):
-        return 0
-
-
-class TagExtractor(PDFDevice):
-
-    def __init__(self, rsrcmgr, outfp, codec='utf-8'):
-        PDFDevice.__init__(self, rsrcmgr)
-        self.outfp = outfp
-        self.codec = codec
-        self.pageno = 0
-        self._stack = []
-        return
-
-    def render_string(self, textstate, seq, ncs, graphicstate):
-        font = textstate.font
-        text = ''
-        for obj in seq:
-            if isinstance(obj, str):
-                obj = utils.make_compat_bytes(obj)
-            if not isinstance(obj, bytes):
-                continue
-            chars = font.decode(obj)
-            for cid in chars:
-                try:
-                    char = font.to_unichr(cid)
-                    text += char
-                except PDFUnicodeNotDefined:
-                    print(chars)
-                    pass
-        self.outfp.write(utils.enc(text))
-        return
-
-    def begin_page(self, page, ctm):
-        output = '<page id="%s" bbox="%s" rotate="%d">' %\
-                 (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
-        self.outfp.write(utils.make_compat_bytes(output))
-        return
-
-    def end_page(self, page):
-        self.outfp.write(utils.make_compat_bytes('</page>\n'))
-        self.pageno += 1
-        return
-
-    def begin_tag(self, tag, props=None):
-        s = ''
-        if isinstance(props, dict):
-            s = ''.join(' {}="{}"'.format(utils.enc(k), utils.enc(str(v)))
-                        for (k, v) in sorted(props.items()))
-        out_s = '<{}{}>'.format(utils.enc(tag.name), s)
-        self.outfp.write(utils.make_compat_bytes(out_s))
-        self._stack.append(tag)
-        return
-
-    def end_tag(self):
-        assert self._stack, str(self.pageno)
-        tag = self._stack.pop(-1)
-        out_s = '</%s>' % utils.enc(tag.name)
-        self.outfp.write(utils.make_compat_bytes(out_s))
-        return
-
-    def do_tag(self, tag, props=None):
-        self.begin_tag(tag, props)
-        self._stack.pop(-1)
-        return
@@ -1,831 +0,0 @@
-import logging
-import re
-import struct
-from hashlib import sha256, md5
-
-from cryptography.hazmat.backends import default_backend
-from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
-
-from . import settings
-from .arcfour import Arcfour
-from .pdfparser import PDFSyntaxError, PDFStreamParser
-from .pdftypes import PDFException, uint_value, PDFTypeError, PDFStream, \
-    PDFObjectNotFound, decipher_all, int_value, str_value, list_value, \
-    dict_value, stream_value
-from .psparser import PSEOF, literal_name, LIT, KWD
-from .utils import choplist, nunpack, decode_text
-
-log = logging.getLogger(__name__)
-
-
-class PDFNoValidXRef(PDFSyntaxError):
-    """Raised when a cross-reference section cannot be located or parsed."""
-    pass
-
-
-class PDFNoValidXRefWarning(SyntaxWarning):
-    """Warning category that is not referenced elsewhere in this module.
-
-    Presumably the non-fatal counterpart of PDFNoValidXRef -- TODO confirm.
-    """
-    pass
-
-
-class PDFNoOutlines(PDFException):
-    """Raised by PDFDocument.get_outlines when the catalog has no
-    'Outlines' entry."""
-    pass
-
-
-class PDFDestinationNotFound(PDFException):
-    """Raised by PDFDocument.get_dest when a named destination is missing."""
-    pass
-
-
-class PDFEncryptionError(PDFException):
-    """Raised for an unknown or unsupported security filter, algorithm,
-    revision or crypt filter in the encryption dictionary."""
-    pass
-
-
-class PDFPasswordIncorrect(PDFEncryptionError):
-    """Raised when password authentication does not yield a key."""
-    pass
-
-
-class PDFTextExtractionNotAllowedWarning(UserWarning):
-    """Warning category that is not referenced elsewhere in this module.
-
-    Presumably issued when a document forbids text extraction -- TODO
-    confirm against the callers.
-    """
-    pass
-
-
-class PDFTextExtractionNotAllowed(PDFEncryptionError):
-    """Error type that is not raised inside this module.
-
-    Presumably raised by callers when a document forbids text extraction --
-    TODO confirm.
-    """
-    pass
-
-
-class PDFTextExtractionNotAllowedError(PDFTextExtractionNotAllowed):
-    """Deprecated name for PDFTextExtractionNotAllowed.
-
-    Creating an instance issues a DeprecationWarning.
-    """
-
-    def __init__(self, *args):
-        # Imported locally; the module itself does not import warnings.
-        from warnings import warn
-        warn('PDFTextExtractionNotAllowedError will be removed in the future. '
-             'Use PDFTextExtractionNotAllowed instead.', DeprecationWarning)
-        super().__init__(*args)
-
-
-# some predefined literals and keywords.
-LITERAL_OBJSTM = LIT('ObjStm')
-LITERAL_XREF = LIT('XRef')
-LITERAL_CATALOG = LIT('Catalog')
-
-
-class PDFBaseXRef:
-    """Interface shared by the cross-reference classes in this module."""
-
-    def get_trailer(self):
-        """Return the trailer dictionary; not implemented in the base."""
-        raise NotImplementedError
-
-    def get_objids(self):
-        """Return the known object ids; the base class knows none."""
-        return []
-
-    # Must return
-    #     (strmid, index, genno)
-    #  or (None, pos, genno)
-    def get_pos(self, objid):
-        """Locate ``objid``.
-
-        :raises KeyError: always in the base class, with ``objid`` as
-            argument.
-        """
-        raise KeyError(objid)
-
-
-class PDFXRef(PDFBaseXRef):
-    """Cross-reference data read from a plain-text xref table."""
-
-    def __init__(self):
-        # objid -> (None, file offset, generation number)
-        self.offsets = {}
-        # Entries of the trailer dictionary read by load_trailer().
-        self.trailer = {}
-        return
-
-    def __repr__(self):
-        return '<PDFXRef: offsets=%r>' % (self.offsets.keys())
-
-    def load(self, parser):
-        """Read xref subsections from ``parser`` up to the trailer.
-
-        A subsection starts with a "start count" line that is followed by
-        ``count`` entries of three space-separated fields. Only entries
-        whose third field is ``n`` are stored in ``self.offsets``. When a
-        line starting with ``trailer`` is reached the parser is moved back to
-        it and ``load_trailer`` takes over.
-
-        :raises PDFNoValidXRef: on EOF or on a line of unexpected shape.
-        """
-        while True:
-            try:
-                (pos, line) = parser.nextline()
-                # Blank lines between subsections are skipped.
-                if not line.strip():
-                    continue
-            except PSEOF:
-                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
-            # NOTE(review): looks unreachable -- an empty line has already
-            # taken the ``continue`` above.
-            if not line:
-                raise PDFNoValidXRef('Premature eof: %r' % parser)
-            if line.startswith(b'trailer'):
-                parser.seek(pos)
-                break
-            # Subsection header: first object id and number of entries.
-            f = line.strip().split(b' ')
-            if len(f) != 2:
-                error_msg = 'Trailer not found: {!r}: line={!r}'\
-                    .format(parser, line)
-                raise PDFNoValidXRef(error_msg)
-            try:
-                (start, nobjs) = map(int, f)
-            except ValueError:
-                error_msg = 'Invalid line: {!r}: line={!r}'\
-                    .format(parser, line)
-                raise PDFNoValidXRef(error_msg)
-            for objid in range(start, start+nobjs):
-                try:
-                    (_, line) = parser.nextline()
-                except PSEOF:
-                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
-                f = line.strip().split(b' ')
-                if len(f) != 3:
-                    error_msg = 'Invalid XRef format: {!r}, line={!r}'\
-                        .format(parser, line)
-                    raise PDFNoValidXRef(error_msg)
-                (pos, genno, use) = f
-                # Entries not marked ``n`` are ignored.
-                if use != b'n':
-                    continue
-                # NOTE(review): int() can raise ValueError on malformed
-                # fields; that is not converted to PDFNoValidXRef here.
-                self.offsets[objid] = (None, int(pos), int(genno))
-        log.info('xref objects: %r', self.offsets)
-        self.load_trailer(parser)
-        return
-
-    def load_trailer(self, parser):
-        """Read the trailer dictionary and merge it into ``self.trailer``.
-
-        If the parser hits EOF first, the last object it holds
-        (``parser.pop(1)``) is used as the dictionary instead.
-
-        :raises PDFNoValidXRef: if EOF is hit and nothing can be popped.
-        """
-        try:
-            (_, kwd) = parser.nexttoken()
-            assert kwd is KWD(b'trailer'), str(kwd)
-            (_, dic) = parser.nextobject()
-        except PSEOF:
-            x = parser.pop(1)
-            if not x:
-                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
-            (_, dic) = x[0]
-        self.trailer.update(dict_value(dic))
-        log.debug('trailer=%r', self.trailer)
-        return
-
-    def get_trailer(self):
-        """Return the trailer dictionary collected so far."""
-        return self.trailer
-
-    def get_objids(self):
-        """Return the ids of all objects that have a stored offset."""
-        return self.offsets.keys()
-
-    def get_pos(self, objid):
-        """Return the stored ``(None, pos, genno)`` tuple for ``objid``.
-
-        :raises KeyError: if ``objid`` has no stored offset.
-        """
-        try:
-            return self.offsets[objid]
-        except KeyError:
-            # Re-raised unchanged.
-            raise
-
-
-class PDFXRefFallback(PDFXRef):
-    """Cross-reference data rebuilt by scanning the file for objects."""
-
-    def __repr__(self):
-        return '<PDFXRefFallback: offsets=%r>' % (self.offsets.keys())
-
-    # Matches the "<objid> <genno> obj" header at the start of a line.
-    PDFOBJ_CUE = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
-
-    def load(self, parser):
-        """Scan from the start of the file and record every object header.
-
-        Scanning stops at EOF or at the first line starting with
-        ``trailer``, whose dictionary is then loaded. For each object that
-        turns out to be an object stream, the objects it contains are
-        registered as well.
-
-        :raises PDFSyntaxError: in strict mode, if an object stream has no
-            ``N`` entry.
-        """
-        parser.seek(0)
-        while 1:
-            try:
-                (pos, line) = parser.nextline()
-            except PSEOF:
-                break
-            if line.startswith(b'trailer'):
-                parser.seek(pos)
-                self.load_trailer(parser)
-                log.info('trailer: %r', self.trailer)
-                break
-            line = line.decode('latin-1')  # default pdf encoding
-            m = self.PDFOBJ_CUE.match(line)
-            if not m:
-                continue
-            (objid, genno) = m.groups()
-            objid = int(objid)
-            genno = int(genno)
-            self.offsets[objid] = (None, pos, genno)
-            # expand ObjStm.
-            parser.seek(pos)
-            (_, obj) = parser.nextobject()
-            if isinstance(obj, PDFStream) \
-                    and obj.get('Type') is LITERAL_OBJSTM:
-                stream = stream_value(obj)
-                try:
-                    n = stream['N']
-                except KeyError:
-                    if settings.STRICT:
-                        raise PDFSyntaxError('N is not defined: %r' % stream)
-                    n = 0
-                # Parse everything in the stream; the leading items are read
-                # below as (object id, offset) pairs.
-                parser1 = PDFStreamParser(stream.get_data())
-                objs = []
-                try:
-                    while 1:
-                        (_, obj) = parser1.nextobject()
-                        objs.append(obj)
-                except PSEOF:
-                    pass
-                # Never index past what was actually parsed.
-                n = min(n, len(objs)//2)
-                for index in range(n):
-                    objid1 = objs[index*2]
-                    # Contained objects are located by (stream id, index).
-                    self.offsets[objid1] = (objid, index, 0)
-        return
-
-
-class PDFXRefStream(PDFBaseXRef):
-    """Cross-reference data stored in a binary xref stream.
-
-    The stream data is a sequence of fixed-size entries, each made of three
-    fields whose byte widths come from the stream's ``W`` entry. The entries
-    of all ``Index`` ranges follow each other in the data, in range order.
-    """
-
-    def __init__(self):
-        self.data = None
-        # Size in bytes of one entry (fl1 + fl2 + fl3).
-        self.entlen = None
-        # Byte widths of the three fields of an entry.
-        self.fl1 = self.fl2 = self.fl3 = None
-        # List of (first object id, number of entries) pairs.
-        self.ranges = []
-        return
-
-    def __repr__(self):
-        return '<PDFXRefStream: ranges=%r>' % (self.ranges)
-
-    def load(self, parser):
-        """Read the xref stream object at the parser's current position.
-
-        :raises PDFNoValidXRef: if the object is not a stream whose ``Type``
-            is ``XRef`` (a missing ``Type`` counts as a mismatch).
-        :raises PDFSyntaxError: if ``Index`` has an odd number of items.
-        """
-        (_, objid) = parser.nexttoken()  # ignored
-        (_, genno) = parser.nexttoken()  # ignored
-        (_, kwd) = parser.nexttoken()
-        (_, stream) = parser.nextobject()
-        # .get() so that a stream without /Type is reported as an invalid
-        # xref instead of escaping as a bare KeyError.
-        if not isinstance(stream, PDFStream) \
-                or stream.get('Type') is not LITERAL_XREF:
-            raise PDFNoValidXRef('Invalid PDF stream spec.')
-        size = stream['Size']
-        # Without /Index there is a single range covering 0..Size-1.
-        index_array = stream.get('Index', (0, size))
-        if len(index_array) % 2 != 0:
-            raise PDFSyntaxError('Invalid index number')
-        self.ranges.extend(choplist(2, index_array))
-        (self.fl1, self.fl2, self.fl3) = stream['W']
-        self.data = stream.get_data()
-        self.entlen = self.fl1+self.fl2+self.fl3
-        self.trailer = stream.attrs
-        log.info('xref stream: objid=%s, fields=%d,%d,%d',
-                 ', '.join(map(repr, self.ranges)),
-                 self.fl1, self.fl2, self.fl3)
-        return
-
-    def get_trailer(self):
-        """Return the stream's attribute dictionary (set by ``load``)."""
-        return self.trailer
-
-    def get_objids(self):
-        """Yield the ids of all entries whose type field is 1 or 2."""
-        # Entry number of the first entry of the current range. Entries of
-        # later ranges come after those of earlier ranges in the data, the
-        # same layout that get_pos() assumes.
-        index = 0
-        for (start, nobjs) in self.ranges:
-            for i in range(nobjs):
-                offset = self.entlen * (index + i)
-                ent = self.data[offset:offset+self.entlen]
-                f1 = nunpack(ent[:self.fl1], 1)
-                if f1 == 1 or f1 == 2:
-                    yield start+i
-            index += nobjs
-        return
-
-    def get_pos(self, objid):
-        """Return the location of ``objid``.
-
-        :return: ``(None, f2, f3)`` when the type field is 1, or
-            ``(f2, f3, 0)`` when it is 2, where f2 and f3 are the second and
-            third fields of the entry.
-        :raises KeyError: if ``objid`` is in no range or its entry has any
-            other type.
-        """
-        index = 0
-        for (start, nobjs) in self.ranges:
-            if start <= objid and objid < start+nobjs:
-                index += objid - start
-                break
-            else:
-                # Skip over all entries of this range.
-                index += nobjs
-        else:
-            raise KeyError(objid)
-        offset = self.entlen * index
-        ent = self.data[offset:offset+self.entlen]
-        # The type field defaults to 1 when it is empty.
-        f1 = nunpack(ent[:self.fl1], 1)
-        f2 = nunpack(ent[self.fl1:self.fl1+self.fl2])
-        f3 = nunpack(ent[self.fl1+self.fl2:])
-        if f1 == 1:
-            return (None, f2, f3)
-        elif f1 == 2:
-            return (f2, f3, 0)
-        else:
-            # this is a free object
-            raise KeyError(objid)
-
-
-class PDFStandardSecurityHandler:
-    """Password check and RC4 decryption for the revisions listed in
-    ``supported_revisions``."""
-
-    # Appended to a password before it is cut to 32 bytes.
-    PASSWORD_PADDING = (b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08'
-                        b'..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz')
-    supported_revisions = (2, 3)
-
-    def __init__(self, docid, param, password=''):
-        """Store the arguments and run ``init()``.
-
-        :param docid: document ID sequence; element 0 goes into the hashes.
-        :param param: the encryption dictionary.
-        :param password: password as ``str``.
-        :raises PDFEncryptionError: if the revision is not supported.
-        :raises PDFPasswordIncorrect: if no key can be derived.
-        """
-        self.docid = docid
-        self.param = param
-        self.password = password
-        self.init()
-        return
-
-    def init(self):
-        """Read the parameters, check the revision and derive the key."""
-        self.init_params()
-        if self.r not in self.supported_revisions:
-            error_msg = 'Unsupported revision: param=%r' % self.param
-            raise PDFEncryptionError(error_msg)
-        self.init_key()
-        return
-
-    def init_params(self):
-        """Copy V, R, P, O, U and Length from the encryption dictionary."""
-        self.v = int_value(self.param.get('V', 0))
-        self.r = int_value(self.param['R'])
-        self.p = uint_value(self.param['P'], 32)
-        self.o = str_value(self.param['O'])
-        self.u = str_value(self.param['U'])
-        # Key length in bits; 40 when the dictionary does not say.
-        self.length = int_value(self.param.get('Length', 40))
-        return
-
-    def init_key(self):
-        """Authenticate the stored password and keep the resulting key.
-
-        :raises PDFPasswordIncorrect: if authentication returns ``None``.
-        """
-        self.key = self.authenticate(self.password)
-        if self.key is None:
-            raise PDFPasswordIncorrect
-        return
-
-    # The three permission checks below test single bits of P.
-    def is_printable(self):
-        return bool(self.p & 4)
-
-    def is_modifiable(self):
-        return bool(self.p & 8)
-
-    def is_extractable(self):
-        return bool(self.p & 16)
-
-    def compute_u(self, key):
-        """Compute the value that is compared with U for ``key``."""
-        if self.r == 2:
-            # Algorithm 3.4
-            return Arcfour(key).encrypt(self.PASSWORD_PADDING)  # 2
-        else:
-            # Algorithm 3.5
-            hash = md5(self.PASSWORD_PADDING)  # 2
-            hash.update(self.docid[0])  # 3
-            result = Arcfour(key).encrypt(hash.digest())  # 4
-            for i in range(1, 20):  # 5
-                # Key for round i: every key byte XORed with i.
-                k = b''.join(bytes((c ^ i,)) for c in iter(key))
-                result = Arcfour(k).encrypt(result)
-            result += result  # 6
-            return result
-
-    def compute_encryption_key(self, password):
-        """Derive the encryption key from ``password`` (bytes)."""
-        # Algorithm 3.2
-        password = (password + self.PASSWORD_PADDING)[:32]  # 1
-        hash = md5(password)  # 2
-        hash.update(self.o)  # 3
-        # See https://github.com/pdfminer/pdfminer.six/issues/186
-        hash.update(struct.pack('<L', self.p))  # 4
-        hash.update(self.docid[0])  # 5
-        if self.r >= 4:
-            # encrypt_metadata is only set by the V4 subclass, which is
-            # also the one that accepts revision 4.
-            if not self.encrypt_metadata:
-                hash.update(b'\xff\xff\xff\xff')
-        result = hash.digest()
-        # Key length in bytes: 5 for revision 2, Length/8 otherwise.
-        n = 5
-        if self.r >= 3:
-            n = self.length // 8
-            for _ in range(50):
-                result = md5(result[:n]).digest()
-        return result[:n]
-
-    def authenticate(self, password):
-        """Return the key for ``password`` or ``None``.
-
-        The password is tried as user password first and as owner password
-        second.
-        """
-        password = password.encode("latin1")
-        key = self.authenticate_user_password(password)
-        if key is None:
-            key = self.authenticate_owner_password(password)
-        return key
-
-    def authenticate_user_password(self, password):
-        """Return the key derived from ``password`` if it verifies, else
-        ``None``."""
-        key = self.compute_encryption_key(password)
-        if self.verify_encryption_key(key):
-            return key
-        else:
-            return None
-
-    def verify_encryption_key(self, key):
-        """Check ``key`` against U (only its first 16 bytes unless the
-        revision is 2)."""
-        # Algorithm 3.6
-        u = self.compute_u(key)
-        if self.r == 2:
-            return u == self.u
-        return u[:16] == self.u[:16]
-
-    def authenticate_owner_password(self, password):
-        """Recover the user password from O with ``password`` and
-        authenticate with it."""
-        # Algorithm 3.7
-        password = (password + self.PASSWORD_PADDING)[:32]
-        hash = md5(password)
-        if self.r >= 3:
-            for _ in range(50):
-                hash = md5(hash.digest())
-        n = 5
-        if self.r >= 3:
-            n = self.length // 8
-        key = hash.digest()[:n]
-        if self.r == 2:
-            user_password = Arcfour(key).decrypt(self.o)
-        else:
-            user_password = self.o
-            # Rounds 19 down to 0, each with the key XORed by the round
-            # number.
-            for i in range(19, -1, -1):
-                k = b''.join(bytes((c ^ i,)) for c in iter(key))
-                user_password = Arcfour(k).decrypt(user_password)
-        return self.authenticate_user_password(user_password)
-
-    def decrypt(self, objid, genno, data, attrs=None):
-        """Decrypt ``data`` of object ``objid``/``genno``; ``attrs`` is not
-        used by this class."""
-        return self.decrypt_rc4(objid, genno, data)
-
-    def decrypt_rc4(self, objid, genno, data):
-        """RC4-decrypt ``data`` with a key specific to the object."""
-        # Document key + first 3 bytes of objid + first 2 bytes of genno
-        # (both packed little-endian), hashed with MD5.
-        key = self.key + struct.pack('<L', objid)[:3] \
-              + struct.pack('<L', genno)[:2]
-        hash = md5(key)
-        key = hash.digest()[:min(len(key), 16)]
-        return Arcfour(key).decrypt(data)
-
-
-class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
-    """Handler for revision 4: named crypt filters using RC4 or AES-128."""
-
-    supported_revisions = (4,)
-
-    def init_params(self):
-        """Read the base parameters plus CF, StmF, StrF and EncryptMetadata.
-
-        :raises PDFEncryptionError: if StmF and StrF differ, if a crypt
-            filter uses an unknown method, or if StrF names no known filter.
-        """
-        super().init_params()
-        self.length = 128
-        self.cf = dict_value(self.param.get('CF'))
-        self.stmf = literal_name(self.param['StmF'])
-        self.strf = literal_name(self.param['StrF'])
-        self.encrypt_metadata = bool(self.param.get('EncryptMetadata', True))
-        # One filter is used for everything, so both names must agree.
-        if self.stmf != self.strf:
-            error_msg = 'Unsupported crypt filter: param=%r' % self.param
-            raise PDFEncryptionError(error_msg)
-        # Crypt filter name -> decrypt method.
-        self.cfm = {}
-        for k, v in self.cf.items():
-            f = self.get_cfm(literal_name(v['CFM']))
-            if f is None:
-                error_msg = 'Unknown crypt filter method: param=%r' \
-                            % self.param
-                raise PDFEncryptionError(error_msg)
-            self.cfm[k] = f
-        self.cfm['Identity'] = self.decrypt_identity
-        if self.strf not in self.cfm:
-            error_msg = 'Undefined crypt filter: param=%r' % self.param
-            raise PDFEncryptionError(error_msg)
-        return
-
-    def get_cfm(self, name):
-        """Return the decrypt method for crypt filter method ``name``, or
-        ``None`` if it is unknown."""
-        if name == 'V2':
-            return self.decrypt_rc4
-        elif name == 'AESV2':
-            return self.decrypt_aes128
-        else:
-            return None
-
-    def decrypt(self, objid, genno, data, attrs=None, name=None):
-        """Decrypt ``data`` with the crypt filter ``name`` (StrF if None).
-
-        Data whose ``attrs`` mark it as ``Metadata`` is returned unchanged
-        when metadata is not encrypted.
-        """
-        if not self.encrypt_metadata and attrs is not None:
-            t = attrs.get('Type')
-            if t is not None and literal_name(t) == 'Metadata':
-                return data
-        if name is None:
-            name = self.strf
-        return self.cfm[name](objid, genno, data)
-
-    def decrypt_identity(self, objid, genno, data):
-        """Return ``data`` unchanged."""
-        return data
-
-    def decrypt_aes128(self, objid, genno, data):
-        """AES-CBC-decrypt ``data`` with a key specific to the object.
-
-        The first 16 bytes of ``data`` are the initialization vector.
-        NOTE(review): the decryptor is not finalized and no padding is
-        stripped from the result -- confirm callers tolerate that.
-        """
-        key = self.key + struct.pack('<L', objid)[:3] \
-              + struct.pack('<L', genno)[:2] + b'sAlT'
-        hash = md5(key)
-        key = hash.digest()[:min(len(key), 16)]
-        initialization_vector = data[:16]
-        ciphertext = data[16:]
-        cipher = Cipher(algorithms.AES(key),
-                        modes.CBC(initialization_vector),
-                        backend=default_backend())
-        return cipher.decryptor().update(ciphertext)
-
-
-class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
-    """Handler for revision 5: SHA-256 password check and AES-256."""
-
-    supported_revisions = (5,)
-
-    def init_params(self):
-        """Read the V4 parameters plus OE and UE, and split O and U into
-        hash (bytes 0-31), validation salt (32-39) and key salt (40-)."""
-        super().init_params()
-        self.length = 256
-        self.oe = str_value(self.param['OE'])
-        self.ue = str_value(self.param['UE'])
-        self.o_hash = self.o[:32]
-        self.o_validation_salt = self.o[32:40]
-        self.o_key_salt = self.o[40:]
-        self.u_hash = self.u[:32]
-        self.u_validation_salt = self.u[32:40]
-        self.u_key_salt = self.u[40:]
-        return
-
-    def get_cfm(self, name):
-        """Return the decrypt method for ``name``; only ``AESV3`` is
-        known."""
-        if name == 'AESV3':
-            return self.decrypt_aes256
-        else:
-            return None
-
-    def authenticate(self, password):
-        """Return the file key for ``password`` or ``None``.
-
-        The password is UTF-8 encoded and cut to 127 bytes. It is checked
-        as owner password first (the key is then decrypted from OE) and as
-        user password second (key decrypted from UE).
-        """
-        password = password.encode('utf-8')[:127]
-        # Owner check: SHA-256(password + owner validation salt + U).
-        hash = sha256(password)
-        hash.update(self.o_validation_salt)
-        hash.update(self.u)
-        if hash.digest() == self.o_hash:
-            hash = sha256(password)
-            hash.update(self.o_key_salt)
-            hash.update(self.u)
-            cipher = Cipher(algorithms.AES(hash.digest()),
-                            modes.CBC(b'\0' * 16),
-                            backend=default_backend())
-            return cipher.decryptor().update(self.oe)
-        # User check: SHA-256(password + user validation salt).
-        hash = sha256(password)
-        hash.update(self.u_validation_salt)
-        if hash.digest() == self.u_hash:
-            hash = sha256(password)
-            hash.update(self.u_key_salt)
-            cipher = Cipher(algorithms.AES(hash.digest()),
-                            modes.CBC(b'\0' * 16),
-                            backend=default_backend())
-            return cipher.decryptor().update(self.ue)
-        return None
-
-    def decrypt_aes256(self, objid, genno, data):
-        """AES-CBC-decrypt ``data`` with the file key.
-
-        The first 16 bytes of ``data`` are the initialization vector;
-        ``objid`` and ``genno`` are not used.
-        """
-        initialization_vector = data[:16]
-        ciphertext = data[16:]
-        cipher = Cipher(algorithms.AES(self.key),
-                        modes.CBC(initialization_vector),
-                        backend=default_backend())
-        return cipher.decryptor().update(ciphertext)
-
-
-class PDFDocument:
-    """PDFDocument object represents a PDF document.
-
-    Since a PDF file can be very big, normally it is not loaded at
-    once. So PDF document has to cooperate with a PDF parser in order to
-    dynamically import the data as processing goes.
-
-    Typical usage:
-      doc = PDFDocument(parser, password)
-      obj = doc.getobj(objid)
-
-    """
-
-    security_handler_registry = {
-        1: PDFStandardSecurityHandler,
-        2: PDFStandardSecurityHandler,
-        4: PDFStandardSecurityHandlerV4,
-        5: PDFStandardSecurityHandlerV5,
-    }
-
-    def __init__(self, parser, password='', caching=True, fallback=True):
-        """Set the document to use a given PDFParser object.
-
-        :param parser: parser to read from; it is told about this document
-            through ``set_document``.
-        :param password: password used if the trailer has an ``Encrypt``
-            entry.
-        :param caching: whether fetched objects are kept in memory.
-        :param fallback: whether to add a PDFXRefFallback scan of the file.
-        :raises PDFSyntaxError: if no trailer has a ``Root`` entry, or (in
-            strict mode) if the catalog has the wrong type.
-        """
-        self.caching = caching
-        self.xrefs = []
-        self.info = []
-        self.catalog = None
-        self.encryption = None
-        self.decipher = None
-        self._parser = None
-        # objid -> (object, genno), filled by getobj().
-        self._cached_objs = {}
-        # object stream id -> (objects, N), filled by _getobj_objstm().
-        self._parsed_objs = {}
-        self._parser = parser
-        self._parser.set_document(self)
-        self.is_printable = self.is_modifiable = self.is_extractable = True
-        # Retrieve the information of each header that was appended
-        # (maybe multiple times) at the end of the document.
-        try:
-            pos = self.find_xref(parser)
-            self.read_xref_from(parser, pos, self.xrefs)
-        except PDFNoValidXRef:
-            pass  # fallback = True
-        # NOTE(review): a failed xref read no longer forces the fallback
-        # scan; it runs whenever ``fallback`` is true, also after the
-        # regular xref was read successfully -- confirm this is intended.
-        if fallback:
-            parser.fallback = True
-            xref = PDFXRefFallback()
-            xref.load(parser)
-            self.xrefs.append(xref)
-        for xref in self.xrefs:
-            trailer = xref.get_trailer()
-            if not trailer:
-                continue
-            # If there's an encryption info, remember it.
-            if 'Encrypt' in trailer:
-                self.encryption = (list_value(trailer['ID']),
-                                   dict_value(trailer['Encrypt']))
-                self._initialize_password(password)
-            if 'Info' in trailer:
-                self.info.append(dict_value(trailer['Info']))
-            if 'Root' in trailer:
-                # Every PDF file must have exactly one /Root dictionary.
-                self.catalog = dict_value(trailer['Root'])
-                break
-        else:
-            # Reached only when no trailer supplied a /Root.
-            raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
-        if self.catalog.get('Type') is not LITERAL_CATALOG:
-            if settings.STRICT:
-                raise PDFSyntaxError('Catalog not found!')
-        return
-
-    KEYWORD_OBJ = KWD(b'obj')
-
-    # _initialize_password(password='')
-    #   Perform the initialization with a given password.
-    def _initialize_password(self, password=''):
-        """Create the security handler and take over its results.
-
-        The handler class is picked from ``security_handler_registry`` by
-        the ``V`` entry of the encryption dictionary (0 when absent).
-
-        :raises PDFEncryptionError: if the filter is not ``Standard`` or no
-            handler is registered for ``V``.
-        """
-        (docid, param) = self.encryption
-        if literal_name(param.get('Filter')) != 'Standard':
-            raise PDFEncryptionError('Unknown filter: param=%r' % param)
-        v = int_value(param.get('V', 0))
-        factory = self.security_handler_registry.get(v)
-        if factory is None:
-            raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
-        # Constructing the handler also authenticates the password.
-        handler = factory(docid, param, password)
-        self.decipher = handler.decrypt
-        self.is_printable = handler.is_printable()
-        self.is_modifiable = handler.is_modifiable()
-        self.is_extractable = handler.is_extractable()
-        self._parser.fallback = False  # need to read streams with exact length
-        return
-
-    def _getobj_objstm(self, stream, index, objid):
-        """Return the object at position ``index`` of object stream
-        ``stream``.
-
-        The parsed content of a stream is remembered per stream id when
-        caching is enabled. ``objid`` is not used.
-
-        :raises PDFSyntaxError: if the stream holds too few objects.
-        """
-        parsed = self._parsed_objs.get(stream.objid)
-        if parsed is None:
-            parsed = self._get_objects(stream)
-            if self.caching:
-                self._parsed_objs[stream.objid] = parsed
-        (objs, n) = parsed
-        # The first n*2 items are the id/offset table; objects follow.
-        try:
-            return objs[n*2+index]
-        except IndexError:
-            raise PDFSyntaxError('index too big: %r' % index)
-
-    def _get_objects(self, stream):
-        """Parse every object in ``stream``.
-
-        :return: ``(objs, n)`` -- the list of parsed objects and the value
-            of the stream's ``N`` entry (0 if it is missing and strict mode
-            is off).
-        :raises PDFSyntaxError: in strict mode, if the stream's type is not
-            ``ObjStm`` or ``N`` is missing.
-        """
-        if stream.get('Type') is not LITERAL_OBJSTM:
-            if settings.STRICT:
-                raise PDFSyntaxError('Not a stream object: %r' % stream)
-        try:
-            n = stream['N']
-        except KeyError:
-            if settings.STRICT:
-                raise PDFSyntaxError('N is not defined: %r' % stream)
-            n = 0
-        parser = PDFStreamParser(stream.get_data())
-        parser.set_document(self)
-        objs = []
-        # Read until the stream parser runs out of data.
-        try:
-            while 1:
-                (_, obj) = parser.nextobject()
-                objs.append(obj)
-        except PSEOF:
-            pass
-        return (objs, n)
-
-    def _getobj_parse(self, pos, objid):
-        """Parse the object that starts at file offset ``pos``.
-
-        The "<objid> <genno> obj" header is read first. If its id is not
-        ``objid``, tokens are skipped up to the next ``obj`` keyword and the
-        id is taken from the token two places before it.
-
-        :raises PDFSyntaxError: if the id still differs from ``objid`` or
-            the ``obj`` keyword is missing.
-        """
-        self._parser.seek(pos)
-        (_, objid1) = self._parser.nexttoken()  # objid
-        (_, genno) = self._parser.nexttoken()  # genno
-        (_, kwd) = self._parser.nexttoken()
-        # hack around malformed pdf files
-        # copied from https://github.com/jaepil/pdfminer3k/blob/master/
-        # pdfminer/pdfparser.py#L399
-        # to solve https://github.com/pdfminer/pdfminer.six/issues/56
-        # assert objid1 == objid, str((objid1, objid))
-        if objid1 != objid:
-            x = []
-            while kwd is not self.KEYWORD_OBJ:
-                (_, kwd) = self._parser.nexttoken()
-                x.append(kwd)
-            # x ends with [..., objid, genno, obj] only if at least three
-            # tokens were read; with two, x[-2] is still the token before
-            # ``obj``.
-            if len(x) >= 2:
-                objid1 = x[-2]
-        # #### end hack around malformed pdf files
-        if objid1 != objid:
-            raise PDFSyntaxError('objid mismatch: {!r}={!r}'
-                                 .format(objid1, objid))
-
-        if kwd != KWD(b'obj'):
-            raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
-        (_, obj) = self._parser.nextobject()
-        return obj
-
-    # can raise PDFObjectNotFound
-    def getobj(self, objid):
-        """Get object from PDF
-
-        The xrefs are tried in order; the first one that knows ``objid`` and
-        whose object can be read wins. Objects read directly from the file
-        are deciphered when the document is encrypted; objects taken from an
-        object stream are returned as parsed.
-
-        :raises PDFException if PDFDocument is not initialized
-        :raises PDFObjectNotFound if objid does not exist in PDF
-        """
-        if not self.xrefs:
-            raise PDFException('PDFDocument is not initialized')
-        log.debug('getobj: objid=%r', objid)
-        if objid in self._cached_objs:
-            (obj, genno) = self._cached_objs[objid]
-        else:
-            for xref in self.xrefs:
-                try:
-                    (strmid, index, genno) = xref.get_pos(objid)
-                except KeyError:
-                    # This xref does not know the object; try the next one.
-                    continue
-                try:
-                    if strmid is not None:
-                        # The object lives inside object stream ``strmid``.
-                        stream = stream_value(self.getobj(strmid))
-                        obj = self._getobj_objstm(stream, index, objid)
-                    else:
-                        # ``index`` is a file offset in this case.
-                        obj = self._getobj_parse(index, objid)
-                        if self.decipher:
-                            obj = decipher_all(self.decipher, objid, genno,
-                                               obj)
-
-                    if isinstance(obj, PDFStream):
-                        obj.set_objid(objid, genno)
-                    break
-                except (PSEOF, PDFSyntaxError):
-                    # Unreadable at this location; try the next xref.
-                    continue
-            else:
-                raise PDFObjectNotFound(objid)
-            log.debug('register: objid=%r: %r', objid, obj)
-            if self.caching:
-                self._cached_objs[objid] = (obj, genno)
-        return obj
-
-    def get_outlines(self):
-        """Return a generator over the document outline.
-
-        Each item is ``(level, title, dest, action, se)``. Entries without a
-        ``Title``, or with neither ``A`` nor ``Dest``, are not yielded, but
-        their children and siblings are still visited.
-
-        :raises PDFNoOutlines: if the catalog has no ``Outlines`` entry.
-        """
-        if 'Outlines' not in self.catalog:
-            raise PDFNoOutlines
-
-        def search(entry, level):
-            entry = dict_value(entry)
-            if 'Title' in entry:
-                if 'A' in entry or 'Dest' in entry:
-                    title = decode_text(str_value(entry['Title']))
-                    dest = entry.get('Dest')
-                    action = entry.get('A')
-                    se = entry.get('SE')
-                    yield (level, title, dest, action, se)
-            # Children are one level deeper, siblings stay on this level.
-            if 'First' in entry and 'Last' in entry:
-                yield from search(entry['First'], level+1)
-            if 'Next' in entry:
-                yield from search(entry['Next'], level)
-            return
-        return search(self.catalog['Outlines'], 0)
-
-    def lookup_name(self, cat, key):
-        """Look up ``key`` in the name tree ``cat`` of the catalog's
-        ``Names`` dictionary.
-
-        :return: the value stored for ``key``; ``None`` when ``key`` lies
-            outside the ``Limits`` of the tree's root node.
-        :raises KeyError: if the catalog has no usable ``Names`` entry, if
-            ``cat`` is missing, or if the key is not found.
-        """
-        try:
-            names = dict_value(self.catalog['Names'])
-        except (PDFTypeError, KeyError):
-            raise KeyError((cat, key))
-        # may raise KeyError
-        d0 = dict_value(names[cat])
-
-        def lookup(d):
-            # Skip nodes whose key range cannot contain the key.
-            if 'Limits' in d:
-                (k1, k2) = list_value(d['Limits'])
-                if key < k1 or k2 < key:
-                    return None
-            # Leaf node: a flat list of alternating keys and values.
-            if 'Names' in d:
-                objs = list_value(d['Names'])
-                names = dict(choplist(2, objs))
-                return names[key]
-            # Inner node: the first truthy result from a kid wins.
-            if 'Kids' in d:
-                for c in list_value(d['Kids']):
-                    v = lookup(dict_value(c))
-                    if v:
-                        return v
-            raise KeyError((cat, key))
-        return lookup(d0)
-
-    def get_dest(self, name):
-        """Return the destination registered under ``name``.
-
-        The ``Dests`` name tree is consulted first; if that lookup raises
-        KeyError, the catalog's ``Dests`` dictionary is used instead.
-
-        :raises PDFDestinationNotFound: if neither place has ``name``.
-        """
-        try:
-            # PDF-1.2 or later
-            return self.lookup_name('Dests', name)
-        except KeyError:
-            # PDF-1.1 or prior
-            if 'Dests' in self.catalog:
-                table = dict_value(self.catalog['Dests'])
-                if name in table:
-                    return table[name]
-            raise PDFDestinationNotFound(name)
-
-    # find_xref
-    def find_xref(self, parser):
-        """Internal function used to locate the first XRef.
-
-        Scans the file backwards for the ``startxref`` keyword. The last
-        non-empty line seen before reaching it (that is, the line following
-        it in the file) is taken as the byte offset of the xref section.
-
-        :param parser: parser providing ``revreadlines()``.
-        :return: the offset as an ``int``.
-        :raises PDFNoValidXRef: if ``startxref`` is missing, or if it is not
-            followed by a line holding an integer.
-        """
-        # search the last xref table by scanning the file backwards.
-        prev = None
-        for line in parser.revreadlines():
-            line = line.strip()
-            log.debug('find_xref: %r', line)
-            if line == b'startxref':
-                break
-            if line:
-                prev = line
-        else:
-            raise PDFNoValidXRef('Unexpected EOF')
-        log.info('xref found: pos=%r', prev)
-        # A truncated or damaged file can leave nothing (prev is None) or
-        # garbage after ``startxref``. Report that as PDFNoValidXRef, which
-        # PDFDocument.__init__ handles, rather than letting TypeError or
-        # ValueError escape from int().
-        try:
-            return int(prev)
-        except (TypeError, ValueError):
-            raise PDFNoValidXRef('Invalid startxref offset: %r' % (prev,))
-
-    # read xref table
-    def read_xref_from(self, parser, start, xrefs):
-        """Reads XRefs from the given location.
-
-        The xref found at ``start`` is appended to ``xrefs``; the sections
-        named by its trailer's ``XRefStm`` and ``Prev`` entries are then
-        read recursively.
-        NOTE(review): there is no guard against a ``Prev`` chain that loops
-        back on itself -- such a file would recurse without end.
-
-        :raises PDFNoValidXRef: if no token can be read at ``start`` or the
-            section cannot be loaded.
-        """
-        parser.seek(start)
-        parser.reset()
-        try:
-            (pos, token) = parser.nexttoken()
-        except PSEOF:
-            raise PDFNoValidXRef('Unexpected EOF')
-        log.info('read_xref_from: start=%d, token=%r', start, token)
-        if isinstance(token, int):
-            # XRefStream: PDF-1.5
-            # The integer is the object id of the stream; rewind to it.
-            parser.seek(pos)
-            parser.reset()
-            xref = PDFXRefStream()
-            xref.load(parser)
-        else:
-            # Plain table: skip the rest of the ``xref`` keyword line.
-            if token is parser.KEYWORD_XREF:
-                parser.nextline()
-            xref = PDFXRef()
-            xref.load(parser)
-        xrefs.append(xref)
-        trailer = xref.get_trailer()
-        log.info('trailer: %r', trailer)
-        if 'XRefStm' in trailer:
-            pos = int_value(trailer['XRefStm'])
-            self.read_xref_from(parser, pos, xrefs)
-        if 'Prev' in trailer:
-            # find previous xref
-            pos = int_value(trailer['Prev'])
-            self.read_xref_from(parser, pos, xrefs)
-        return
@@ -1,801 +0,0 @@
-import logging
-import struct
-import sys
-from io import BytesIO
-
-from . import settings
-from .cmapdb import CMap
-from .cmapdb import CMapDB
-from .cmapdb import CMapParser
-from .cmapdb import FileUnicodeMap
-from .encodingdb import EncodingDB
-from .encodingdb import name2unicode
-from .fontmetrics import FONT_METRICS
-from .pdftypes import PDFException
-from .pdftypes import PDFStream
-from .pdftypes import dict_value
-from .pdftypes import int_value
-from .pdftypes import list_value
-from .pdftypes import num_value
-from .pdftypes import resolve1, resolve_all
-from .pdftypes import stream_value
-from .psparser import KWD
-from .psparser import LIT
-from .psparser import PSEOF
-from .psparser import PSLiteral
-from .psparser import PSStackParser
-from .psparser import literal_name
-from .utils import apply_matrix_norm
-from .utils import choplist
-from .utils import isnumber
-from .utils import nunpack
-
-log = logging.getLogger(__name__)
-
-
-def get_widths(seq):
-    widths = {}
-    r = []
-    for v in seq:
-        if isinstance(v, list):
-            if r:
-                char1 = r[-1]
-                for (i, w) in enumerate(v):
-                    widths[char1+i] = w
-                r = []
-        elif isnumber(v):
-            r.append(v)
-            if len(r) == 3:
-                (char1, char2, w) = r
-                for i in range(char1, char2+1):
-                    widths[i] = w
-                r = []
-    return widths
-
-
-def get_widths2(seq):
-    widths = {}
-    r = []
-    for v in seq:
-        if isinstance(v, list):
-            if r:
-                char1 = r[-1]
-                for (i, (w, vx, vy)) in enumerate(choplist(3, v)):
-                    widths[char1+i] = (w, (vx, vy))
-                r = []
-        elif isnumber(v):
-            r.append(v)
-            if len(r) == 5:
-                (char1, char2, w, vx, vy) = r
-                for i in range(char1, char2+1):
-                    widths[i] = (w, (vx, vy))
-                r = []
-    return widths
-
-
-class FontMetricsDB:
-
-    @classmethod
-    def get_metrics(cls, fontname):
-        return FONT_METRICS[fontname]
-
-
-class Type1FontHeaderParser(PSStackParser):
-
-    KEYWORD_BEGIN = KWD(b'begin')
-    KEYWORD_END = KWD(b'end')
-    KEYWORD_DEF = KWD(b'def')
-    KEYWORD_PUT = KWD(b'put')
-    KEYWORD_DICT = KWD(b'dict')
-    KEYWORD_ARRAY = KWD(b'array')
-    KEYWORD_READONLY = KWD(b'readonly')
-    KEYWORD_FOR = KWD(b'for')
-
-    def __init__(self, data):
-        PSStackParser.__init__(self, data)
-        self._cid2unicode = {}
-        return
-
-    def get_encoding(self):
-        """Parse the font encoding.
-
-        The Type1 font encoding maps character codes to character names. These
-        character names could either be standard Adobe glyph names, or
-        character names associated with custom CharStrings for this font. A
-        CharString is a sequence of operations that describe how the character
-        should be drawn. Currently, this function returns '' (empty string)
-        for character names that are associated with a CharStrings.
-
-        Reference: Adobe Systems Incorporated, Adobe Type 1 Font Format
-
-        :returns mapping of character identifiers (cid's) to unicode characters
-        """
-        while 1:
-            try:
-                (cid, name) = self.nextobject()
-            except PSEOF:
-                break
-            try:
-                self._cid2unicode[cid] = name2unicode(name)
-            except KeyError as e:
-                log.debug(str(e))
-        return self._cid2unicode
-
-    def do_keyword(self, pos, token):
-        if token is self.KEYWORD_PUT:
-            ((_, key), (_, value)) = self.pop(2)
-            if (isinstance(key, int) and isinstance(value, PSLiteral)):
-                self.add_results((key, literal_name(value)))
-        return
-
-
-# Text fragments for the 4-bit codes of a packed real-number operand.
-# getdict() indexes this tuple with nibble values 0-14 and treats 15 as the
-# end-of-number marker; index 13 has no fragment (None).
-NIBBLES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', 'e', 'e-',
-           None, '-')
-
-# Mapping of cmap names. Original cmap name is kept if not in the mapping.
-# (missing reference for why DLIdent is mapped to Identity)
-IDENTITY_ENCODER = {
-    'DLIdent-H': 'Identity-H',
-    'DLIdent-V': 'Identity-V',
-}
-
-
-def getdict(data):
-    d = {}
-    fp = BytesIO(data)
-    stack = []
-    while 1:
-        c = fp.read(1)
-        if not c:
-            break
-        b0 = ord(c)
-        if b0 <= 21:
-            d[b0] = stack
-            stack = []
-            continue
-        if b0 == 30:
-            s = ''
-            loop = True
-            while loop:
-                b = ord(fp.read(1))
-                for n in (b >> 4, b & 15):
-                    if n == 15:
-                        loop = False
-                    else:
-                        s += NIBBLES[n]
-            value = float(s)
-        elif 32 <= b0 and b0 <= 246:
-            value = b0-139
-        else:
-            b1 = ord(fp.read(1))
-            if 247 <= b0 and b0 <= 250:
-                value = ((b0-247) << 8)+b1+108
-            elif 251 <= b0 and b0 <= 254:
-                value = -((b0-251) << 8)-b1-108
-            else:
-                b2 = ord(fp.read(1))
-                if 128 <= b1:
-                    b1 -= 256
-                if b0 == 28:
-                    value = b1 << 8 | b2
-                else:
-                    value = b1 << 24 | b2 << 16 | \
-                            struct.unpack('>H', fp.read(2))[0]
-        stack.append(value)
-    return d
-
-
-class CFFFont:
-
-    STANDARD_STRINGS = (
-      '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign',
-      'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
-      'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period',
-      'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
-      'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
-      'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
-      'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
-      'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash',
-      'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a',
-      'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
-      'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
-      'braceleft', 'bar', 'braceright', 'asciitilde', 'exclamdown',
-      'cent', 'sterling', 'fraction', 'yen', 'florin', 'section',
-      'currency', 'quotesingle', 'quotedblleft', 'guillemotleft',
-      'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash',
-      'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
-      'quotesinglbase', 'quotedblbase', 'quotedblright',
-      'guillemotright', 'ellipsis', 'perthousand', 'questiondown',
-      'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve',
-      'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut',
-      'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
-      'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash',
-      'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu',
-      'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn',
-      'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
-      'threequarters', 'twosuperior', 'registered', 'minus', 'eth',
-      'multiply', 'threesuperior', 'copyright', 'Aacute',
-      'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde',
-      'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
-      'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde',
-      'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde',
-      'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave',
-      'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex',
-      'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute',
-      'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
-      'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex',
-      'odieresis', 'ograve', 'otilde', 'scaron', 'uacute',
-      'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis',
-      'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle',
-      'dollarsuperior', 'ampersandsmall', 'Acutesmall',
-      'parenleftsuperior', 'parenrightsuperior', 'twodotenleader',
-      'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle',
-      'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle',
-      'sevenoldstyle', 'eightoldstyle', 'nineoldstyle',
-      'commasuperior', 'threequartersemdash', 'periodsuperior',
-      'questionsmall', 'asuperior', 'bsuperior', 'centsuperior',
-      'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior',
-      'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior',
-      'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior',
-      'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall',
-      'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall',
-      'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall',
-      'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall',
-      'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall',
-      'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
-      'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall',
-      'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall',
-      'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior',
-      'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall',
-      'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths',
-      'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
-      'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
-      'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior',
-      'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior',
-      'seveninferior', 'eightinferior', 'nineinferior',
-      'centinferior', 'dollarinferior', 'periodinferior',
-      'commainferior', 'Agravesmall', 'Aacutesmall',
-      'Acircumflexsmall', 'Atildesmall', 'Adieresissmall',
-      'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall',
-      'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall',
-      'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
-      'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall',
-      'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall',
-      'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall',
-      'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
-      'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000',
-      '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book',
-      'Light', 'Medium', 'Regular', 'Roman', 'Semibold',
-    )
-
-    class INDEX:
-
-        def __init__(self, fp):
-            self.fp = fp
-            self.offsets = []
-            (count, offsize) = struct.unpack('>HB', self.fp.read(3))
-            for i in range(count+1):
-                self.offsets.append(nunpack(self.fp.read(offsize)))
-            self.base = self.fp.tell()-1
-            self.fp.seek(self.base+self.offsets[-1])
-            return
-
-        def __repr__(self):
-            return '<INDEX: size=%d>' % len(self)
-
-        def __len__(self):
-            return len(self.offsets)-1
-
-        def __getitem__(self, i):
-            self.fp.seek(self.base+self.offsets[i])
-            return self.fp.read(self.offsets[i+1]-self.offsets[i])
-
-        def __iter__(self):
-            return iter(self[i] for i in range(len(self)))
-
-    def __init__(self, name, fp):
-        """Parse the font read from the seekable binary file ``fp``.
-
-        Reads, in order: the header, the Name / Top DICT / String /
-        Global Subr INDEX structures, the first Top DICT, the CharStrings
-        INDEX, the encoding table and the charset table. Results are
-        stored in ``code2gid``/``gid2code`` and ``name2gid``/``gid2name``.
-
-        :raises ValueError: for an encoding format other than 0 or 1, or a
-            charset format other than 0, 1 or 2.
-        :raises AssertionError: for charset format 2 (not implemented).
-        """
-        self.name = name
-        self.fp = fp
-        # Header
-        (_major, _minor, hdrsize, offsize) = struct.unpack('BBBB',
-                                                           self.fp.read(4))
-        # Skip whatever part of the header lies beyond the four bytes read.
-        self.fp.read(hdrsize-4)
-        # Name INDEX
-        self.name_index = self.INDEX(self.fp)
-        # Top DICT INDEX
-        self.dict_index = self.INDEX(self.fp)
-        # String INDEX
-        self.string_index = self.INDEX(self.fp)
-        # Global Subr INDEX
-        self.subr_index = self.INDEX(self.fp)
-        # Top DICT DATA
-        self.top_dict = getdict(self.dict_index[0])
-        # Operators 15, 16 and 17 supply the three file positions used
-        # below; each defaults to 0 when absent. The one-element unpacking
-        # raises ValueError if an operator has any other operand count.
-        (charset_pos,) = self.top_dict.get(15, [0])
-        (encoding_pos,) = self.top_dict.get(16, [0])
-        (charstring_pos,) = self.top_dict.get(17, [0])
-        # CharStrings
-        self.fp.seek(charstring_pos)
-        self.charstring = self.INDEX(self.fp)
-        self.nglyphs = len(self.charstring)
-        # Encodings
-        self.code2gid = {}
-        self.gid2code = {}
-        self.fp.seek(encoding_pos)
-        format = self.fp.read(1)
-        if format == b'\x00':
-            # Format 0: a count n followed by n bytes; the position of each
-            # byte is used as the code and its value as the gid.
-            (n,) = struct.unpack('B', self.fp.read(1))
-            for (code, gid) in enumerate(struct.unpack('B'*n,
-                                                       self.fp.read(n))):
-                self.code2gid[code] = gid
-                self.gid2code[gid] = code
-        elif format == b'\x01':
-            # Format 1: n ranges of (first, nleft); codes are handed out
-            # consecutively across all ranges.
-            (n,) = struct.unpack('B', self.fp.read(1))
-            code = 0
-            for i in range(n):
-                (first, nleft) = struct.unpack('BB', self.fp.read(2))
-                for gid in range(first, first+nleft+1):
-                    self.code2gid[code] = gid
-                    self.gid2code[gid] = code
-                    code += 1
-        else:
-            raise ValueError('unsupported encoding format: %r' % format)
-        # Charsets
-        self.name2gid = {}
-        self.gid2name = {}
-        self.fp.seek(charset_pos)
-        format = self.fp.read(1)
-        if format == b'\x00':
-            # Format 0: one 16-bit string id per glyph, numbering the
-            # glyphs from 1 (hence nglyphs-1 entries).
-            n = self.nglyphs-1
-            for (gid, sid) in enumerate(struct.unpack('>'+'H'*n,
-                                                      self.fp.read(2*n))):
-                gid += 1
-                name = self.getstr(sid)
-                self.name2gid[name] = gid
-                self.gid2name[gid] = name
-        elif format == b'\x01':
-            # Format 1: n ranges of (first, nleft); string ids are handed
-            # out consecutively starting from 0.
-            (n,) = struct.unpack('B', self.fp.read(1))
-            sid = 0
-            for i in range(n):
-                (first, nleft) = struct.unpack('BB', self.fp.read(2))
-                for gid in range(first, first+nleft+1):
-                    name = self.getstr(sid)
-                    self.name2gid[name] = gid
-                    self.gid2name[gid] = name
-                    sid += 1
-        elif format == b'\x02':
-            # Format 2
-            # NOTE(review): assert statements are removed under
-            # ``python -O``, in which case this branch silently does nothing.
-            assert False, str(('Unhandled', format))
-        else:
-            raise ValueError('unsupported charset format: %r' % format)
-        return
-
-    def getstr(self, sid):
-        if sid < len(self.STANDARD_STRINGS):
-            return self.STANDARD_STRINGS[sid]
-        return self.string_index[sid-len(self.STANDARD_STRINGS)]
-
-
-class TrueTypeFont:
-
-    class CMapNotFound(Exception):
-        pass
-
-    def __init__(self, name, fp):
-        self.name = name
-        self.fp = fp
-        self.tables = {}
-        self.fonttype = fp.read(4)
-        try:
-            (ntables, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
-            for _ in range(ntables):
-                (name, tsum, offset, length) = struct.unpack('>4sLLL',
-                                                             fp.read(16))
-                self.tables[name] = (offset, length)
-        except struct.error:
-            # Do not fail if there are not enough bytes to read. Even for
-            # corrupted PDFs we would like to get as much information as
-            # possible, so continue.
-            pass
-        return
-
-    def create_unicode_map(self):
-        if 'cmap' not in self.tables:
-            raise TrueTypeFont.CMapNotFound
-        (base_offset, length) = self.tables['cmap']
-        fp = self.fp
-        fp.seek(base_offset)
-        (version, nsubtables) = struct.unpack('>HH', fp.read(4))
-        subtables = []
-        for i in range(nsubtables):
-            subtables.append(struct.unpack('>HHL', fp.read(8)))
-        char2gid = {}
-        # Only supports subtable type 0, 2 and 4.
-        for (_1, _2, st_offset) in subtables:
-            fp.seek(base_offset+st_offset)
-            (fmttype, fmtlen, fmtlang) = struct.unpack('>HHH', fp.read(6))
-            if fmttype == 0:
-                char2gid.update(enumerate(struct.unpack('>256B',
-                                                        fp.read(256))))
-            elif fmttype == 2:
-                subheaderkeys = struct.unpack('>256H', fp.read(512))
-                firstbytes = [0]*8192
-                for (i, k) in enumerate(subheaderkeys):
-                    firstbytes[k//8] = i
-                nhdrs = max(subheaderkeys)//8 + 1
-                hdrs = []
-                for i in range(nhdrs):
-                    (firstcode, entcount, delta, offset) = \
-                        struct.unpack('>HHhH', fp.read(8))
-                    hdrs.append((i, firstcode, entcount, delta,
-                                 fp.tell()-2+offset))
-                for (i, firstcode, entcount, delta, pos) in hdrs:
-                    if not entcount:
-                        continue
-                    first = firstcode + (firstbytes[i] << 8)
-                    fp.seek(pos)
-                    for c in range(entcount):
-                        gid = struct.unpack('>H', fp.read(2))
-                        if gid:
-                            gid += delta
-                        char2gid[first+c] = gid
-            elif fmttype == 4:
-                (segcount, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
-                segcount //= 2
-                ecs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
-                fp.read(2)
-                scs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
-                idds = struct.unpack('>%dh' % segcount, fp.read(2*segcount))
-                pos = fp.tell()
-                idrs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
-                for (ec, sc, idd, idr) in zip(ecs, scs, idds, idrs):
-                    if idr:
-                        fp.seek(pos+idr)
-                        for c in range(sc, ec+1):
-                            b = struct.unpack('>H', fp.read(2))[0]
-                            char2gid[c] = (b + idd) & 0xffff
-                    else:
-                        for c in range(sc, ec+1):
-                            char2gid[c] = (c + idd) & 0xffff
-            else:
-                assert False, str(('Unhandled', fmttype))
-        # create unicode map
-        unicode_map = FileUnicodeMap()
-        for (char, gid) in char2gid.items():
-            unicode_map.add_cid2unichr(gid, char)
-        return unicode_map
-
-
-class PDFFontError(PDFException):
-    """Font-level error.
-
-    Raised in this module when, in strict mode, a required font entry
-    (BaseFont, FontDescriptor, Encoding, CMapName) is missing or a CMap
-    cannot be found.
-    """
-    pass
-
-
-class PDFUnicodeNotDefined(PDFFontError):
-    """Raised by ``to_unichr`` when a character id has no Unicode mapping.
-
-    Constructed as ``PDFUnicodeNotDefined(cidcoding_or_None, cid)``.
-    """
-    pass
-
-
-# Literal used by PDFSimpleFont as the default for both /Encoding and
-# /BaseEncoding when the font dictionary does not supply one.
-LITERAL_STANDARD_ENCODING = LIT('StandardEncoding')
-LITERAL_TYPE1C = LIT('Type1C')
-
-
-class PDFFont:
-
-    def __init__(self, descriptor, widths, default_width=None):
-        self.descriptor = descriptor
-        self.widths = resolve_all(widths)
-        self.fontname = resolve1(descriptor.get('FontName', 'unknown'))
-        if isinstance(self.fontname, PSLiteral):
-            self.fontname = literal_name(self.fontname)
-        self.flags = int_value(descriptor.get('Flags', 0))
-        self.ascent = num_value(descriptor.get('Ascent', 0))
-        self.descent = num_value(descriptor.get('Descent', 0))
-        self.italic_angle = num_value(descriptor.get('ItalicAngle', 0))
-        if default_width is None:
-            self.default_width = num_value(descriptor.get('MissingWidth', 0))
-        else:
-            self.default_width = default_width
-        self.leading = num_value(descriptor.get('Leading', 0))
-        self.bbox = list_value(resolve_all(descriptor.get('FontBBox',
-                                                          (0, 0, 0, 0))))
-        self.hscale = self.vscale = .001
-
-        # PDF RM 9.8.1 specifies /Descent should always be a negative number.
-        # PScript5.dll seems to produce Descent with a positive number, but
-        # text analysis will be wrong if this is taken as correct. So force
-        # descent to negative.
-        if self.descent > 0:
-            self.descent = -self.descent
-        return
-
-    def __repr__(self):
-        return '<PDFFont>'
-
-    def is_vertical(self):
-        return False
-
-    def is_multibyte(self):
-        return False
-
-    def decode(self, bytes):
-        return bytearray(bytes)  # map(ord, bytes)
-
-    def get_ascent(self):
-        """Ascent above the baseline, in text space units"""
-        return self.ascent * self.vscale
-
-    def get_descent(self):
-        """Descent below the baseline, in text space units; always negative"""
-        return self.descent * self.vscale
-
-    def get_width(self):
-        w = self.bbox[2]-self.bbox[0]
-        if w == 0:
-            w = -self.default_width
-        return w * self.hscale
-
-    def get_height(self):
-        h = self.bbox[3]-self.bbox[1]
-        if h == 0:
-            h = self.ascent - self.descent
-        return h * self.vscale
-
-    def char_width(self, cid):
-        try:
-            return self.widths[cid] * self.hscale
-        except KeyError:
-            try:
-                return self.widths[self.to_unichr(cid)] * self.hscale
-            except (KeyError, PDFUnicodeNotDefined):
-                return self.default_width * self.hscale
-
-    def char_disp(self, cid):
-        return 0
-
-    def string_width(self, s):
-        return sum(self.char_width(cid) for cid in self.decode(s))
-
-
-class PDFSimpleFont(PDFFont):
-
-    def __init__(self, descriptor, widths, spec):
-        # Font encoding is specified either by a name of
-        # built-in encoding or a dictionary that describes
-        # the differences.
-        if 'Encoding' in spec:
-            encoding = resolve1(spec['Encoding'])
-        else:
-            encoding = LITERAL_STANDARD_ENCODING
-        if isinstance(encoding, dict):
-            name = literal_name(encoding.get('BaseEncoding',
-                                             LITERAL_STANDARD_ENCODING))
-            diff = list_value(encoding.get('Differences', []))
-            self.cid2unicode = EncodingDB.get_encoding(name, diff)
-        else:
-            self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
-        self.unicode_map = None
-        if 'ToUnicode' in spec:
-            strm = stream_value(spec['ToUnicode'])
-            self.unicode_map = FileUnicodeMap()
-            CMapParser(self.unicode_map, BytesIO(strm.get_data())).run()
-        PDFFont.__init__(self, descriptor, widths)
-        return
-
-    def to_unichr(self, cid):
-        if self.unicode_map:
-            try:
-                return self.unicode_map.get_unichr(cid)
-            except KeyError:
-                pass
-        try:
-            return self.cid2unicode[cid]
-        except KeyError:
-            raise PDFUnicodeNotDefined(None, cid)
-
-
-class PDFType1Font(PDFSimpleFont):
-
-    def __init__(self, rsrcmgr, spec):
-        try:
-            self.basefont = literal_name(spec['BaseFont'])
-        except KeyError:
-            if settings.STRICT:
-                raise PDFFontError('BaseFont is missing')
-            self.basefont = 'unknown'
-        try:
-            (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
-        except KeyError:
-            descriptor = dict_value(spec.get('FontDescriptor', {}))
-            firstchar = int_value(spec.get('FirstChar', 0))
-            # lastchar = int_value(spec.get('LastChar', 255))
-            widths = list_value(spec.get('Widths', [0]*256))
-            widths = {i+firstchar: w for (i, w) in enumerate(widths)}
-        PDFSimpleFont.__init__(self, descriptor, widths, spec)
-        if 'Encoding' not in spec and 'FontFile' in descriptor:
-            # try to recover the missing encoding info from the font file.
-            self.fontfile = stream_value(descriptor.get('FontFile'))
-            length1 = int_value(self.fontfile['Length1'])
-            data = self.fontfile.get_data()[:length1]
-            parser = Type1FontHeaderParser(BytesIO(data))
-            self.cid2unicode = parser.get_encoding()
-        return
-
-    def __repr__(self):
-        return '<PDFType1Font: basefont=%r>' % self.basefont
-
-
-class PDFTrueTypeFont(PDFType1Font):
-
-    def __repr__(self):
-        return '<PDFTrueTypeFont: basefont=%r>' % self.basefont
-
-
-class PDFType3Font(PDFSimpleFont):
-
-    def __init__(self, rsrcmgr, spec):
-        firstchar = int_value(spec.get('FirstChar', 0))
-        # lastchar = int_value(spec.get('LastChar', 0))
-        widths = list_value(spec.get('Widths', [0]*256))
-        widths = {i+firstchar: w for (i, w) in enumerate(widths)}
-        if 'FontDescriptor' in spec:
-            descriptor = dict_value(spec['FontDescriptor'])
-        else:
-            descriptor = {'Ascent': 0, 'Descent': 0,
-                          'FontBBox': spec['FontBBox']}
-        PDFSimpleFont.__init__(self, descriptor, widths, spec)
-        self.matrix = tuple(list_value(spec.get('FontMatrix')))
-        (_, self.descent, _, self.ascent) = self.bbox
-        (self.hscale, self.vscale) = apply_matrix_norm(self.matrix, (1, 1))
-        return
-
-    def __repr__(self):
-        return '<PDFType3Font>'
-
-
-class PDFCIDFont(PDFFont):
-
-    def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
-        try:
-            self.basefont = literal_name(spec['BaseFont'])
-        except KeyError:
-            if strict:
-                raise PDFFontError('BaseFont is missing')
-            self.basefont = 'unknown'
-        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
-        cid_registry = resolve1(
-            self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1")
-        cid_ordering = resolve1(
-            self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1")
-        self.cidcoding = '{}-{}'.format(cid_registry, cid_ordering)
-        self.cmap = self.get_cmap_from_spec(spec, strict)
-
-        try:
-            descriptor = dict_value(spec['FontDescriptor'])
-        except KeyError:
-            if strict:
-                raise PDFFontError('FontDescriptor is missing')
-            descriptor = {}
-        ttf = None
-        if 'FontFile2' in descriptor:
-            self.fontfile = stream_value(descriptor.get('FontFile2'))
-            ttf = TrueTypeFont(self.basefont,
-                               BytesIO(self.fontfile.get_data()))
-        self.unicode_map = None
-        if 'ToUnicode' in spec:
-            strm = stream_value(spec['ToUnicode'])
-            self.unicode_map = FileUnicodeMap()
-            CMapParser(self.unicode_map, BytesIO(strm.get_data())).run()
-        elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'):
-            if ttf:
-                try:
-                    self.unicode_map = ttf.create_unicode_map()
-                except TrueTypeFont.CMapNotFound:
-                    pass
-        else:
-            try:
-                self.unicode_map = CMapDB.get_unicode_map(
-                    self.cidcoding, self.cmap.is_vertical())
-            except CMapDB.CMapNotFound:
-                pass
-
-        self.vertical = self.cmap.is_vertical()
-        if self.vertical:
-            # writing mode: vertical
-            widths = get_widths2(list_value(spec.get('W2', [])))
-            self.disps = {cid: (vx, vy)
-                          for (cid, (_, (vx, vy))) in widths.items()}
-            (vy, w) = spec.get('DW2', [880, -1000])
-            self.default_disp = (None, vy)
-            widths = {cid: w for (cid, (w, _)) in widths.items()}
-            default_width = w
-        else:
-            # writing mode: horizontal
-            self.disps = {}
-            self.default_disp = 0
-            widths = get_widths(list_value(spec.get('W', [])))
-            default_width = spec.get('DW', 1000)
-        PDFFont.__init__(self, descriptor, widths, default_width=default_width)
-        return
-
-    def get_cmap_from_spec(self, spec, strict):
-        """Get cmap from font specification
-
-        For certain PDFs, Encoding Type isn't mentioned as an attribute of
-        Encoding but as an attribute of CMapName, where CMapName is an
-        attribute of spec['Encoding'].
-        The horizontal/vertical modes are mentioned with different name
-        such as 'DLIdent-H/V','OneByteIdentityH/V','Identity-H/V'.
-        """
-        cmap_name = self._get_cmap_name(spec, strict)
-
-        try:
-            return CMapDB.get_cmap(cmap_name)
-        except CMapDB.CMapNotFound as e:
-            if strict:
-                raise PDFFontError(e)
-            return CMap()
-
-    @staticmethod
-    def _get_cmap_name(spec, strict):
-        """Get cmap name from font specification"""
-        cmap_name = 'unknown'  # default value
-
-        try:
-            spec_encoding = spec['Encoding']
-            if hasattr(spec_encoding, 'name'):
-                cmap_name = literal_name(spec['Encoding'])
-            else:
-                cmap_name = literal_name(spec_encoding['CMapName'])
-        except KeyError:
-            if strict:
-                raise PDFFontError('Encoding is unspecified')
-
-        if type(cmap_name) is PDFStream:
-            if 'CMapName' in cmap_name:
-                cmap_name = cmap_name.get('CMapName').name
-            else:
-                if strict:
-                    raise PDFFontError('CMapName unspecified for encoding')
-
-        cmap_name = IDENTITY_ENCODER.get(cmap_name, cmap_name)
-        return cmap_name
-
-    def __repr__(self):
-        return '<PDFCIDFont: basefont={!r}, cidcoding={!r}>'\
-            .format(self.basefont, self.cidcoding)
-
-    def is_vertical(self):
-        return self.vertical
-
-    def is_multibyte(self):
-        return True
-
-    def decode(self, bytes):
-        return self.cmap.decode(bytes)
-
-    def char_disp(self, cid):
-        "Returns an integer for horizontal fonts, a tuple for vertical fonts."
-        return self.disps.get(cid, self.default_disp)
-
-    def to_unichr(self, cid):
-        try:
-            if not self.unicode_map:
-                raise KeyError(cid)
-            return self.unicode_map.get_unichr(cid)
-        except KeyError:
-            raise PDFUnicodeNotDefined(self.cidcoding, cid)
-
-
-def main(argv):
-    for fname in argv[1:]:
-        fp = open(fname, 'rb')
-        font = CFFFont(fname, fp)
-        print(font)
-        fp.close()
-    return
-
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv))
@@ -1,943 +0,0 @@
-import re
-import logging
-from io import BytesIO
-from .cmapdb import CMapDB
-from .cmapdb import CMap
-from .psparser import PSTypeError
-from .psparser import PSEOF
-from .psparser import PSKeyword
-from .psparser import literal_name
-from .psparser import keyword_name
-from .psparser import PSStackParser
-from .psparser import LIT
-from .psparser import KWD
-from . import settings
-from .pdftypes import PDFException
-from .pdftypes import PDFStream
-from .pdftypes import PDFObjRef
-from .pdftypes import resolve1
-from .pdftypes import list_value
-from .pdftypes import dict_value
-from .pdftypes import stream_value
-from .pdffont import PDFFontError
-from .pdffont import PDFType1Font
-from .pdffont import PDFTrueTypeFont
-from .pdffont import PDFType3Font
-from .pdffont import PDFCIDFont
-from .pdfcolor import PDFColorSpace
-from .pdfcolor import PREDEFINED_COLORSPACE
-from .utils import choplist
-from .utils import mult_matrix
-from .utils import MATRIX_IDENTITY
-
-
-log = logging.getLogger(__name__)
-
-
-class PDFResourceError(PDFException):
-    pass
-
-
-class PDFInterpreterError(PDFException):
-    pass
-
-
-LITERAL_PDF = LIT('PDF')
-LITERAL_TEXT = LIT('Text')
-LITERAL_FONT = LIT('Font')
-LITERAL_FORM = LIT('Form')
-LITERAL_IMAGE = LIT('Image')
-
-
-class PDFTextState:
-
-    def __init__(self):
-        self.font = None
-        self.fontsize = 0
-        self.charspace = 0
-        self.wordspace = 0
-        self.scaling = 100
-        self.leading = 0
-        self.render = 0
-        self.rise = 0
-        self.reset()
-        # self.matrix is set
-        # self.linematrix is set
-        return
-
-    def __repr__(self):
-        return '<PDFTextState: font=%r, fontsize=%r, charspace=%r, ' \
-               'wordspace=%r, scaling=%r, leading=%r, render=%r, rise=%r, ' \
-               'matrix=%r, linematrix=%r>' \
-               % (self.font, self.fontsize, self.charspace, self.wordspace,
-                  self.scaling, self.leading, self.render, self.rise,
-                  self.matrix, self.linematrix)
-
-    def copy(self):
-        obj = PDFTextState()
-        obj.font = self.font
-        obj.fontsize = self.fontsize
-        obj.charspace = self.charspace
-        obj.wordspace = self.wordspace
-        obj.scaling = self.scaling
-        obj.leading = self.leading
-        obj.render = self.render
-        obj.rise = self.rise
-        obj.matrix = self.matrix
-        obj.linematrix = self.linematrix
-        return obj
-
-    def reset(self):
-        self.matrix = MATRIX_IDENTITY
-        self.linematrix = (0, 0)
-        return
-
-
-class PDFGraphicState:
-
-    def __init__(self):
-        self.linewidth = 0
-        self.linecap = None
-        self.linejoin = None
-        self.miterlimit = None
-        self.dash = None
-        self.intent = None
-        self.flatness = None
-
-        # stroking color
-        self.scolor = None
-
-        # non stroking color
-        self.ncolor = None
-        return
-
-    def copy(self):
-        obj = PDFGraphicState()
-        obj.linewidth = self.linewidth
-        obj.linecap = self.linecap
-        obj.linejoin = self.linejoin
-        obj.miterlimit = self.miterlimit
-        obj.dash = self.dash
-        obj.intent = self.intent
-        obj.flatness = self.flatness
-        obj.scolor = self.scolor
-        obj.ncolor = self.ncolor
-        return obj
-
-    def __repr__(self):
-        return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
-                ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
-                ' stroking color=%r, non stroking color=%r>' %
-                (self.linewidth, self.linecap, self.linejoin,
-                 self.miterlimit, self.dash, self.intent, self.flatness,
-                 self.scolor, self.ncolor))
-
-
-class PDFResourceManager:
-    """Repository of shared resources.
-
-    ResourceManager facilitates reuse of shared resources
-    such as fonts and images so that large objects are not
-    allocated multiple times.
-    """
-
-    def __init__(self, caching=True):
-        self.caching = caching
-        self._cached_fonts = {}
-        return
-
-    def get_procset(self, procs):
-        for proc in procs:
-            if proc is LITERAL_PDF:
-                pass
-            elif proc is LITERAL_TEXT:
-                pass
-            else:
-                pass
-        return
-
-    def get_cmap(self, cmapname, strict=False):
-        try:
-            return CMapDB.get_cmap(cmapname)
-        except CMapDB.CMapNotFound:
-            if strict:
-                raise
-            return CMap()
-
-    def get_font(self, objid, spec):
-        if objid and objid in self._cached_fonts:
-            font = self._cached_fonts[objid]
-        else:
-            log.info('get_font: create: objid=%r, spec=%r', objid, spec)
-            if settings.STRICT:
-                if spec['Type'] is not LITERAL_FONT:
-                    raise PDFFontError('Type is not /Font')
-            # Create a Font object.
-            if 'Subtype' in spec:
-                subtype = literal_name(spec['Subtype'])
-            else:
-                if settings.STRICT:
-                    raise PDFFontError('Font Subtype is not specified.')
-                subtype = 'Type1'
-            if subtype in ('Type1', 'MMType1'):
-                # Type1 Font
-                font = PDFType1Font(self, spec)
-            elif subtype == 'TrueType':
-                # TrueType Font
-                font = PDFTrueTypeFont(self, spec)
-            elif subtype == 'Type3':
-                # Type3 Font
-                font = PDFType3Font(self, spec)
-            elif subtype in ('CIDFontType0', 'CIDFontType2'):
-                # CID Font
-                font = PDFCIDFont(self, spec)
-            elif subtype == 'Type0':
-                # Type0 Font
-                dfonts = list_value(spec['DescendantFonts'])
-                assert dfonts
-                subspec = dict_value(dfonts[0]).copy()
-                for k in ('Encoding', 'ToUnicode'):
-                    if k in spec:
-                        subspec[k] = resolve1(spec[k])
-                font = self.get_font(None, subspec)
-            else:
-                if settings.STRICT:
-                    raise PDFFontError('Invalid Font spec: %r' % spec)
-                font = PDFType1Font(self, spec)  # this is so wrong!
-            if objid and self.caching:
-                self._cached_fonts[objid] = font
-        return font
-
-
-class PDFContentParser(PSStackParser):
-
-    def __init__(self, streams):
-        self.streams = streams
-        self.istream = 0
-        PSStackParser.__init__(self, None)
-        return
-
-    def fillfp(self):
-        if not self.fp:
-            if self.istream < len(self.streams):
-                strm = stream_value(self.streams[self.istream])
-                self.istream += 1
-            else:
-                raise PSEOF('Unexpected EOF, file truncated?')
-            self.fp = BytesIO(strm.get_data())
-        return
-
-    def seek(self, pos):
-        self.fillfp()
-        PSStackParser.seek(self, pos)
-        return
-
-    def fillbuf(self):
-        if self.charpos < len(self.buf):
-            return
-        while 1:
-            self.fillfp()
-            self.bufpos = self.fp.tell()
-            self.buf = self.fp.read(self.BUFSIZ)
-            if self.buf:
-                break
-            self.fp = None
-        self.charpos = 0
-        return
-
-    def get_inline_data(self, pos, target=b'EI'):
-        self.seek(pos)
-        i = 0
-        data = b''
-        while i <= len(target):
-            self.fillbuf()
-            if i:
-                c = self.buf[self.charpos]
-                c = bytes((c,))
-                data += c
-                self.charpos += 1
-                if len(target) <= i and c.isspace():
-                    i += 1
-                elif i < len(target) and c == (bytes((target[i],))):
-                    i += 1
-                else:
-                    i = 0
-            else:
-                try:
-                    j = self.buf.index(target[0], self.charpos)
-                    data += self.buf[self.charpos:j+1]
-                    self.charpos = j+1
-                    i = 1
-                except ValueError:
-                    data += self.buf[self.charpos:]
-                    self.charpos = len(self.buf)
-        data = data[:-(len(target)+1)]  # strip the last part
-        data = re.sub(br'(\x0d\x0a|[\x0d\x0a])$', b'', data)
-        return (pos, data)
-
-    def flush(self):
-        self.add_results(*self.popall())
-        return
-
-    KEYWORD_BI = KWD(b'BI')
-    KEYWORD_ID = KWD(b'ID')
-    KEYWORD_EI = KWD(b'EI')
-
-    def do_keyword(self, pos, token):
-        if token is self.KEYWORD_BI:
-            # inline image within a content stream
-            self.start_type(pos, 'inline')
-        elif token is self.KEYWORD_ID:
-            try:
-                (_, objs) = self.end_type('inline')
-                if len(objs) % 2 != 0:
-                    error_msg = 'Invalid dictionary construct: {!r}' \
-                        .format(objs)
-                    raise PSTypeError(error_msg)
-                d = {literal_name(k): v for (k, v) in choplist(2, objs)}
-                (pos, data) = self.get_inline_data(pos+len(b'ID '))
-                obj = PDFStream(d, data)
-                self.push((pos, obj))
-                self.push((pos, self.KEYWORD_EI))
-            except PSTypeError:
-                if settings.STRICT:
-                    raise
-        else:
-            self.push((pos, token))
-        return
-
-
-class PDFPageInterpreter:
-    """Processor for the content of a PDF page
-
-    Reference: PDF Reference, Appendix A, Operator Summary
-    """
-
-    def __init__(self, rsrcmgr, device):
-        self.rsrcmgr = rsrcmgr
-        self.device = device
-        return
-
-    def dup(self):
-        return self.__class__(self.rsrcmgr, self.device)
-
-    def init_resources(self, resources):
-        """Prepare the fonts and XObjects listed in the Resource attribute."""
-        self.resources = resources
-        self.fontmap = {}
-        self.xobjmap = {}
-        self.csmap = PREDEFINED_COLORSPACE.copy()
-        if not resources:
-            return
-
-        def get_colorspace(spec):
-            if isinstance(spec, list):
-                name = literal_name(spec[0])
-            else:
-                name = literal_name(spec)
-            if name == 'ICCBased' and isinstance(spec, list) \
-                    and 2 <= len(spec):
-                return PDFColorSpace(name, stream_value(spec[1])['N'])
-            elif name == 'DeviceN' and isinstance(spec, list) \
-                    and 2 <= len(spec):
-                return PDFColorSpace(name, len(list_value(spec[1])))
-            else:
-                return PREDEFINED_COLORSPACE.get(name)
-        for (k, v) in dict_value(resources).items():
-            log.debug('Resource: %r: %r', k, v)
-            if k == 'Font':
-                for (fontid, spec) in dict_value(v).items():
-                    objid = None
-                    if isinstance(spec, PDFObjRef):
-                        objid = spec.objid
-                    spec = dict_value(spec)
-                    self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
-            elif k == 'ColorSpace':
-                for (csid, spec) in dict_value(v).items():
-                    self.csmap[csid] = get_colorspace(resolve1(spec))
-            elif k == 'ProcSet':
-                self.rsrcmgr.get_procset(list_value(v))
-            elif k == 'XObject':
-                for (xobjid, xobjstrm) in dict_value(v).items():
-                    self.xobjmap[xobjid] = xobjstrm
-        return
-
-    def init_state(self, ctm):
-        """Initialize the text and graphic states for rendering a page."""
-        self.gstack = []  # stack for graphical states.
-        self.ctm = ctm
-        self.device.set_ctm(self.ctm)
-        self.textstate = PDFTextState()
-        self.graphicstate = PDFGraphicState()
-        self.curpath = []
-        # argstack: stack for command arguments.
-        self.argstack = []
-        # set some global states.
-        self.scs = self.ncs = None
-        if self.csmap:
-            self.scs = self.ncs = next(iter(self.csmap.values()))
-        return
-
-    def push(self, obj):
-        self.argstack.append(obj)
-        return
-
-    def pop(self, n):
-        if n == 0:
-            return []
-        x = self.argstack[-n:]
-        self.argstack = self.argstack[:-n]
-        return x
-
-    def get_current_state(self):
-        return (self.ctm, self.textstate.copy(), self.graphicstate.copy())
-
-    def set_current_state(self, state):
-        (self.ctm, self.textstate, self.graphicstate) = state
-        self.device.set_ctm(self.ctm)
-        return
-
-    def do_q(self):
-        """Save graphics state"""
-        self.gstack.append(self.get_current_state())
-        return
-
-    def do_Q(self):
-        """Restore graphics state"""
-        if self.gstack:
-            self.set_current_state(self.gstack.pop())
-        return
-
-    def do_cm(self, a1, b1, c1, d1, e1, f1):
-        """Concatenate matrix to current transformation matrix"""
-        self.ctm = mult_matrix((a1, b1, c1, d1, e1, f1), self.ctm)
-        self.device.set_ctm(self.ctm)
-        return
-
-    def do_w(self, linewidth):
-        """Set line width"""
-        self.graphicstate.linewidth = linewidth
-        return
-
-    def do_J(self, linecap):
-        """Set line cap style"""
-        self.graphicstate.linecap = linecap
-        return
-
-    def do_j(self, linejoin):
-        """Set line join style"""
-        self.graphicstate.linejoin = linejoin
-        return
-
-    def do_M(self, miterlimit):
-        """Set miter limit"""
-        self.graphicstate.miterlimit = miterlimit
-        return
-
-    def do_d(self, dash, phase):
-        """Set line dash pattern"""
-        self.graphicstate.dash = (dash, phase)
-        return
-
-    def do_ri(self, intent):
-        """Set color rendering intent"""
-        self.graphicstate.intent = intent
-        return
-
-    def do_i(self, flatness):
-        """Set flatness tolerance"""
-        self.graphicstate.flatness = flatness
-        return
-
-    def do_gs(self, name):
-        """Set parameters from graphics state parameter dictionary"""
-        # todo
-        return
-
-    def do_m(self, x, y):
-        """Begin new subpath"""
-        self.curpath.append(('m', x, y))
-        return
-
-    def do_l(self, x, y):
-        """Append straight line segment to path"""
-        self.curpath.append(('l', x, y))
-        return
-
-    def do_c(self, x1, y1, x2, y2, x3, y3):
-        """Append curved segment to path (three control points)"""
-        self.curpath.append(('c', x1, y1, x2, y2, x3, y3))
-        return
-
-    def do_v(self, x2, y2, x3, y3):
-        """Append curved segment to path (initial point replicated)"""
-        self.curpath.append(('v', x2, y2, x3, y3))
-        return
-
-    def do_y(self, x1, y1, x3, y3):
-        """Append curved segment to path (final point replicated)"""
-        self.curpath.append(('y', x1, y1, x3, y3))
-        return
-
-    def do_h(self):
-        """Close subpath"""
-        self.curpath.append(('h',))
-        return
-
-    def do_re(self, x, y, w, h):
-        """Append rectangle to path"""
-        self.curpath.append(('m', x, y))
-        self.curpath.append(('l', x+w, y))
-        self.curpath.append(('l', x+w, y+h))
-        self.curpath.append(('l', x, y+h))
-        self.curpath.append(('h',))
-        return
-
-    def do_S(self):
-        """Stroke path"""
-        self.device.paint_path(self.graphicstate, True, False, False,
-                               self.curpath)
-        self.curpath = []
-        return
-
-    def do_s(self):
-        """Close and stroke path"""
-        self.do_h()
-        self.do_S()
-        return
-
-    def do_f(self):
-        """Fill path using nonzero winding number rule"""
-        self.device.paint_path(self.graphicstate, False, True, False,
-                               self.curpath)
-        self.curpath = []
-        return
-
-    def do_F(self):
-        """Fill path using nonzero winding number rule (obsolete)"""
-        return self.do_f()
-
-    def do_f_a(self):
-        """Fill path using even-odd rule"""
-        self.device.paint_path(self.graphicstate, False, True, True,
-                               self.curpath)
-        self.curpath = []
-        return
-
-    def do_B(self):
-        """Fill and stroke path using nonzero winding number rule"""
-        self.device.paint_path(self.graphicstate, True, True, False,
-                               self.curpath)
-        self.curpath = []
-        return
-
-    def do_B_a(self):
-        """Fill and stroke path using even-odd rule"""
-        self.device.paint_path(self.graphicstate, True, True, True,
-                               self.curpath)
-        self.curpath = []
-        return
-
-    def do_b(self):
-        """Close, fill, and stroke path using nonzero winding number rule"""
-        self.do_h()
-        self.do_B()
-        return
-
-    def do_b_a(self):
-        """Close, fill, and stroke path using even-odd rule"""
-        self.do_h()
-        self.do_B_a()
-        return
-
-    def do_n(self):
-        """End path without filling or stroking"""
-        self.curpath = []
-        return
-
-    def do_W(self):
-        """Set clipping path using nonzero winding number rule"""
-        return
-
-    def do_W_a(self):
-        """Set clipping path using even-odd rule"""
-        return
-
-    def do_CS(self, name):
-        """Set color space for stroking operations
-
-        Introduced in PDF 1.1
-        """
-        try:
-            self.scs = self.csmap[literal_name(name)]
-        except KeyError:
-            if settings.STRICT:
-                raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
-        return
-
-    def do_cs(self, name):
-        """Set color space for nonstroking operations"""
-        try:
-            self.ncs = self.csmap[literal_name(name)]
-        except KeyError:
-            if settings.STRICT:
-                raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
-        return
-
-    def do_G(self, gray):
-        """Set gray level for stroking operations"""
-        self.graphicstate.scolor = gray
-        return
-
-    def do_g(self, gray):
-        """Set gray level for nonstroking operations"""
-        self.graphicstate.ncolor = gray
-        return
-
-    def do_RG(self, r, g, b):
-        """Set RGB color for stroking operations"""
-        self.graphicstate.scolor = (r, g, b)
-        return
-
-    def do_rg(self, r, g, b):
-        """Set RGB color for nonstroking operations"""
-        self.graphicstate.ncolor = (r, g, b)
-        return
-
-    def do_K(self, c, m, y, k):
-        """Set CMYK color for stroking operations"""
-        self.graphicstate.scolor = (c, m, y, k)
-        return
-
-    def do_k(self, c, m, y, k):
-        """Set CMYK color for nonstroking operations"""
-        self.graphicstate.ncolor = (c, m, y, k)
-        return
-
-    def do_SCN(self):
-        """Set color for stroking operations."""
-        if self.scs:
-            n = self.scs.ncomponents
-        else:
-            if settings.STRICT:
-                raise PDFInterpreterError('No colorspace specified!')
-            n = 1
-        self.graphicstate.scolor = self.pop(n)
-        return
-
-    def do_scn(self):
-        """Set color for nonstroking operations"""
-        if self.ncs:
-            n = self.ncs.ncomponents
-        else:
-            if settings.STRICT:
-                raise PDFInterpreterError('No colorspace specified!')
-            n = 1
-        self.graphicstate.ncolor = self.pop(n)
-        return
-
-    def do_SC(self):
-        """Set color for stroking operations"""
-        self.do_SCN()
-        return
-
-    def do_sc(self):
-        """Set color for nonstroking operations"""
-        self.do_scn()
-        return
-
-    def do_sh(self, name):
-        """Paint area defined by shading pattern"""
-        return
-
-    def do_BT(self):
-        """Begin text object
-
-        Initializing the text matrix, Tm, and the text line matrix, Tlm, to
-        the identity matrix. Text objects cannot be nested; a second BT cannot
-        appear before an ET.
-        """
-        self.textstate.reset()
-        return
-
-    def do_ET(self):
-        """End a text object"""
-        return
-
-    def do_BX(self):
-        """Begin compatibility section"""
-        return
-
-    def do_EX(self):
-        """End compatibility section"""
-        return
-
-    def do_MP(self, tag):
-        """Define marked-content point"""
-        self.device.do_tag(tag)
-        return
-
-    def do_DP(self, tag, props):
-        """Define marked-content point with property list"""
-        self.device.do_tag(tag, props)
-        return
-
-    def do_BMC(self, tag):
-        """Begin marked-content sequence"""
-        self.device.begin_tag(tag)
-        return
-
-    def do_BDC(self, tag, props):
-        """Begin marked-content sequence with property list"""
-        self.device.begin_tag(tag, props)
-        return
-
-    def do_EMC(self):
-        """End marked-content sequence"""
-        self.device.end_tag()
-        return
-
-    def do_Tc(self, space):
-        """Set character spacing.
-
-        Character spacing is used by the Tj, TJ, and ' operators.
-
-        :param space: a number expressed in unscaled text space units.
-        """
-        self.textstate.charspace = space
-        return
-
-    def do_Tw(self, space):
-        """Set the word spacing.
-
-        Word spacing is used by the Tj, TJ, and ' operators.
-
-        :param space: a number expressed in unscaled text space units
-        """
-        self.textstate.wordspace = space
-        return
-
-    def do_Tz(self, scale):
-        """Set the horizontal scaling.
-
-        :param scale: is a number specifying the percentage of the normal width
-        """
-        self.textstate.scaling = scale
-        return
-
-    def do_TL(self, leading):
-        """Set the text leading.
-
-        Text leading is used only by the T*, ', and " operators.
-
-        :param leading: a number expressed in unscaled text space units
-        """
-        self.textstate.leading = -leading
-        return
-
-    def do_Tf(self, fontid, fontsize):
-        """Set the text font
-
-        :param fontid: the name of a font resource in the Font subdictionary
-            of the current resource dictionary
-        :param fontsize: size is a number representing a scale factor.
-        """
-        try:
-            self.textstate.font = self.fontmap[literal_name(fontid)]
-        except KeyError:
-            if settings.STRICT:
-                raise PDFInterpreterError('Undefined Font id: %r' % fontid)
-            self.textstate.font = self.rsrcmgr.get_font(None, {})
-        self.textstate.fontsize = fontsize
-        return
-
-    def do_Tr(self, render):
-        """Set the text rendering mode"""
-        self.textstate.render = render
-        return
-
-    def do_Ts(self, rise):
-        """Set the text rise
-
-        :param rise: a number expressed in unscaled text space units
-        """
-        self.textstate.rise = rise
-        return
-
-    def do_Td(self, tx, ty):
-        """Move text position"""
-        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
-        self.textstate.linematrix = (0, 0)
-        return
-
-    def do_TD(self, tx, ty):
-        """Move text position and set leading"""
-        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
-        self.textstate.leading = ty
-        self.textstate.linematrix = (0, 0)
-        return
-
-    def do_Tm(self, a, b, c, d, e, f):
-        """Set text matrix and text line matrix"""
-        self.textstate.matrix = (a, b, c, d, e, f)
-        self.textstate.linematrix = (0, 0)
-        return
-
-    def do_T_a(self):
-        """Move to start of next text line"""
-        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a, b, c, d, self.textstate.leading*c+e,
-                                 self.textstate.leading*d+f)
-        self.textstate.linematrix = (0, 0)
-        return
-
-    def do_TJ(self, seq):
-        """Show text, allowing individual glyph positioning"""
-        if self.textstate.font is None:
-            if settings.STRICT:
-                raise PDFInterpreterError('No font specified!')
-            return
-        self.device.render_string(self.textstate, seq, self.ncs,
-                                  self.graphicstate.copy())
-        return
-
-    def do_Tj(self, s):
-        """Show text"""
-        self.do_TJ([s])
-        return
-
-    def do__q(self, s):
-        """Move to next line and show text
-
-        The ' (single quote) operator.
-        """
-        self.do_T_a()
-        self.do_TJ([s])
-        return
-
-    def do__w(self, aw, ac, s):
-        """Set word and character spacing, move to next line, and show text
-
-        The " (double quote) operator.
-        """
-        self.do_Tw(aw)
-        self.do_Tc(ac)
-        self.do_TJ([s])
-        return
-
-    def do_BI(self):
-        """Begin inline image object"""
-        return
-
-    def do_ID(self):
-        """Begin inline image data"""
-        return
-
-    def do_EI(self, obj):
-        """End inline image object"""
-        if isinstance(obj, PDFStream) and 'W' in obj and 'H' in obj:
-            iobjid = str(id(obj))
-            self.device.begin_figure(iobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
-            self.device.render_image(iobjid, obj)
-            self.device.end_figure(iobjid)
-        return
-
-    def do_Do(self, xobjid):
-        """Invoke named XObject"""
-        xobjid = literal_name(xobjid)
-        try:
-            xobj = stream_value(self.xobjmap[xobjid])
-        except KeyError:
-            if settings.STRICT:
-                raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
-            return
-        log.info('Processing xobj: %r', xobj)
-        subtype = xobj.get('Subtype')
-        if subtype is LITERAL_FORM and 'BBox' in xobj:
-            interpreter = self.dup()
-            bbox = list_value(xobj['BBox'])
-            matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
-            # According to PDF reference 1.7 section 4.9.1, XObjects in
-            # earlier PDFs (prior to v1.2) use the page's Resources entry
-            # instead of having their own Resources entry.
-            xobjres = xobj.get('Resources')
-            if xobjres:
-                resources = dict_value(xobjres)
-            else:
-                resources = self.resources.copy()
-            self.device.begin_figure(xobjid, bbox, matrix)
-            interpreter.render_contents(resources, [xobj],
-                                        ctm=mult_matrix(matrix, self.ctm))
-            self.device.end_figure(xobjid)
-        elif subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
-            self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
-            self.device.render_image(xobjid, xobj)
-            self.device.end_figure(xobjid)
-        else:
-            # unsupported xobject type.
-            pass
-        return
-
-    def process_page(self, page):
-        log.info('Processing page: %r', page)
-        (x0, y0, x1, y1) = page.mediabox
-        if page.rotate == 90:
-            ctm = (0, -1, 1, 0, -y0, x1)
-        elif page.rotate == 180:
-            ctm = (-1, 0, 0, -1, x1, y1)
-        elif page.rotate == 270:
-            ctm = (0, 1, -1, 0, y1, -x0)
-        else:
-            ctm = (1, 0, 0, 1, -x0, -y0)
-        self.device.begin_page(page, ctm)
-        self.render_contents(page.resources, page.contents, ctm=ctm)
-        self.device.end_page(page)
-        return
-
-    def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
-        """Render the content streams.
-
-        This method may be called recursively.
-        """
-        log.info('render_contents: resources=%r, streams=%r, ctm=%r',
-                 resources, streams, ctm)
-        self.init_resources(resources)
-        self.init_state(ctm)
-        self.execute(list_value(streams))
-        return
-
-    def execute(self, streams):
-        try:
-            parser = PDFContentParser(streams)
-        except PSEOF:
-            # empty page
-            return
-        while 1:
-            try:
-                (_, obj) = parser.nextobject()
-            except PSEOF:
-                break
-            if isinstance(obj, PSKeyword):
-                name = keyword_name(obj)
-                method = 'do_%s' % name.replace('*', '_a').replace('"', '_w')\
-                    .replace("'", '_q')
-                if hasattr(self, method):
-                    func = getattr(self, method)
-                    nargs = func.__code__.co_argcount-1
-                    if nargs:
-                        args = self.pop(nargs)
-                        log.debug('exec: %s %r', name, args)
-                        if len(args) == nargs:
-                            func(*args)
-                    else:
-                        log.debug('exec: %s', name)
-                        func()
-                else:
-                    if settings.STRICT:
-                        error_msg = 'Unknown operator: %r' % name
-                        raise PDFInterpreterError(error_msg)
-            else:
-                self.push(obj)
-        return
@@ -1,148 +0,0 @@
-import logging
-import warnings
-from . import settings
-from .psparser import LIT
-from .pdftypes import PDFObjectNotFound
-from .pdftypes import resolve1
-from .pdftypes import int_value
-from .pdftypes import list_value
-from .pdftypes import dict_value
-from .pdfparser import PDFParser
-from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
-from .pdfdocument import PDFTextExtractionNotAllowedWarning
-
-
-log = logging.getLogger(__name__)
-
-# some predefined literals and keywords.
-LITERAL_PAGE = LIT('Page')
-LITERAL_PAGES = LIT('Pages')
-
-
-class PDFPage:
-    """An object that holds the information about a page.
-
-    A PDFPage object is merely a convenience class that has a set
-    of keys and values, which describe the properties of a page
-    and point to its contents.
-
-    Attributes:
-      doc: a PDFDocument object.
-      pageid: any Python object that can uniquely identify the page.
-      attrs: a dictionary of page attributes.
-      contents: a list of PDFStream objects that represents the page content.
-      lastmod: the last modified time of the page.
-      resources: a list of resources used by the page.
-      mediabox: the physical size of the page.
-      cropbox: the crop rectangle of the page.
-      rotate: the page rotation (in degree).
-      annots: the page annotations.
-      beads: a chain that represents natural reading order.
-    """
-
-    def __init__(self, doc, pageid, attrs):
-        """Initialize a page object.
-
-        doc: a PDFDocument object.
-        pageid: any Python object that can uniquely identify the page.
-        attrs: a dictionary of page attributes.
-        """
-        self.doc = doc
-        self.pageid = pageid
-        self.attrs = dict_value(attrs)
-        self.lastmod = resolve1(self.attrs.get('LastModified'))
-        self.resources = resolve1(self.attrs.get('Resources', dict()))
-        self.mediabox = resolve1(self.attrs['MediaBox'])
-        if 'CropBox' in self.attrs:
-            self.cropbox = resolve1(self.attrs['CropBox'])
-        else:
-            self.cropbox = self.mediabox
-        self.rotate = (int_value(self.attrs.get('Rotate', 0))+360) % 360
-        self.annots = self.attrs.get('Annots')
-        self.beads = self.attrs.get('B')
-        if 'Contents' in self.attrs:
-            contents = resolve1(self.attrs['Contents'])
-        else:
-            contents = []
-        if not isinstance(contents, list):
-            contents = [contents]
-        self.contents = contents
-        return
-
-    def __repr__(self):
-        return '<PDFPage: Resources={!r}, MediaBox={!r}>'\
-            .format(self.resources, self.mediabox)
-
-    INHERITABLE_ATTRS = {'Resources', 'MediaBox', 'CropBox', 'Rotate'}
-
-    @classmethod
-    def create_pages(cls, document):
-        def search(obj, parent):
-            if isinstance(obj, int):
-                objid = obj
-                tree = dict_value(document.getobj(objid)).copy()
-            else:
-                objid = obj.objid
-                tree = dict_value(obj).copy()
-            for (k, v) in parent.items():
-                if k in cls.INHERITABLE_ATTRS and k not in tree:
-                    tree[k] = v
-
-            tree_type = tree.get('Type')
-            if tree_type is None and not settings.STRICT:  # See #64
-                tree_type = tree.get('type')
-
-            if tree_type is LITERAL_PAGES and 'Kids' in tree:
-                log.info('Pages: Kids=%r', tree['Kids'])
-                for c in list_value(tree['Kids']):
-                    yield from search(c, tree)
-            elif tree_type is LITERAL_PAGE:
-                log.info('Page: %r', tree)
-                yield (objid, tree)
-        pages = False
-        if 'Pages' in document.catalog:
-            objects = search(document.catalog['Pages'], document.catalog)
-            for (objid, tree) in objects:
-                yield cls(document, objid, tree)
-                pages = True
-        if not pages:
-            # fallback when /Pages is missing.
-            for xref in document.xrefs:
-                for objid in xref.get_objids():
-                    try:
-                        obj = document.getobj(objid)
-                        if isinstance(obj, dict) \
-                                and obj.get('Type') is LITERAL_PAGE:
-                            yield cls(document, objid, obj)
-                    except PDFObjectNotFound:
-                        pass
-        return
-
-    @classmethod
-    def get_pages(cls, fp,
-                  pagenos=None, maxpages=0, password='',
-                  caching=True, check_extractable=False):
-        # Create a PDF parser object associated with the file object.
-        parser = PDFParser(fp)
-        # Create a PDF document object that stores the document structure.
-        doc = PDFDocument(parser, password=password, caching=caching)
-        # Check if the document allows text extraction.
-        # If not, warn the user and proceed.
-        if not doc.is_extractable:
-            if check_extractable:
-                error_msg = 'Text extraction is not allowed: %r' % fp
-                raise PDFTextExtractionNotAllowed(error_msg)
-            else:
-                warning_msg = 'The PDF %r contains a metadata field '\
-                            'indicating that it should not allow '   \
-                            'text extraction. Ignoring this field '  \
-                            'and proceeding.' % fp
-                warnings.warn(warning_msg, PDFTextExtractionNotAllowedWarning)
-        # Process each page contained in the document.
-        for (pageno, page) in enumerate(cls.create_pages(doc)):
-            if pagenos and (pageno not in pagenos):
-                continue
-            yield page
-            if maxpages and maxpages <= pageno+1:
-                break
-        return
@@ -1,170 +0,0 @@
-import logging
-from io import BytesIO
-from .psparser import PSStackParser
-from .psparser import PSSyntaxError
-from .psparser import PSEOF
-from .psparser import KWD
-from . import settings
-from .pdftypes import PDFException
-from .pdftypes import PDFStream
-from .pdftypes import PDFObjRef
-from .pdftypes import int_value
-from .pdftypes import dict_value
-
-log = logging.getLogger(__name__)
-
-
-class PDFSyntaxError(PDFException):
-    pass
-
-
-class PDFParser(PSStackParser):
-    """
-    PDFParser fetch PDF objects from a file stream.
-    It can handle indirect references by referring to
-    a PDF document set by set_document method.
-    It also reads XRefs at the end of every PDF file.
-
-    Typical usage:
-      parser = PDFParser(fp)
-      parser.read_xref()
-      parser.read_xref(fallback=True) # optional
-      parser.set_document(doc)
-      parser.seek(offset)
-      parser.nextobject()
-
-    """
-
-    def __init__(self, fp):
-        PSStackParser.__init__(self, fp)
-        self.doc = None
-        self.fallback = False
-        return
-
-    def set_document(self, doc):
-        """Associates the parser with a PDFDocument object."""
-        self.doc = doc
-        return
-
-    KEYWORD_R = KWD(b'R')
-    KEYWORD_NULL = KWD(b'null')
-    KEYWORD_ENDOBJ = KWD(b'endobj')
-    KEYWORD_STREAM = KWD(b'stream')
-    KEYWORD_XREF = KWD(b'xref')
-    KEYWORD_STARTXREF = KWD(b'startxref')
-
-    def do_keyword(self, pos, token):
-        """Handles PDF-related keywords."""
-
-        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
-            self.add_results(*self.pop(1))
-
-        elif token is self.KEYWORD_ENDOBJ:
-            self.add_results(*self.pop(4))
-
-        elif token is self.KEYWORD_NULL:
-            # null object
-            self.push((pos, None))
-
-        elif token is self.KEYWORD_R:
-            # reference to indirect object
-            try:
-                ((_, objid), (_, genno)) = self.pop(2)
-                (objid, genno) = (int(objid), int(genno))
-                obj = PDFObjRef(self.doc, objid, genno)
-                self.push((pos, obj))
-            except PSSyntaxError:
-                pass
-
-        elif token is self.KEYWORD_STREAM:
-            # stream object
-            ((_, dic),) = self.pop(1)
-            dic = dict_value(dic)
-            objlen = 0
-            if not self.fallback:
-                try:
-                    objlen = int_value(dic['Length'])
-                except KeyError:
-                    if settings.STRICT:
-                        raise PDFSyntaxError('/Length is undefined: %r' % dic)
-            self.seek(pos)
-            try:
-                (_, line) = self.nextline()  # 'stream'
-            except PSEOF:
-                if settings.STRICT:
-                    raise PDFSyntaxError('Unexpected EOF')
-                return
-            pos += len(line)
-            self.fp.seek(pos)
-            data = bytearray(self.fp.read(objlen))
-            self.seek(pos+objlen)
-            while 1:
-                try:
-                    (linepos, line) = self.nextline()
-                except PSEOF:
-                    if settings.STRICT:
-                        raise PDFSyntaxError('Unexpected EOF')
-                    break
-                if b'endstream' in line:
-                    i = line.index(b'endstream')
-                    objlen += i
-                    if self.fallback:
-                        data += line[:i]
-                    break
-                objlen += len(line)
-                if self.fallback:
-                    data += line
-            data = bytes(data)
-            self.seek(pos+objlen)
-            # XXX limit objlen not to exceed object boundary
-            log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos,
-                      objlen, dic, data[:10])
-            obj = PDFStream(dic, data, self.doc.decipher)
-            self.push((pos, obj))
-
-        else:
-            # others
-            self.push((pos, token))
-
-        return
-
-
-class PDFStreamParser(PDFParser):
-    """
-    PDFStreamParser is used to parse PDF content streams
-    that is contained in each page and has instructions
-    for rendering the page. A reference to a PDF document is
-    needed because a PDF content stream can also have
-    indirect references to other objects in the same document.
-    """
-
-    def __init__(self, data):
-        PDFParser.__init__(self, BytesIO(data))
-        return
-
-    def flush(self):
-        self.add_results(*self.popall())
-        return
-
-    KEYWORD_OBJ = KWD(b'obj')
-
-    def do_keyword(self, pos, token):
-        if token is self.KEYWORD_R:
-            # reference to indirect object
-            try:
-                ((_, objid), (_, genno)) = self.pop(2)
-                (objid, genno) = (int(objid), int(genno))
-                obj = PDFObjRef(self.doc, objid, genno)
-                self.push((pos, obj))
-            except PSSyntaxError:
-                pass
-            return
-        elif token in (self.KEYWORD_OBJ, self.KEYWORD_ENDOBJ):
-            if settings.STRICT:
-                # See PDF Spec 3.4.6: Only the object values are stored in the
-                # stream; the obj and endobj keywords are not used.
-                raise PDFSyntaxError('Keyword endobj found in stream')
-            return
-        # others
-        self.push((pos, token))
-        return
@@ -1,323 +0,0 @@
-import zlib
-import logging
-from .lzw import lzwdecode
-from .ascii85 import ascii85decode
-from .ascii85 import asciihexdecode
-from .runlength import rldecode
-from .ccitt import ccittfaxdecode
-from .psparser import PSException
-from .psparser import PSObject
-from .psparser import LIT
-from . import settings
-from .utils import apply_png_predictor
-from .utils import isnumber
-
-
-log = logging.getLogger(__name__)
-
-LITERAL_CRYPT = LIT('Crypt')
-
-# Abbreviation of Filter names in PDF 4.8.6. "Inline Images"
-LITERALS_FLATE_DECODE = (LIT('FlateDecode'), LIT('Fl'))
-LITERALS_LZW_DECODE = (LIT('LZWDecode'), LIT('LZW'))
-LITERALS_ASCII85_DECODE = (LIT('ASCII85Decode'), LIT('A85'))
-LITERALS_ASCIIHEX_DECODE = (LIT('ASCIIHexDecode'), LIT('AHx'))
-LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL'))
-LITERALS_CCITTFAX_DECODE = (LIT('CCITTFaxDecode'), LIT('CCF'))
-LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT'))
-LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),)
-
-
-class PDFObject(PSObject):
-    pass
-
-
-class PDFException(PSException):
-    pass
-
-
-class PDFTypeError(PDFException):
-    pass
-
-
-class PDFValueError(PDFException):
-    pass
-
-
-class PDFObjectNotFound(PDFException):
-    pass
-
-
-class PDFNotImplementedError(PDFException):
-    pass
-
-
-class PDFObjRef(PDFObject):
-
-    def __init__(self, doc, objid, _):
-        if objid == 0:
-            if settings.STRICT:
-                raise PDFValueError('PDF object id cannot be 0.')
-        self.doc = doc
-        self.objid = objid
-        return
-
-    def __repr__(self):
-        return '<PDFObjRef:%d>' % (self.objid)
-
-    def resolve(self, default=None):
-        try:
-            return self.doc.getobj(self.objid)
-        except PDFObjectNotFound:
-            return default
-
-
-def resolve1(x, default=None):
-    """Resolves an object.
-
-    If this is an array or dictionary, it may still contains
-    some indirect objects inside.
-    """
-    while isinstance(x, PDFObjRef):
-        x = x.resolve(default=default)
-    return x
-
-
-def resolve_all(x, default=None):
-    """Recursively resolves the given object and all the internals.
-
-    Make sure there is no indirect reference within the nested object.
-    This procedure might be slow.
-    """
-    while isinstance(x, PDFObjRef):
-        x = x.resolve(default=default)
-    if isinstance(x, list):
-        x = [resolve_all(v, default=default) for v in x]
-    elif isinstance(x, dict):
-        for (k, v) in x.items():
-            x[k] = resolve_all(v, default=default)
-    return x
-
-
-def decipher_all(decipher, objid, genno, x):
-    """Recursively deciphers the given object.
-    """
-    if isinstance(x, bytes):
-        return decipher(objid, genno, x)
-    if isinstance(x, list):
-        x = [decipher_all(decipher, objid, genno, v) for v in x]
-    elif isinstance(x, dict):
-        for (k, v) in x.items():
-            x[k] = decipher_all(decipher, objid, genno, v)
-    return x
-
-
-def int_value(x):
-    x = resolve1(x)
-    if not isinstance(x, int):
-        if settings.STRICT:
-            raise PDFTypeError('Integer required: %r' % x)
-        return 0
-    return x
-
-
-def float_value(x):
-    x = resolve1(x)
-    if not isinstance(x, float):
-        if settings.STRICT:
-            raise PDFTypeError('Float required: %r' % x)
-        return 0.0
-    return x
-
-
-def num_value(x):
-    x = resolve1(x)
-    if not isnumber(x):
-        if settings.STRICT:
-            raise PDFTypeError('Int or Float required: %r' % x)
-        return 0
-    return x
-
-
-def uint_value(x, n_bits):
-    """Resolve number and interpret it as a two's-complement unsigned number"""
-    x = int_value(x)
-    if x > 0:
-        return x
-    else:
-        return x + 2**n_bits
-
-
-def str_value(x):
-    x = resolve1(x)
-    if not isinstance(x, bytes):
-        if settings.STRICT:
-            raise PDFTypeError('String required: %r' % x)
-        return ''
-    return x
-
-
-def list_value(x):
-    x = resolve1(x)
-    if not isinstance(x, (list, tuple)):
-        if settings.STRICT:
-            raise PDFTypeError('List required: %r' % x)
-        return []
-    return x
-
-
-def dict_value(x):
-    x = resolve1(x)
-    if not isinstance(x, dict):
-        if settings.STRICT:
-            log.error('PDFTypeError : Dict required: %r', x)
-            raise PDFTypeError('Dict required: %r' % x)
-        return {}
-    return x
-
-
-def stream_value(x):
-    x = resolve1(x)
-    if not isinstance(x, PDFStream):
-        if settings.STRICT:
-            raise PDFTypeError('PDFStream required: %r' % x)
-        return PDFStream({}, b'')
-    return x
-
-
-class PDFStream(PDFObject):
-
-    def __init__(self, attrs, rawdata, decipher=None):
-        assert isinstance(attrs, dict), str(type(attrs))
-        self.attrs = attrs
-        self.rawdata = rawdata
-        self.decipher = decipher
-        self.data = None
-        self.objid = None
-        self.genno = None
-        return
-
-    def set_objid(self, objid, genno):
-        self.objid = objid
-        self.genno = genno
-        return
-
-    def __repr__(self):
-        if self.data is None:
-            assert self.rawdata is not None
-            return '<PDFStream(%r): raw=%d, %r>' % \
-                   (self.objid, len(self.rawdata), self.attrs)
-        else:
-            assert self.data is not None
-            return '<PDFStream(%r): len=%d, %r>' % \
-                   (self.objid, len(self.data), self.attrs)
-
-    def __contains__(self, name):
-        return name in self.attrs
-
-    def __getitem__(self, name):
-        return self.attrs[name]
-
-    def get(self, name, default=None):
-        return self.attrs.get(name, default)
-
-    def get_any(self, names, default=None):
-        for name in names:
-            if name in self.attrs:
-                return self.attrs[name]
-        return default
-
-    def get_filters(self):
-        filters = self.get_any(('F', 'Filter'))
-        params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
-        if not filters:
-            return []
-        if not isinstance(filters, list):
-            filters = [filters]
-        if not isinstance(params, list):
-            # Make sure the parameters list is the same as filters.
-            params = [params] * len(filters)
-        if settings.STRICT and len(params) != len(filters):
-            raise PDFException("Parameters len filter mismatch")
-        # resolve filter if possible
-        _filters = []
-        for fltr in filters:
-            if hasattr(fltr, 'resolve'):
-                fltr = fltr.resolve()[0]
-            _filters.append(fltr)
-        # return list solves https://github.com/pdfminer/pdfminer.six/issues/15
-        return list(zip(_filters, params))
-
-    def decode(self):
-        assert self.data is None \
-               and self.rawdata is not None, str((self.data, self.rawdata))
-        data = self.rawdata
-        if self.decipher:
-            # Handle encryption
-            data = self.decipher(self.objid, self.genno, data, self.attrs)
-        filters = self.get_filters()
-        if not filters:
-            self.data = data
-            self.rawdata = None
-            return
-        for (f, params) in filters:
-            if f in LITERALS_FLATE_DECODE:
-                # will get errors if the document is encrypted.
-                try:
-                    data = zlib.decompress(data)
-                except zlib.error as e:
-                    if settings.STRICT:
-                        error_msg = 'Invalid zlib bytes: {!r}, {!r}'\
-                            .format(e, data)
-                        raise PDFException(error_msg)
-                    data = b''
-            elif f in LITERALS_LZW_DECODE:
-                data = lzwdecode(data)
-            elif f in LITERALS_ASCII85_DECODE:
-                data = ascii85decode(data)
-            elif f in LITERALS_ASCIIHEX_DECODE:
-                data = asciihexdecode(data)
-            elif f in LITERALS_RUNLENGTH_DECODE:
-                data = rldecode(data)
-            elif f in LITERALS_CCITTFAX_DECODE:
-                data = ccittfaxdecode(data, params)
-            elif f in LITERALS_DCT_DECODE:
-                # This is probably a JPG stream
-                # it does not need to be decoded twice.
-                # Just return the stream to the user.
-                pass
-            elif f in LITERALS_JBIG2_DECODE:
-                pass
-            elif f == LITERAL_CRYPT:
-                # not yet..
-                raise PDFNotImplementedError('/Crypt filter is unsupported')
-            else:
-                raise PDFNotImplementedError('Unsupported filter: %r' % f)
-            # apply predictors
-            if params and 'Predictor' in params:
-                pred = int_value(params['Predictor'])
-                if pred == 1:
-                    # no predictor
-                    pass
-                elif 10 <= pred:
-                    # PNG predictor
-                    colors = int_value(params.get('Colors', 1))
-                    columns = int_value(params.get('Columns', 1))
-                    raw_bits_per_component = params.get('BitsPerComponent', 8)
-                    bitspercomponent = int_value(raw_bits_per_component)
-                    data = apply_png_predictor(pred, colors, columns,
-                                               bitspercomponent, data)
-                else:
-                    error_msg = 'Unsupported predictor: %r' % pred
-                    raise PDFNotImplementedError(error_msg)
-        self.data = data
-        self.rawdata = None
-        return
-
-    def get_data(self):
-        if self.data is None:
-            self.decode()
-        return self.data
-
-    def get_rawdata(self):
-        return self.rawdata
@@ -1,625 +0,0 @@
-#!/usr/bin/env python3
-
-# -*- coding: utf-8 -*-
-
-import re
-import logging
-
-
-from . import settings
-from .utils import choplist
-
-log = logging.getLogger(__name__)
-
-
-class PSException(Exception):
-    pass
-
-
-class PSEOF(PSException):
-    pass
-
-
-class PSSyntaxError(PSException):
-    pass
-
-
-class PSTypeError(PSException):
-    pass
-
-
-class PSValueError(PSException):
-    pass
-
-
-class PSObject:
-    """Base class for all PS or PDF-related data types."""
-
-    pass
-
-
-class PSLiteral(PSObject):
-
-    """A class that represents a PostScript literal.
-
-    Postscript literals are used as identifiers, such as
-    variable names, property names and dictionary keys.
-    Literals are case sensitive and denoted by a preceding
-    slash sign (e.g. "/Name")
-
-    Note: Do not create an instance of PSLiteral directly.
-    Always use PSLiteralTable.intern().
-    """
-
-    def __init__(self, name):
-        self.name = name
-
-    def __repr__(self):
-        name = self.name
-        return '/%r' % name
-
-
-class PSKeyword(PSObject):
-
-    """A class that represents a PostScript keyword.
-
-    PostScript keywords are a dozen of predefined words.
-    Commands and directives in PostScript are expressed by keywords.
-    They are also used to denote the content boundaries.
-
-    Note: Do not create an instance of PSKeyword directly.
-    Always use PSKeywordTable.intern().
-    """
-
-    def __init__(self, name):
-        self.name = name
-        return
-
-    def __repr__(self):
-        name = self.name
-        return '/%r' % name
-
-
-class PSSymbolTable:
-    """A utility class for storing PSLiteral/PSKeyword objects.
-
-    Interned objects can be checked its identity with "is" operator.
-    """
-
-    def __init__(self, klass):
-        self.dict = {}
-        self.klass = klass
-        return
-
-    def intern(self, name):
-        if name in self.dict:
-            lit = self.dict[name]
-        else:
-            lit = self.klass(name)
-            self.dict[name] = lit
-        return lit
-
-
-PSLiteralTable = PSSymbolTable(PSLiteral)
-PSKeywordTable = PSSymbolTable(PSKeyword)
-LIT = PSLiteralTable.intern
-KWD = PSKeywordTable.intern
-KEYWORD_PROC_BEGIN = KWD(b'{')
-KEYWORD_PROC_END = KWD(b'}')
-KEYWORD_ARRAY_BEGIN = KWD(b'[')
-KEYWORD_ARRAY_END = KWD(b']')
-KEYWORD_DICT_BEGIN = KWD(b'<<')
-KEYWORD_DICT_END = KWD(b'>>')
-
-
-def literal_name(x):
-    if not isinstance(x, PSLiteral):
-        if settings.STRICT:
-            raise PSTypeError('Literal required: {!r}'.format(x))
-        else:
-            name = x
-    else:
-        name = x.name
-        try:
-            name = str(name, 'utf-8')
-        except Exception:
-            pass
-    return name
-
-
-def keyword_name(x):
-    if not isinstance(x, PSKeyword):
-        if settings.STRICT:
-            raise PSTypeError('Keyword required: %r' % x)
-        else:
-            name = x
-    else:
-        name = str(x.name, 'utf-8', 'ignore')
-    return name
-
-
-EOL = re.compile(br'[\r\n]')
-SPC = re.compile(br'\s')
-NONSPC = re.compile(br'\S')
-HEX = re.compile(br'[0-9a-fA-F]')
-END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]')
-END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]')
-HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.')
-END_NUMBER = re.compile(br'[^0-9]')
-END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]')
-END_STRING = re.compile(br'[()\134]')
-OCT_STRING = re.compile(br'[0-7]')
-ESC_STRING = {
-    b'b': 8,
-    b't': 9,
-    b'n': 10,
-    b'f': 12,
-    b'r': 13,
-    b'(': 40,
-    b')': 41,
-    b'\\': 92
-}
-
-
-class PSBaseParser:
-
-    """Most basic PostScript parser that performs only tokenization.
-    """
-    BUFSIZ = 4096
-
-    def __init__(self, fp):
-        self.fp = fp
-        self.seek(0)
-        return
-
-    def __repr__(self):
-        return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp,
-                                        self.bufpos)
-
-    def flush(self):
-        return
-
-    def close(self):
-        self.flush()
-        return
-
-    def tell(self):
-        return self.bufpos+self.charpos
-
-    def poll(self, pos=None, n=80):
-        pos0 = self.fp.tell()
-        if not pos:
-            pos = self.bufpos+self.charpos
-        self.fp.seek(pos)
-        log.info('poll(%d): %r', pos, self.fp.read(n))
-        self.fp.seek(pos0)
-        return
-
-    def seek(self, pos):
-        """Seeks the parser to the given position.
-        """
-        log.debug('seek: %r', pos)
-        self.fp.seek(pos)
-        # reset the status for nextline()
-        self.bufpos = pos
-        self.buf = b''
-        self.charpos = 0
-        # reset the status for nexttoken()
-        self._parse1 = self._parse_main
-        self._curtoken = b''
-        self._curtokenpos = 0
-        self._tokens = []
-        return
-
-    def fillbuf(self):
-        if self.charpos < len(self.buf):
-            return
-        # fetch next chunk.
-        self.bufpos = self.fp.tell()
-        self.buf = self.fp.read(self.BUFSIZ)
-        if not self.buf:
-            raise PSEOF('Unexpected EOF')
-        self.charpos = 0
-        return
-
-    def nextline(self):
-        """Fetches a next line that ends either with \\r or \\n.
-        """
-        linebuf = b''
-        linepos = self.bufpos + self.charpos
-        eol = False
-        while 1:
-            self.fillbuf()
-            if eol:
-                c = self.buf[self.charpos:self.charpos+1]
-                # handle b'\r\n'
-                if c == b'\n':
-                    linebuf += c
-                    self.charpos += 1
-                break
-            m = EOL.search(self.buf, self.charpos)
-            if m:
-                linebuf += self.buf[self.charpos:m.end(0)]
-                self.charpos = m.end(0)
-                if linebuf[-1:] == b'\r':
-                    eol = True
-                else:
-                    break
-            else:
-                linebuf += self.buf[self.charpos:]
-                self.charpos = len(self.buf)
-        log.debug('nextline: %r, %r', linepos, linebuf)
-
-        return (linepos, linebuf)
-
-    def revreadlines(self):
-        """Fetches a next line backword.
-
-        This is used to locate the trailers at the end of a file.
-        """
-        self.fp.seek(0, 2)
-        pos = self.fp.tell()
-        buf = b''
-        while 0 < pos:
-            prevpos = pos
-            pos = max(0, pos-self.BUFSIZ)
-            self.fp.seek(pos)
-            s = self.fp.read(prevpos-pos)
-            if not s:
-                break
-            while 1:
-                n = max(s.rfind(b'\r'), s.rfind(b'\n'))
-                if n == -1:
-                    buf = s + buf
-                    break
-                yield s[n:] + buf
-                s = s[:n]
-                buf = b''
-        return
-
-    def _parse_main(self, s, i):
-        m = NONSPC.search(s, i)
-        if not m:
-            return len(s)
-        j = m.start(0)
-        c = s[j:j+1]
-        self._curtokenpos = self.bufpos+j
-        if c == b'%':
-            self._curtoken = b'%'
-            self._parse1 = self._parse_comment
-            return j+1
-        elif c == b'/':
-            self._curtoken = b''
-            self._parse1 = self._parse_literal
-            return j+1
-        elif c in b'-+' or c.isdigit():
-            self._curtoken = c
-            self._parse1 = self._parse_number
-            return j+1
-        elif c == b'.':
-            self._curtoken = c
-            self._parse1 = self._parse_float
-            return j+1
-        elif c.isalpha():
-            self._curtoken = c
-            self._parse1 = self._parse_keyword
-            return j+1
-        elif c == b'(':
-            self._curtoken = b''
-            self.paren = 1
-            self._parse1 = self._parse_string
-            return j+1
-        elif c == b'<':
-            self._curtoken = b''
-            self._parse1 = self._parse_wopen
-            return j+1
-        elif c == b'>':
-            self._curtoken = b''
-            self._parse1 = self._parse_wclose
-            return j+1
-        else:
-            self._add_token(KWD(c))
-            return j+1
-
-    def _add_token(self, obj):
-        self._tokens.append((self._curtokenpos, obj))
-        return
-
-    def _parse_comment(self, s, i):
-        m = EOL.search(s, i)
-        if not m:
-            self._curtoken += s[i:]
-            return len(s)
-        j = m.start(0)
-        self._curtoken += s[i:j]
-        self._parse1 = self._parse_main
-        # We ignore comments.
-        # self._tokens.append(self._curtoken)
-        return j
-
-    def _parse_literal(self, s, i):
-        m = END_LITERAL.search(s, i)
-        if not m:
-            self._curtoken += s[i:]
-            return len(s)
-        j = m.start(0)
-        self._curtoken += s[i:j]
-        c = s[j:j+1]
-        if c == b'#':
-            self.hex = b''
-            self._parse1 = self._parse_literal_hex
-            return j+1
-        try:
-            self._curtoken = str(self._curtoken, 'utf-8')
-        except Exception:
-            pass
-        self._add_token(LIT(self._curtoken))
-        self._parse1 = self._parse_main
-        return j
-
-    def _parse_literal_hex(self, s, i):
-        c = s[i:i+1]
-        if HEX.match(c) and len(self.hex) < 2:
-            self.hex += c
-            return i+1
-        if self.hex:
-            self._curtoken += bytes((int(self.hex, 16),))
-        self._parse1 = self._parse_literal
-        return i
-
-    def _parse_number(self, s, i):
-        m = END_NUMBER.search(s, i)
-        if not m:
-            self._curtoken += s[i:]
-            return len(s)
-        j = m.start(0)
-        self._curtoken += s[i:j]
-        c = s[j:j+1]
-        if c == b'.':
-            self._curtoken += c
-            self._parse1 = self._parse_float
-            return j+1
-        try:
-            self._add_token(int(self._curtoken))
-        except ValueError:
-            pass
-        self._parse1 = self._parse_main
-        return j
-
-    def _parse_float(self, s, i):
-        m = END_NUMBER.search(s, i)
-        if not m:
-            self._curtoken += s[i:]
-            return len(s)
-        j = m.start(0)
-        self._curtoken += s[i:j]
-        try:
-            self._add_token(float(self._curtoken))
-        except ValueError:
-            pass
-        self._parse1 = self._parse_main
-        return j
-
-    def _parse_keyword(self, s, i):
-        m = END_KEYWORD.search(s, i)
-        if not m:
-            self._curtoken += s[i:]
-            return len(s)
-        j = m.start(0)
-        self._curtoken += s[i:j]
-        if self._curtoken == b'true':
-            token = True
-        elif self._curtoken == b'false':
-            token = False
-        else:
-            token = KWD(self._curtoken)
-        self._add_token(token)
-        self._parse1 = self._parse_main
-        return j
-
-    def _parse_string(self, s, i):
-        m = END_STRING.search(s, i)
-        if not m:
-            self._curtoken += s[i:]
-            return len(s)
-        j = m.start(0)
-        self._curtoken += s[i:j]
-        c = s[j:j+1]
-        if c == b'\\':
-            self.oct = b''
-            self._parse1 = self._parse_string_1
-            return j+1
-        if c == b'(':
-            self.paren += 1
-            self._curtoken += c
-            return j+1
-        if c == b')':
-            self.paren -= 1
-            if self.paren:
-                # WTF, they said balanced parens need no special treatment.
-                self._curtoken += c
-                return j+1
-        self._add_token(self._curtoken)
-        self._parse1 = self._parse_main
-        return j+1
-
-    def _parse_string_1(self, s, i):
-        c = s[i:i+1]
-        if OCT_STRING.match(c) and len(self.oct) < 3:
-            self.oct += c
-            return i+1
-        if self.oct:
-            self._curtoken += bytes((int(self.oct, 8),))
-            self._parse1 = self._parse_string
-            return i
-        if c in ESC_STRING:
-            self._curtoken += bytes((ESC_STRING[c],))
-        self._parse1 = self._parse_string
-        return i+1
-
-    def _parse_wopen(self, s, i):
-        c = s[i:i+1]
-        if c == b'<':
-            self._add_token(KEYWORD_DICT_BEGIN)
-            self._parse1 = self._parse_main
-            i += 1
-        else:
-            self._parse1 = self._parse_hexstring
-        return i
-
-    def _parse_wclose(self, s, i):
-        c = s[i:i+1]
-        if c == b'>':
-            self._add_token(KEYWORD_DICT_END)
-            i += 1
-        self._parse1 = self._parse_main
-        return i
-
-    def _parse_hexstring(self, s, i):
-        m = END_HEX_STRING.search(s, i)
-        if not m:
-            self._curtoken += s[i:]
-            return len(s)
-        j = m.start(0)
-        self._curtoken += s[i:j]
-        token = HEX_PAIR.sub(lambda m: bytes((int(m.group(0), 16),)),
-                             SPC.sub(b'', self._curtoken))
-        self._add_token(token)
-        self._parse1 = self._parse_main
-        return j
-
-    def nexttoken(self):
-        while not self._tokens:
-            self.fillbuf()
-            self.charpos = self._parse1(self.buf, self.charpos)
-        token = self._tokens.pop(0)
-        log.debug('nexttoken: %r', token)
-        return token
-
-
-class PSStackParser(PSBaseParser):
-    def __init__(self, fp):
-        PSBaseParser.__init__(self, fp)
-        self.reset()
-        return
-
-    def reset(self):
-        self.context = []
-        self.curtype = None
-        self.curstack = []
-        self.results = []
-        return
-
-    def seek(self, pos):
-        PSBaseParser.seek(self, pos)
-        self.reset()
-        return
-
-    def push(self, *objs):
-        self.curstack.extend(objs)
-        return
-
-    def pop(self, n):
-        objs = self.curstack[-n:]
-        self.curstack[-n:] = []
-        return objs
-
-    def popall(self):
-        objs = self.curstack
-        self.curstack = []
-        return objs
-
-    def add_results(self, *objs):
-        try:
-            log.debug('add_results: %r', objs)
-        except Exception:
-            log.debug('add_results: (unprintable object)')
-        self.results.extend(objs)
-        return
-
-    def start_type(self, pos, type):
-        self.context.append((pos, self.curtype, self.curstack))
-        (self.curtype, self.curstack) = (type, [])
-        log.debug('start_type: pos=%r, type=%r', pos, type)
-        return
-
-    def end_type(self, type):
-        if self.curtype != type:
-            raise PSTypeError('Type mismatch: {!r} != {!r}'
-                              .format(self.curtype, type))
-        objs = [obj for (_, obj) in self.curstack]
-        (pos, self.curtype, self.curstack) = self.context.pop()
-        log.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs)
-        return (pos, objs)
-
-    def do_keyword(self, pos, token):
-        return
-
-    def nextobject(self):
-        """Yields a list of objects.
-
-        Arrays and dictionaries are represented as Python lists and
-        dictionaries.
-
-        :return: keywords, literals, strings, numbers, arrays and dictionaries.
-        """
-        while not self.results:
-            (pos, token) = self.nexttoken()
-            if isinstance(token, (int, float, bool, str, bytes, PSLiteral)):
-                # normal token
-                self.push((pos, token))
-            elif token == KEYWORD_ARRAY_BEGIN:
-                # begin array
-                self.start_type(pos, 'a')
-            elif token == KEYWORD_ARRAY_END:
-                # end array
-                try:
-                    self.push(self.end_type('a'))
-                except PSTypeError:
-                    if settings.STRICT:
-                        raise
-            elif token == KEYWORD_DICT_BEGIN:
-                # begin dictionary
-                self.start_type(pos, 'd')
-            elif token == KEYWORD_DICT_END:
-                # end dictionary
-                try:
-                    (pos, objs) = self.end_type('d')
-                    if len(objs) % 2 != 0:
-                        error_msg = 'Invalid dictionary construct: %r' % objs
-                        raise PSSyntaxError(error_msg)
-                    d = {literal_name(k): v
-                         for (k, v) in choplist(2, objs) if v is not None}
-                    self.push((pos, d))
-                except PSTypeError:
-                    if settings.STRICT:
-                        raise
-            elif token == KEYWORD_PROC_BEGIN:
-                # begin proc
-                self.start_type(pos, 'p')
-            elif token == KEYWORD_PROC_END:
-                # end proc
-                try:
-                    self.push(self.end_type('p'))
-                except PSTypeError:
-                    if settings.STRICT:
-                        raise
-            elif isinstance(token, PSKeyword):
-                log.debug('do_keyword: pos=%r, token=%r, stack=%r', pos,
-                          token, self.curstack)
-                self.do_keyword(pos, token)
-            else:
-                log.error('unknown token: pos=%r, token=%r, stack=%r', pos,
-                          token, self.curstack)
-                self.do_keyword(pos, token)
-                raise
-            if self.context:
-                continue
-            else:
-                self.flush()
-        obj = self.results.pop(0)
-        try:
-            log.debug('nextobject: %r', obj)
-        except Exception:
-            log.debug('nextobject: (unprintable object)')
-        return obj
@@ -1,40 +0,0 @@
-#
-# RunLength decoder (Adobe version) implementation based on PDF Reference
-# version 1.4 section 3.3.4.
-#
-#  * public domain *
-#
-
-
-def rldecode(data):
-    """
-    RunLength decoder (Adobe version) implementation based on PDF Reference
-    version 1.4 section 3.3.4:
-        The RunLengthDecode filter decodes data that has been encoded in a
-        simple byte-oriented format based on run length. The encoded data
-        is a sequence of runs, where each run consists of a length byte
-        followed by 1 to 128 bytes of data. If the length byte is in the
-        range 0 to 127, the following length + 1 (1 to 128) bytes are
-        copied literally during decompression. If length is in the range
-        129 to 255, the following single byte is to be copied 257 - length
-        (2 to 128) times during decompression. A length value of 128
-        denotes EOD.
-    """
-    decoded = b''
-    i = 0
-    while i < len(data):
-        length = data[i]
-        if length == 128:
-            break
-
-        if length >= 0 and length < 128:
-            for j in range(i+1, (i+1)+(length+1)):
-                decoded += bytes((data[j],))
-            i = (i+1) + (length+1)
-
-        if length > 128:
-            run = bytes((data[i+1],))*(257-length)
-            decoded += run
-            i = (i+1) + 1
-
-    return decoded
@@ -1 +0,0 @@
-STRICT = False
@@ -1,406 +0,0 @@
-"""
-Miscellaneous Routines.
-"""
-import io
-import pathlib
-import struct
-from html import escape
-
-import chardet  # For str encoding detection
-
-# from sys import maxint as INF doesn't work anymore under Python3, but PDF
-# still uses 32 bits ints
-INF = (1 << 31) - 1
-
-
-class open_filename(object):
-    """
-    Context manager that allows opening a filename
-    (str or pathlib.PurePath type is supported) and closes it on exit,
-    (just like `open`), but does nothing for file-like objects.
-    """
-    def __init__(self, filename, *args, **kwargs):
-        if isinstance(filename, pathlib.PurePath):
-            filename = str(filename)
-        if isinstance(filename, str):
-            self.file_handler = open(filename, *args, **kwargs)
-            self.closing = True
-        elif isinstance(filename, io.IOBase):
-            self.file_handler = filename
-            self.closing = False
-        else:
-            raise TypeError('Unsupported input type: %s' % type(filename))
-
-    def __enter__(self):
-        return self.file_handler
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if self.closing:
-            self.file_handler.close()
-        return False
-
-
-def make_compat_bytes(in_str):
-    "Converts to bytes, encoding to unicode."
-    assert isinstance(in_str, str), str(type(in_str))
-    return in_str.encode()
-
-
-def make_compat_str(in_str):
-    """Converts to string, guessing encoding."""
-    assert isinstance(in_str, (bytes, str)), str(type(in_str))
-    if isinstance(in_str, bytes):
-        enc = chardet.detect(in_str)
-        in_str = in_str.decode(enc['encoding'])
-    return in_str
-
-
-def shorten_str(s, size):
-    if size < 7:
-        return s[:size]
-    if len(s) > size:
-        length = (size - 5) // 2
-        return '{} ... {}'.format(s[:length], s[-length:])
-    else:
-        return s
-
-
-def compatible_encode_method(bytesorstring, encoding='utf-8',
-                             erraction='ignore'):
-    """When Py2 str.encode is called, it often means bytes.encode in Py3.
-
-     This does either.
-     """
-    if isinstance(bytesorstring, str):
-        return bytesorstring
-    assert isinstance(bytesorstring, bytes), str(type(bytesorstring))
-    return bytesorstring.decode(encoding, erraction)
-
-
-def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
-    if bitspercomponent != 8:
-        # unsupported
-        raise ValueError("Unsupported `bitspercomponent': %d" %
-                         bitspercomponent)
-    nbytes = colors * columns * bitspercomponent // 8
-    buf = b''
-    line0 = b'\x00' * columns
-    for i in range(0, len(data), nbytes + 1):
-        ft = data[i]
-        i += 1
-        line1 = data[i:i + nbytes]
-        line2 = b''
-        if ft == 0:
-            # PNG none
-            line2 += line1
-        elif ft == 1:
-            # PNG sub (UNTESTED)
-            c = 0
-            for b in line1:
-                c = (c + b) & 255
-                line2 += bytes((c,))
-        elif ft == 2:
-            # PNG up
-            for (a, b) in zip(line0, line1):
-                c = (a + b) & 255
-                line2 += bytes((c,))
-        elif ft == 3:
-            # PNG average (UNTESTED)
-            c = 0
-            for (a, b) in zip(line0, line1):
-                c = ((c + a + b) // 2) & 255
-                line2 += bytes((c,))
-        else:
-            # unsupported
-            raise ValueError("Unsupported predictor value: %d" % ft)
-        buf += line2
-        line0 = line2
-    return buf
-
-
-#  Matrix operations
-MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)
-
-
-def mult_matrix(m1, m0):
-    (a1, b1, c1, d1, e1, f1) = m1
-    (a0, b0, c0, d0, e0, f0) = m0
-    """Returns the multiplication of two matrices."""
-    return (a0 * a1 + c0 * b1, b0 * a1 + d0 * b1,
-            a0 * c1 + c0 * d1, b0 * c1 + d0 * d1,
-            a0 * e1 + c0 * f1 + e0, b0 * e1 + d0 * f1 + f0)
-
-
-def translate_matrix(m, v):
-    """Translates a matrix by (x, y)."""
-    (a, b, c, d, e, f) = m
-    (x, y) = v
-    return a, b, c, d, x * a + y * c + e, x * b + y * d + f
-
-
-def apply_matrix_pt(m, v):
-    (a, b, c, d, e, f) = m
-    (x, y) = v
-    """Applies a matrix to a point."""
-    return a * x + c * y + e, b * x + d * y + f
-
-
-def apply_matrix_norm(m, v):
-    """Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))"""
-    (a, b, c, d, e, f) = m
-    (p, q) = v
-    return a * p + c * q, b * p + d * q
-
-
-#  Utility functions
-
-def isnumber(x):
-    return isinstance(x, (int, float))
-
-
-def uniq(objs):
-    """Eliminates duplicated elements."""
-    done = set()
-    for obj in objs:
-        if obj in done:
-            continue
-        done.add(obj)
-        yield obj
-    return
-
-
-def fsplit(pred, objs):
-    """Split a list into two classes according to the predicate."""
-    t = []
-    f = []
-    for obj in objs:
-        if pred(obj):
-            t.append(obj)
-        else:
-            f.append(obj)
-    return t, f
-
-
-def drange(v0, v1, d):
-    """Returns a discrete range."""
-    return range(int(v0) // d, int(v1 + d) // d)
-
-
-def get_bound(pts):
-    """Compute a minimal rectangle that covers all the points."""
-    (x0, y0, x1, y1) = (INF, INF, -INF, -INF)
-    for (x, y) in pts:
-        x0 = min(x0, x)
-        y0 = min(y0, y)
-        x1 = max(x1, x)
-        y1 = max(y1, y)
-    return x0, y0, x1, y1
-
-
-def pick(seq, func, maxobj=None):
-    """Picks the object obj where func(obj) has the highest value."""
-    maxscore = None
-    for obj in seq:
-        score = func(obj)
-        if maxscore is None or maxscore < score:
-            (maxscore, maxobj) = (score, obj)
-    return maxobj
-
-
-def choplist(n, seq):
-    """Groups every n elements of the list."""
-    r = []
-    for x in seq:
-        r.append(x)
-        if len(r) == n:
-            yield tuple(r)
-            r = []
-    return
-
-
-def nunpack(s, default=0):
-    """Unpacks 1 to 4 or 8 byte integers (big endian)."""
-    length = len(s)
-    if not length:
-        return default
-    elif length == 1:
-        return ord(s)
-    elif length == 2:
-        return struct.unpack('>H', s)[0]
-    elif length == 3:
-        return struct.unpack('>L', b'\x00' + s)[0]
-    elif length == 4:
-        return struct.unpack('>L', s)[0]
-    elif length == 8:
-        return struct.unpack('>Q', s)[0]
-    else:
-        raise TypeError('invalid length: %d' % length)
-
-
-PDFDocEncoding = ''.join(chr(x) for x in (
-    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
-    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
-    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0017, 0x0017,
-    0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,
-    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
-    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
-    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
-    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
-    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
-    0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
-    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
-    0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
-    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
-    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
-    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
-    0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,
-    0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
-    0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
-    0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,
-    0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000,
-    0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
-    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af,
-    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
-    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
-    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
-    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
-    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
-    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
-    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
-    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
-    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
-    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
-))
-
-
-def decode_text(s):
-    """Decodes a PDFDocEncoding string to Unicode."""
-    if s.startswith(b'\xfe\xff'):
-        return str(s[2:], 'utf-16be', 'ignore')
-    else:
-        return ''.join(PDFDocEncoding[c] for c in s)
-
-
-def enc(x):
-    """Encodes a string for SGML/XML/HTML"""
-    if isinstance(x, bytes):
-        return ''
-    return escape(x)
-
-
-def bbox2str(bbox):
-    (x0, y0, x1, y1) = bbox
-    return '{:.3f},{:.3f},{:.3f},{:.3f}'.format(x0, y0, x1, y1)
-
-
-def matrix2str(m):
-    (a, b, c, d, e, f) = m
-    return '[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]'\
-        .format(a, b, c, d, e, f)
-
-
-def vecBetweenBoxes(obj1, obj2):
-    """A distance function between two TextBoxes.
-
-    Consider the bounding rectangle for obj1 and obj2.
-    Return vector between 2 boxes boundaries if they don't overlap, otherwise
-    returns vector betweeen boxes centers
-
-             +------+..........+ (x1, y1)
-             | obj1 |          :
-             +------+www+------+
-             :          | obj2 |
-    (x0, y0) +..........+------+
-    """
-    (x0, y0) = (min(obj1.x0, obj2.x0), min(obj1.y0, obj2.y0))
-    (x1, y1) = (max(obj1.x1, obj2.x1), max(obj1.y1, obj2.y1))
-    (ow, oh) = (x1 - x0, y1 - y0)
-    (iw, ih) = (ow - obj1.width - obj2.width, oh - obj1.height - obj2.height)
-    if iw < 0 and ih < 0:
-        # if one is inside another we compute euclidean distance
-        (xc1, yc1) = ((obj1.x0 + obj1.x1) / 2, (obj1.y0 + obj1.y1) / 2)
-        (xc2, yc2) = ((obj2.x0 + obj2.x1) / 2, (obj2.y0 + obj2.y1) / 2)
-        return xc1 - xc2, yc1 - yc2
-    else:
-        return max(0, iw), max(0, ih)
-
-
-class Plane:
-    """A set-like data structure for objects placed on a plane.
-
-    Can efficiently find objects in a certain rectangular area.
-    It maintains two parallel lists of objects, each of
-    which is sorted by its x or y coordinate.
-    """
-
-    def __init__(self, bbox, gridsize=50):
-        self._seq = []  # preserve the object order.
-        self._objs = set()
-        self._grid = {}
-        self.gridsize = gridsize
-        (self.x0, self.y0, self.x1, self.y1) = bbox
-
-    def __repr__(self):
-        return '<Plane objs=%r>' % list(self)
-
-    def __iter__(self):
-        return (obj for obj in self._seq if obj in self._objs)
-
-    def __len__(self):
-        return len(self._objs)
-
-    def __contains__(self, obj):
-        return obj in self._objs
-
-    def _getrange(self, bbox):
-        (x0, y0, x1, y1) = bbox
-        if x1 <= self.x0 or self.x1 <= x0 or y1 <= self.y0 or self.y1 <= y0:
-            return
-        x0 = max(self.x0, x0)
-        y0 = max(self.y0, y0)
-        x1 = min(self.x1, x1)
-        y1 = min(self.y1, y1)
-        for grid_y in drange(y0, y1, self.gridsize):
-            for grid_x in drange(x0, x1, self.gridsize):
-                yield (grid_x, grid_y)
-
-    def extend(self, objs):
-        for obj in objs:
-            self.add(obj)
-
-    def add(self, obj):
-        """place an object."""
-        for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
-            if k not in self._grid:
-                r = []
-                self._grid[k] = r
-            else:
-                r = self._grid[k]
-            r.append(obj)
-        self._seq.append(obj)
-        self._objs.add(obj)
-
-    def remove(self, obj):
-        """displace an object."""
-        for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
-            try:
-                self._grid[k].remove(obj)
-            except (KeyError, ValueError):
-                pass
-        self._objs.remove(obj)
-
-    def find(self, bbox):
-        """finds objects that are in a certain area."""
-        (x0, y0, x1, y1) = bbox
-        done = set()
-        for k in self._getrange(bbox):
-            if k not in self._grid:
-                continue
-            for obj in self._grid[k]:
-                if obj in done:
-                    continue
-                done.add(obj)
-                if obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 \
-                        or y1 <= obj.y0:
-                    continue
-                yield obj
@@ -1,177 +0,0 @@
-#!/bin/bash
-
-RED=$'\x1b[31m'
-GREEN=$'\x1b[32m'
-GREY=$'\x1b[90m'
-RESET=$'\x1b[39m'
-
-[[ $# -lt 1 ]] && {
-	echo "$0 'needle' where/ [/usr/bin/find options]"
-	echo "example: $0 's3cr3t' /mnt/share/ -size -10M ! -iname '*.wav' ! -iname '*.mp3'"
-	exit
-}
-
-function fork(){
-	needle="$1"
-	tempdir="$2"
-	ln -s "$(realpath $0)" "$tempdir/$(basename $0)"
-	( cd "$tempdir"; "./$(basename $0)" "$needle" "." "${opts[@]}"; )
-}
-
-needle="$1"
-shift
-where="$1"
-shift
-opts=("$@")
-
-find "$where" "${opts[@]}" -type f -print 2> /dev/null |
-while read path
-do
-	filename=$(basename "$path")
-	filename=${filename%\?*}
-	ext=${filename##*.}
-	[[ $filename = $ext ]] && ext=''
-	mime=$(file -bi "$path")
-	mime=${mime%' '*}
-	#echo "$path"
-	case $mime in
-		*/xml\;)
-			content=$(cat "$path")
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[xml] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		*/*html*)
-			codepage=$(uchardet "$path")
-			content=$(cat "$path" | iconv -f $codepage | lynx -nolist -dump -stdin)
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[html] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		text/*|*/*script\;)
-			content=$(cat "$path")
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[text] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		application/msword\;)
-			content=$(catdoc "$path")
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[doc] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		application/vnd.openxmlformats-officedocument.wordprocessingml.document\;)
-			content=$(unzip -p "$path" | grep -a '<w:r' | sed 's/<w:p[^<\/]*>/ /g' | sed 's/<[^<]*>//g' | grep -a -v '^[[:space:]]*$' | sed G)
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[docx] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		application/vnd.ms-excel\;)
-			content=$(xls2csv -x "$path")
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[xls] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\;)
-			content=$(unzip -p "$path" | grep -a -e '<si><t>' -e '<vt:lpstr>' | sed 's/<[^<\/]*>/ /g' | sed 's/<[^<]*>//g')
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[xlsx] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		application/pdf\;)
-			content=$(pdf2txt -t text "$path" 2> /dev/null)
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[pdf] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		application/x-executable\;|application/x*dos*)
-			content=$(rabin2 -z "$path" 2> /dev/null)
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[exe] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		application/x-object\;|application/x-sharedlib|application/x-executable\;)
-			content=$(rabin2 -z "$path" 2> /dev/null)
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[elf] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		application/*compressed*|application/*zip*|application/*rar*|application/*tar*|application/*gzip*)
-			content=$(7z l "$path" | tail -n +13)
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[archive] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			temp=$(tempfile)
-			rm $temp && mkdir -p "$temp/$path"
-			7z x "$path" -o"$temp/$path" 1> /dev/null 2> /dev/null
-			fork "$needle" "$temp"
-			rm -r "$temp"
-			#break
-			;;
-		image/*)
-			content=$(identify -verbose "$path" 2> /dev/null)
-			#content=$(tesseract "$path" stdout -l eng; tesseract "$path" stdout -l rus)
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[img] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		message/*)
-			content=$(mu view "$path")
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[message] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			temp=$(tempfile)
-			rm $temp && mkdir -p "$temp/$path"
-			cp "$path" "$temp/$path/"
-			munpack -t -f -C "$(realpath $temp/$path)" "$(basename $path)" > /dev/null
-			rm "$temp/$path/$(basename $path)"
-			fork "$needle" "$temp"
-			rm -r "$temp"
-			#break
-			;;
-		application/octet-stream\;)
-			#content=$(strings "$path")
-			#if echo "$content"|grep -q -ai "$needle"; then
-			#	echo $GREEN "[raw] $path" $RESET
-			#	echo "$content"|grep -ai "$needle" --color=auto
-			#fi
-			false
-			;;
-		application/x-raw-disk-image\;)
-			content=$(binwalk "$path")
-			if echo "$content"|grep -q -ai "$needle"; then
-				echo $GREEN "[disk] $path" $RESET
-				echo "$content"|grep -ai "$needle" --color=auto
-			fi
-			;;
-		*)
-			file "$path" | grep -q text &&
-			{
-				content=$(cat "$path")
-				if echo "$content"|grep -q -ai "$needle"; then
-					echo $GREEN "[unknown] $path" $RESET
-					echo "$content"|grep -ai "$needle" --color=auto
-				fi
-			} || {
-				content=$(strings "$path")
-				if echo "$content"|grep -q -ai "$needle"; then
-					echo $GREEN "[unknown] $path" $RESET
-					echo "$content"|grep -ai "$needle" --color=auto
-				fi
-			}
-			;;
-	esac
-done
@@ -1,32 +0,0 @@
-#!/bin/bash
-
-GREEN=$'\x1b[32m'
-RESET=$'\x1b[39m'
-
-LIMIT=10
-OFFSET=1
-
-while getopts "c:o:" opt
-do
-	case $opt in
-		c) LIMIT=$OPTARG;;
-		o) OFFSET=$OPTARG;;
-esac
-done
-
-[[ $(($#-$OPTIND)) -lt 1 ]] && {
-	echo $0 [opts] words.db QUERY
-	echo "opts:"
-	echo "  -c count"
-	echo "  -o offset"
-	exit
-}
-
-DB="${@:$OPTIND:1}"
-shift $OPTIND
-echo $GREEN
-#echo "SELECT uri FROM words WHERE text MATCH '$*' limit $LIMIT offset $OFFSET;" | sqlite3 "$DB"
-echo "SELECT uri FROM words WHERE text LIKE '%$*%' limit $LIMIT offset $OFFSET;" | sqlite3 "$DB"
-echo $RESET
-#echo "SELECT text FROM words WHERE text MATCH '$*' limit $LIMIT offset $OFFSET;" | sqlite3 "$DB" | grep -i -o -P ".{0,100}$*..{0,100}" | grep -i --color=auto "$*"
-echo "SELECT text FROM words WHERE text LIKE '%$*%' limit $LIMIT offset $OFFSET;" | sqlite3 "$DB" | grep -i -o -P ".{0,100}$*..{0,100}" | grep -i --color=auto "$*"
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-USERAGENT="Mozilla"
-IGNORE_EXT="gif,GIF,jpg,JPG,png,PNG,ico,ICO,svg,SVG,woff,ttf,eot"
-
-[ $# -lt 1 ] && {
-	echo "$0 url [/usr/bin/wget options]"
-	echo "example: $0 --level 5 --wait 2 --domains www.site.com --quota=10000000 -A html,php -R pdf,jpg -X uploads --no-parent http://site.com/path/to"
-	exit
-}
-
-function crawl(){
-	wget --no-check-certificate --recursive --spider -e robots=off -U $USERAGENT -O "/tmp/spider" --no-verbose $* 2>&1 | sed -rn 's|.*URL:[ ]*([^ ]+).*|\1|p'
-}
-
-function save(){
-	wget --no-check-certificate --recursive -N -e robots=off -U $USERAGENT --no-verbose -R "$IGNORE_EXT" $* 2>&1 | sed -rn 's|.*URL:[ ]*([^ ]+).*|\1|p'
-}
-
-#crawl $*
-save $*
@@ -0,0 +1,194 @@
+#!/usr/bin/python3
+import csv
+import json
+from hashlib import md5
+from opensearchpy import OpenSearch
+from os import path
+from datetime import datetime
+from colorama import Fore
+import argparse
+
+
+CREDS = ('admin', 'admin')
+parser = argparse.ArgumentParser( description='search machine control tool' )
+parser.add_argument("opensearch", type=str, default="localhost:9200", help="opensearch address (localhost:9200)")
+parser.add_argument("-i", "--index", type=str, metavar="index", default="", help="index where to search")
+parser.add_argument("-o", "--offset", type=int, metavar="offset", default=0, help="offset results in query")
+parser.add_argument("-c", "--count", type=int, metavar="count", default=10, help="count results in query")
+parser.add_argument("-init", action="store_true", help="init index")
+parser.add_argument("-drop", action="store_true", help="drop index")
+parser.add_argument("-import", dest="file_import", metavar="input.csv", help="import data")
+parser.add_argument("-delete", dest="file_delete", metavar="input.csv", help="delete data")
+parser.add_argument("-query", metavar="query", help="search query")
+parser.add_argument("-cache", metavar="cache", help="get cache of a document")
+args = parser.parse_args()
+
+host,port = args.opensearch.split(":")
+client = OpenSearch(
+  hosts = [{'host': host, 'port': int(port)}],
+  http_compress = True,
+  http_auth = CREDS,
+  use_ssl = True,
+  verify_certs = False,
+  ssl_assert_hostname = False,
+  ssl_show_warn = False
+)
+
+def indexes():
+  for index in client.indices.get("*"):
+    print(index, client.cat.count(index))
+
+def info(index):
+  print(json.dumps(client.indices.get_settings(index=index), indent=4))
+  #json.dumps(client.indices.get_mapping(index=index))
+
+def init(index):
+  SETTINGS = {
+    "mappings": {
+      "properties": {
+        "timestamp": {"type": "text"},
+        "inurl": { "type" : "text" },
+        "site": { "type" : "text" },
+        "ext": { "type" : "text" },
+        "intitle": { "type" : "text" },
+        "intext": { "type" : "text" },
+        "filetype": { "type" : "text" }
+      }
+    },
+    "settings": {
+      "analysis": {
+        "analyzer": {
+          "russian": {
+            "type": "custom",
+            "tokenizer": "standard",
+            "filter": ["lowercase", "russian_stop"],
+          },
+          "autocomplete": {
+            "type": "custom",
+            "tokenizer": "standard",
+            "filter": ["lowercase", "russian_stop", "autocomplete_filter"]
+          }
+        },
+        "filter": {
+          "russian_stop": {
+            "type": "stop",
+            "stopwords": "_russian_"
+          },
+          "autocomplete_filter": {
+            "type": "edge_ngram",
+            "min_gram": 1,
+            "max_gram": 20
+          }
+        }
+      }
+    }
+  }
+
+  response = client.indices.create(index, body=SETTINGS)
+  print(response)
+
+def add(index, source):
+  csv.field_size_limit(2**32)
+  reader = csv.reader(open(source, errors="surrogateescape"), delimiter=',', quotechar='"')
+  for row in reader:
+    try:
+      timestamp,filepath,ext,filetype,content,*_ = row
+
+      document = {
+        "timestamp": datetime.fromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S'),
+        "inurl": filepath,
+        "site": path.splitext(path.basename(source))[0],
+        "ext": ext,
+        "intitle": "",
+        "intext": content,
+        "filetype": filetype
+      }
+
+      response = client.index(
+          index = index,
+          id = md5(filepath.encode()).hexdigest(),
+          body = document,
+          refresh = True
+      )
+      #print(response)
+    except Exception as e:
+      print(str(e))
+
+def query(index, text):
+  query = {
+    "size": args.count,
+    "from": args.offset,
+    "query": {
+      "query_string": {
+        "query": text,
+        "fields": ["inurl^100","intitle^50","intext^5"],
+        "default_operator": "AND",
+        "fuzziness": "AUTO",
+        "analyzer": "russian"
+      }
+    },
+    "highlight": {
+      "order": "score",
+      "fields": {
+        "*": {
+          "pre_tags" : [ Fore.RED ],
+          "post_tags" : [ Fore.RESET ],
+          "fragment_size": 50,
+          "number_of_fragments": 3
+        }
+      }
+    }
+  }
+
+  response = client.search(
+      index = index,
+      body = query
+  )
+  for result in response['hits']['hits']:
+      print("{G}{uri} {B}{cache}{R}".format(
+        uri=result['highlight']['inurl'][0] if result['highlight'].get('inurl') else result['_source']['inurl'],
+        cache=result['_id'],
+        G=Fore.GREEN, B=Fore.LIGHTBLACK_EX, R=Fore.RESET))
+      print(" ... ".join(result['highlight'].get('intext',[])))
+
+def cache(index, _id):
+  result = client.get(index='test',id=_id)
+  print(result["_source"]["intext"])
+
+def delete(index, source):
+  csv.field_size_limit(2**32)
+  reader = csv.reader(open(source, errors="surrogateescape"), delimiter=',', quotechar='"')
+  for row in reader:
+    try:
+      timestamp,filepath,ext,filetype,content,*_ = row
+      response = client.delete(
+        index = index,
+        id = md5(filepath.encode()).hexdigest(),
+      )
+      print(response)
+    except Exception as e:
+      print(str(e))
+
+def drop(index):
+  response = client.indices.delete(
+      index = index
+  )
+  print(response)
+
+if args.init:
+  init(index=args.index)
+elif args.drop:
+  drop(index=args.index)
+elif args.file_import:
+  add(index=args.index, source=args.file_import)
+elif args.file_delete:
+  delete(index=args.index, source=args.file_delete)
+elif args.query:
+  query(index=args.index, text=args.query)
+elif args.cache:
+  cache(index=args.index, _id=args.cache)
+else:
+  if args.index:
+    info(index=args.index)
+  else:
+    indexes()
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+GREEN=$'\x1b[32m'
+RESET=$'\x1b[39m'
+
+MATCH=50
+LIMIT=10
+OFFSET=0
+URI='%'
+
+while getopts "m:c:o:u:" opt
+do
+	case $opt in
+		m) MATCH=$OPTARG;;
+		c) LIMIT=$OPTARG;;
+		o) OFFSET=$OPTARG;;
+		u) URI=$OPTARG;;
+esac
+done
+
+[[ $(($#-$OPTIND)) -lt 1 ]] && [[ $URI = '%' ]] && {
+	echo $0 [opts] words.db QUERY
+	echo "opts:"
+	echo "  -m match"
+	echo "  -c count"
+	echo "  -o offset"
+	echo "  -u fragment"
+	exit
+}
+
+DB="${@:$OPTIND:1}"
+shift $OPTIND
+IFS='=%='
+echo "SELECT uri,text FROM words WHERE uri LIKE '$URI' and text LIKE '%$*%' limit $LIMIT offset $OFFSET;" | sqlite3 -separator '=%=' "$DB" | while read uri text
+do
+	echo $GREEN"$uri"$RESET
+	echo "$text" | grep -i -o -P ".{0,$MATCH}$*..{0,$MATCH}" | grep -i --color=auto "$*"
+done
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+USERAGENT="Mozilla"
+IGNORE_EXT="gif,GIF,jpg,JPG,png,PNG,ico,ICO,svg,SVG,woff,ttf,eot"
+
+[ $# -lt 1 ] && {
+	echo "$0 url [/usr/bin/wget options]"
+	echo "example: $0 --level 5 --wait 2 --domains www.site.com --limit-size=10000000 -A html,php -R pdf,jpg -X uploads --no-parent http://site.com/path/to"
+	echo "example: $0 --level 2 --wait 1 --limit-size=500k ftp://target.com/"
+	exit
+}
+
+# Walk a site recursively in wget's --spider mode and list the URLs found.
+# Globals:   USERAGENT (read)
+# Arguments: wget options and the start URL, passed through unchanged
+# Outputs:   one URL per line, extracted from wget's "URL:" log lines
+# NOTE(review): -O points at the fixed path /tmp/spider; kept as is.
+function crawl(){
+	# "$@" and the quoted path keep arguments and directories that contain
+	# spaces or glob characters intact.
+	"$(dirname "$0")/bin/wget" --no-check-certificate --recursive --spider -e robots=off -U "$USERAGENT" -O "/tmp/spider" --no-verbose "$@" 2>&1 | sed -rn 's|.*URL:[ ]*([^ ]+).*|\1|p'
+}
+
+# Download a site recursively with wget and list the URLs that were fetched.
+# Globals:   USERAGENT (read), IGNORE_EXT (read, suffixes rejected via -R)
+# Arguments: wget options and the start URL, passed through unchanged
+# Outputs:   one URL per line, extracted from wget's "URL:" log lines
+function save(){
+	# "$@" and the quoted path keep arguments and directories that contain
+	# spaces or glob characters intact; -N enables wget's timestamping.
+	"$(dirname "$0")/bin/wget" --no-check-certificate --recursive -N -e robots=off -U "$USERAGENT" --no-verbose -R "$IGNORE_EXT" "$@" 2>&1 | sed -rn 's|.*URL:[ ]*([^ ]+).*|\1|p'
+}
+
+#crawl $*
+save $*
+
+#https://yurichev.com/wget.html
@@ -1,7 +0,0 @@
-cd c:\path\to\crawl\windows
-.\crawl.ps1 ..\path\to > out.log
-.\grep.ps1 ..\path\to s3cr3t
-
-cme smb -d dom -u adm -p pas -X '.\grep.ps1 c:\users s3cr3t > c:\grep.log' targets.txt
-sleep 3600
-cme smb -d dom -u adm -p pas -x 'type c:\grep.log' targets.txt
@@ -1,66 +0,0 @@
-echo "begin $PID"
-$ErrorActionPreference = 'SilentlyContinue'
-$TIMEOUT=15
-$haystack = $args[0]
-$needle = $args[1]
-$files = 0
-$exts = @()
-$exts += @("*.doc","*.docx")
-$exts += @("*.xls","*.xlsx")
-$exts += @("*.pdf")
-$exts += @("*.zip")
-$exts += @("*.txt","*.bat","*.vbs","*.ps1","*.reg","*.cfg","*.conf","*.xml","*.log")
-#$exts += @("*.exe","*.dll")
-$opts = @{
-  "Path" = $haystack
-  "Recurse" = $true
-  "Include" = $exts
-}
-
-Get-ChildItem @opts 2> $null | % {
-  if((Get-Item $_.FullName) -isnot [System.IO.DirectoryInfo]) {
-    $files += 1
-  }
-}
-$i = 1
-Get-ChildItem @opts 2> $null | % {
-  if((Get-Item $_.FullName) -isnot [System.IO.DirectoryInfo]) {
-    $file = @{}
-    $file.name = $_.Name
-    $file.path = $_.FullName
-    $file.ext = $_.Extension
-    $file.content = ""
-    #echo "[*] $($file.path)"
-    $job = $null
-    switch -regex ($file.ext) {
-      '.txt|.bat|.vbs|.ps1|.reg|.cfg|.conf|.xml' { $job = Start-Job -FilePath .\lib\plaintext.ps1 -argumentlist $file.path }
-      '.doc*' { $job = Start-Job -FilePath .\lib\word.ps1 -argumentlist $file.path }
-      '.xls*' { $job = Start-Job -FilePath .\lib\excel.ps1 -argumentlist $file.path }
-      '.pdf' { $job = Start-Job -FilePath .\lib\pdf.ps1 -argumentlist $file.path -Init ([ScriptBlock]::Create("Set-Location '$pwd'")) }
-      '.zip|.7z|.tar|.gz|.gzip|.gz' { $job = Start-Job -FilePath .\lib\archive.ps1 -argumentlist $file.path,"grep.ps1",$needle -Init ([ScriptBlock]::Create("Set-Location '$pwd'")) }
-      '.exe|.dll' { $job = Start-Job -FilePath .\lib\executable.ps1 -argumentlist $file.path -Init ([ScriptBlock]::Create("Set-Location '$pwd'")) }
-    }
-    if($job)
-    {
-      Wait-Job -timeout $TIMEOUT $job > $null
-      $file.content = Receive-Job $job
-      #echo $file.content
-      Stop-Job $job
-      Remove-Job $job
-    }
-    if(echo $file.content | select-string $needle) {
-      Write-Output "[+] [$i/$files] $($file.path)"
-      echo $file.content | select-string -Pattern $needle
-      #Write-Host -ForegroundColor green (echo $file.content | select-string -Pattern $needle)
-      #highlight(echo $file.content | select-string $needle)
-    }
-    elseif($file.content -eq 0) {
-      echo "[!] [$i/$files] $($file.path)"
-    }
-    elseif($i % 1 -eq 0) {
-      echo "[*] [$i/$files] $($file.path)"
-    }
-    $i += 1
-  }
-}
-echo 'done'
--- a/Show More
+++ b/Show More