mirror of
https://github.com/scummvm/scummvm-web.git
synced 2026-05-21 05:40:47 +00:00
306 lines
11 KiB
PHP
306 lines
11 KiB
PHP
<?php
|
|
namespace ScummVM;
|
|
|
|
/**
 * An XML parser that builds a multidimensional array (aka a tree) from the
 * given XML data, either by filename or by data.
 *
 * The parser is namespace aware: elements in the HTML4/XHTML namespaces are
 * not added to the tree, they are re-serialized as markup and appended to the
 * text of the currently open element. Elements in any other namespace are
 * ignored (their text still ends up in the surrounding element).
 *
 * An instance may be reused: the internal state is reset around every parse.
 *
 * @access public
 * @author Fredrik Wendel
 * @version 1.0
 */
class XMLParser
{
    const FILE_NOT_FOUND = 'The filename specified doesn\'t point to an existing file.';
    const FILE_NOT_READABLE = 'Unable to read the contents of the file.';
    const DATA_NOT_XML = 'The data provided is not XML.';
    const PARSER_ERROR = 'Error parsing XML.';

    const NS_HTML4 = 'http://www.w3.org/TR/html4/';
    const NS_XHTML = 'http://www.w3.org/TR/xhtml1/';

    /* HTML elements that never get a closing tag when re-serialized. */
    private static $empty_elements = array('br', 'hr', 'img');
    /* Tree under construction; also used as a stack of the open elements. */
    private $tree;
    /* Character data collected since the last closed (non-HTML) element. */
    private $data;
    /* Reference to the tree slot of the most recently opened element. */
    private $ptr;

    /**
     * Constructor.
     *
     * @access public
     * @since 1.0
     */
    public function __construct()
    {
        $this->resetState();
    }

    /**
     * Parse XML by filename: reads the XML data from a file and then parses
     * it with parseByData().
     *
     * When the global $lang differs from DEFAULT_LOCALE and a sibling file
     * named "<name>.<lang>.<ext>" exists, that file is parsed instead.
     *
     * @param string $filename full path the XML file to parse
     * @return bool|array the simplified tree, see parseByData()
     * @access public
     * @since 1.0
     * @throws \ErrorException if the file is missing, unreadable or does not
     *                         contain well-formed XML; for parse failures the
     *                         original exception is available as previous
     */
    public function parseByFilename($filename)
    {
        global $lang;

        /* Prefer the localized variant of the file when there is one. */
        if (is_string($lang) && $lang !== '' && $lang != DEFAULT_LOCALE) {
            $localized = $this->localizedFilename($filename, $lang);
            if ($localized !== null && is_file($localized)) {
                if (!is_readable($localized)) {
                    throw new \ErrorException(self::FILE_NOT_READABLE . $this->describeFile($localized));
                }
                $filename = $localized;
            }
        }

        $file = $this->describeFile($filename);
        /* If we can't read the file there is nothing we can do. */
        if (!is_file($filename)) {
            throw new \ErrorException(self::FILE_NOT_FOUND . $file);
        }
        if (!is_readable($filename)) {
            throw new \ErrorException(self::FILE_NOT_READABLE . $file);
        }
        /**
         * Read the file contents. The warning is suppressed because the
         * failure is reported through the exception below. Only a real read
         * failure (false) counts here; an empty file is rejected by
         * parseByData() with a more accurate message.
         */
        $xml = @file_get_contents($filename);
        if ($xml === false) {
            throw new \ErrorException(self::FILE_NOT_READABLE . $file);
        }

        /* Parse the XML. */
        try {
            return $this->parseByData($xml);
        } catch (\ErrorException $e) {
            /* Add the filename to the message, keep the cause attached. */
            $msg = "{$e->getMessage()}{$file}";
            throw new \ErrorException($msg, 0, E_ERROR, __FILE__, __LINE__, $e);
        }
    }

    /**
     * Parses the XML data and returns a multidimensional array (aka tree).
     * The tree is passed through simplifyArray() before it is returned, so
     * nested single-element arrays are replaced by their value.
     *
     * @param string $xml the XML to parse
     * @return bool|array the simplified tree; null when the document produced
     *                    no tree at all
     * @access public
     * @since 1.0
     * @throws \ErrorException if $xml is not a non-empty string or is not
     *                         well-formed XML
     */
    public function parseByData($xml)
    {
        if (!is_string($xml) || strlen($xml) == 0) {
            throw new \ErrorException(self::DATA_NOT_XML);
        }

        /**
         * Workaround the XML-parser not being able to handle HTML-entities by
         * encapsulating them as CDATA.
         */
        $pattern = '/(&(?:(?!quot|amp|apos|lt|gt)([a-z]+)|(#\d+));)/iU';
        $replace = '<![CDATA[\\1]]>';
        $xml = preg_replace($pattern, $replace, $xml);
        if ($xml === null) {
            $error = "\n\nError message: PCRE error " . preg_last_error() . "\n";
            throw new \ErrorException(self::PARSER_ERROR . $error);
        }

        /* Start from a clean slate, whatever a previous parse left behind. */
        $this->resetState();

        /* Create a parser and set the options */
        $parser = xml_parser_create_ns();
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
        /**
         * Array callables are used instead of xml_set_object() with method
         * names, as the latter is deprecated in recent PHP versions.
         */
        xml_set_element_handler($parser, array($this, 'startElement'), array($this, 'endElement'));
        xml_set_character_data_handler($parser, array($this, 'getElement'));

        /* Parse the data and free the parser resource. */
        if (!xml_parse($parser, $xml, true)) {
            $code = xml_get_error_code($parser);
            $error = "\n\nError code: " . $code . "\n";
            $error .= "Line: " . xml_get_current_line_number($parser)
                . ", character: " . xml_get_current_column_number($parser) . "\n";
            $error .= "Error message: " . xml_error_string($code) . "\n";
            $this->releaseParser($parser);
            $this->resetState();
            throw new \ErrorException(self::PARSER_ERROR . $error);
        }
        $this->releaseParser($parser);

        /**
         * The root element will contain an array with an empty key, so we can
         * skip that one right now.
         */
        $tree = isset($this->tree['']) ? $this->tree[''] : null;
        /* Drop our own references before handing the tree to the caller. */
        $this->resetState();
        $this->simplifyArray($tree);
        return $tree;
    }

    /**
     * Handles new tags opening in the XML-document.
     *
     * HTML4/XHTML elements are appended as markup to the collected text,
     * elements in other namespaces are ignored, and everything else gets a
     * slot in the tree.
     *
     * @param resource $parser XML parser resource
     * @param string $name name of the tag
     * @param array $attrs list of all attributes for the tag (if any)
     * @access private
     * @since 1.0
     */
    private function startElement($parser, $name, $attrs)
    {
        /* If we find a colon in the name, we need to check the namespace. */
        if (strpos($name, ':') !== false) {
            $pos = strrpos($name, ':');
            $namespace = substr($name, 0, $pos);
            if (!in_array($namespace, array(self::NS_HTML4, self::NS_XHTML), true)) {
                return;
            }

            /* Got (X)HTML data. */
            $name = substr($name, ($pos+1));
            $data = "<{$name}";
            /**
             * NOTE(review): attribute values are written back unescaped, so
             * a value containing a double quote produces broken markup.
             * Left as is because consumers may rely on the raw output.
             */
            foreach ($attrs as $key => $value) {
                $data .= " {$key}=\"{$value}\"";
            }
            /* Handle HTML "empty" elements (ie: <br>, <hr>) properly. */
            if ($namespace === self::NS_XHTML && in_array($name, self::$empty_elements, true)) {
                $data .= " /";
            }
            $data .= ">";
            $this->getElement($parser, $data);
            return;
        }

        /* If not we can just rock on. */
        $element = (is_array($attrs) && count($attrs) > 0) ? $attrs : null;

        /* Get the key for the last node in the tree. */
        end($this->tree);
        $key = key($this->tree);
        if ($key === null) {
            /* Empty tree: the root element lives under the empty key. */
            $key = '';
        }

        /* Store the position so we can add the data later. */
        $this->ptr = &$this->tree[$key][$name];
        /* Store a reference the attributes. */
        if ($element !== null) {
            $this->ptr['@attributes'] = &$element;
        } else {
            /**
             * For one reason or another that escapes me, we must do this,
             * or the tree won't be properly built. We will work against it
             * in endElement() by overwriting the empty elements created
             * here.
             */
            $this->ptr[] = &$element;
        }
        /**
         * Store the reference directly in the tree until this node (and
         * it's children) are done. Will get removed in endElement().
         */
        $this->tree[$name] = &$element;
    }

    /**
     * Handles data between tags in the XML-document by appending it to the
     * text collected so far.
     *
     * @param resource $parser XML parser resource
     * @param mixed $data data found between tags
     * @access private
     * @since 1.0
     */
    private function getElement($parser, $data)
    {
        $this->data .= $data;
    }

    /**
     * Handles tags closing in the XML-document.
     *
     * @param resource $parser XML parser resource
     * @param string $name name of the tag
     * @access private
     * @since 1.0
     */
    private function endElement($parser, $name)
    {
        /* If we find a colon in the name, we need to check the namespace. */
        if (strpos($name, ':') !== false) {
            $pos = strrpos($name, ':');
            $namespace = substr($name, 0, $pos);
            /* Got (X)HTML data. */
            if (in_array($namespace, array(self::NS_HTML4, self::NS_XHTML), true)) {
                $name = substr($name, ($pos+1));
                /* Handle HTML "empty" elements (ie: <br>, <hr>) properly. */
                if (!in_array($name, self::$empty_elements, true)) {
                    $this->getElement($parser, "</{$name}>");
                }
            }
            return;
        }

        /* Otherwise we can just add the data. */
        $data = trim((string)$this->data);
        /* Compare against '' so that a literal "0" is kept as content. */
        if ($data !== '') {
            /* If we got an empty element in the array, overwrite it. */
            $pos = count($this->ptr);
            /* isset() is false for a missing key as well as for null. */
            if (!isset($this->ptr[($pos-1)])) {
                $pos--;
            }
            $this->ptr[$pos] = $data;
        }

        /* Reset the internal data holder. */
        $this->data = null;
        /* Remove the reference. */
        array_pop($this->tree);
    }

    /**
     * Removes unnecessary arrays in an array tree, if an array contains an
     * array with just one element in it, the middle array will be removed.
     * It won't touch 'attributes' keys though.
     *
     * @param array $array reference to the array tree
     * @param string $parent name of the parent
     * @param boolean $all_singles currently unused, kept for compatibility
     * @access private
     * @since 1.0
     */
    private function simplifyArray(&$array, $parent = '', $all_singles = false)
    {
        if (is_array($array) && count($array) > 0) {
            /* Iterate over a copy, $array itself may be replaced below. */
            foreach ((array)$array as $key => $value) {
                if (is_array($value) && $key !== '@attributes') {
                    $this->simplifyArray($array[$key], $key);
                }
                if (count($array) == 1) {
                    if (array_key_exists(0, $array)) {
                        // HACK: The compatibility page assumes that the entry
                        // 'game' is always an array. In case only one 'game'
                        // tag is specified in a 'games' tag this would result
                        // in the array of 'game' being simplified (i.e.
                        // replaced by its contents). This breaks the
                        // compatibility page. We work around this issue by
                        // simply not simplifying arrays when the parent is
                        // named 'game'.
                        if ($parent !== 'game') {
                            $array = $array[0];
                        }
                    } else {
                        $keys = array_keys($array);
                        $this->simplifyArray($array[$keys[0]], $keys[0]);
                    }
                }
            }
        }
    }

    /**
     * Puts the parser back into its initial state.
     *
     * @access private
     */
    private function resetState()
    {
        /**
         * $ptr is bound by reference to a slot in the tree (and therefore to
         * any tree already returned to a caller). Break that binding first,
         * otherwise the assignment below would write through it.
         */
        unset($this->ptr);
        $this->tree = array();
        $this->data = null;
        $this->ptr = null;
    }

    /**
     * Releases the XML parser. Explicitly freeing is only needed before
     * PHP 8.0, where the parser is a resource; later versions free the
     * parser object on their own.
     *
     * @param resource $parser XML parser resource
     * @access private
     */
    private function releaseParser($parser)
    {
        if (PHP_VERSION_ID < 80000) {
            xml_parser_free($parser);
        }
    }

    /**
     * Builds the name of the localized variant of a file by inserting the
     * language before the extension: "dir/name.ext" => "dir/name.<lang>.ext".
     *
     * @param string $filename path of the default file
     * @param string $lang language to insert
     * @return string|null the localized path, or null when $filename has no
     *                     extension to insert the language in front of
     * @access private
     */
    private function localizedFilename($filename, $lang)
    {
        $dot = strrpos($filename, '.');
        if ($dot === false) {
            return null;
        }
        $extension = substr($filename, $dot);
        /* The last dot is part of a directory name, not an extension. */
        if (strpbrk($extension, '/\\') !== false) {
            return null;
        }
        return substr($filename, 0, $dot) . "." . $lang . $extension;
    }

    /**
     * Formats the trailer that is appended to error messages to tell which
     * file was being processed.
     *
     * @param string $path path of the file
     * @return string the trailer, containing only the base name of the file
     * @access private
     */
    private function describeFile($path)
    {
        return "\n\nFilename: " . basename($path) . "\n";
    }
}
|