tree = array();
$this->data = null;
$this->ptr = null;
}
/**
 * Parse XML by filename: reads the XML data from a file and hands it to
 * parseByData(), returning whatever that method returns (a
 * multidimensional array, aka tree).
 *
 * If the global $lang differs from DEFAULT_LOCALE and a localized sibling
 * file named "<name>.<lang><ext>" exists next to $filename, that file is
 * parsed instead.
 *
 * @param string $filename full path to the XML file to parse
 * @return bool|array
 * @access public
 * @since 1.0
 * @throws \ErrorException if the file is missing or unreadable, or if
 *                         parseByData() fails (the file name is appended
 *                         to the message and the original exception is
 *                         attached as "previous")
 */
public function parseByFilename($filename)
{
    global $lang;
    if ($lang != DEFAULT_LOCALE) {
        /*
         * Only a dot inside the last path component separates the
         * extension. A dot in a directory name (or no dot at all) means
         * there is no extension, hence no localized variant to look for.
         */
        $pos = strrpos($filename, '.');
        if ($pos !== false
            && strpbrk(substr($filename, $pos), '/' . DIRECTORY_SEPARATOR) === false
        ) {
            $fname = substr($filename, 0, $pos);
            $fext = substr($filename, $pos);
            $localized = $fname . "." . $lang . $fext;
            if (is_file($localized)) {
                if (!is_readable($localized)) {
                    $file = "\n\nFilename: " . basename($localized) . "\n";
                    throw new \ErrorException(self::FILE_NOT_FOUND . $file);
                }
                $filename = $localized;
            }
        }
    }
    $file = "\n\nFilename: " . basename($filename) . "\n";
    /* If we can't read the file there is nothing we can do. */
    if (!is_file($filename) || !is_readable($filename)) {
        throw new \ErrorException(self::FILE_NOT_FOUND . $file);
    }
    /*
     * Read the file contents. Compare against false explicitly: an empty
     * file (or one containing just "0") is a successful read and must be
     * reported by parseByData() as invalid data, not as a read failure.
     */
    $xml = @file_get_contents($filename);
    if ($xml === false) {
        throw new \ErrorException(self::FILE_NOT_READABLE . $file);
    }
    /* Parse the XML; on failure add the file name to the error message. */
    try {
        return $this->parseByData($xml);
    } catch (\ErrorException $e) {
        $msg = "{$e->getMessage()}{$file}";
        /* Code and severity keep their defaults; $e is chained as previous. */
        throw new \ErrorException($msg, 0, E_ERROR, __FILE__, __LINE__, $e);
    }
}
/**
 * Parses the XML data and returns a multidimensional array (aka tree).
 *
 * The data is run through a namespace-aware expat parser whose element and
 * character-data callbacks are this object's startElement(), endElement()
 * and getElement() methods. After parsing, the entry stored under the
 * empty key of $this->tree is passed to simplifyArray() and returned.
 *
 * Named HTML entities (anything but quot, amp, apos, lt, gt) and decimal
 * numeric entities are wrapped in CDATA sections before parsing, so they
 * reach the character-data handler as literal text instead of aborting
 * the parse as undefined entities.
 *
 * @param string $xml the XML to parse
 * @return bool|array
 * @access public
 * @since 1.0
 * @throws \ErrorException if $xml is not a non-empty string or the parser
 *                         reports an error
 */
public function parseByData($xml)
{
    if (!is_string($xml) || strlen($xml) == 0) {
        throw new \ErrorException(self::DATA_NOT_XML);
    }
    /* Create a parser and set the options */
    $parser = xml_parser_create_ns();
    /* Keep element names in their original case. */
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    /* Skip values that consist of whitespace only. */
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
    xml_set_object($parser, $this);
    xml_set_element_handler($parser, 'startElement', 'endElement');
    xml_set_character_data_handler($parser, 'getElement');
    /**
     * Workaround the XML-parser not being able to handle HTML-entities by
     * encapsulating them as CDATA. The replacement must re-emit the matched
     * entity ($1) inside the CDATA section; an empty replacement would
     * silently delete the entity from the document.
     */
    $pattern = '/(&(?:(?!quot|amp|apos|lt|gt)([a-z]+)|(#\d+));)/iU';
    $replace = '<![CDATA[$1]]>';
    $xml = preg_replace($pattern, $replace, $xml);
    /* Parse the data and free the parser resource. */
    if (!xml_parse($parser, $xml, true)) {
        /* Collect all diagnostics before the parser is freed. */
        $code = xml_get_error_code($parser);
        $error = "\n\nError code: " . $code . "\n";
        $error .= "Line: " . xml_get_current_line_number($parser)
            . ", character: " . xml_get_current_column_number($parser) . "\n";
        $error .= "Error message: " . xml_error_string($code) . "\n";
        xml_parser_free($parser);
        throw new \ErrorException(self::PARSER_ERROR . $error);
    }
    xml_parser_free($parser);
    /**
     * The root element will contain an array with an empty key, so we can
     * skip that one right now.
     */
    $tree = $this->tree[''];
    $this->simplifyArray($tree);
    return $tree;
}
/**
* Handles new tags opening in the XML-document.
*
* @param resource $parser XML parser resource
* @param string $name name of the tag
* @param array $attrs list of all attributes for the tag (if any)
* @access private
* @since 1.0
*/
private function startElement($parser, $name, $attrs)
{
/* If we find a colon in the name, we need to check the namespace. */
if (strpos($name, ':') !== false) {
$namespace = implode(':', explode(':', $name, -1));
/* Got (X)HTML data. */
if (in_array($namespace, array(self::NS_HTML4, self::NS_XHTML))) {
$pos = strrpos($name, ':');
$namespace = substr($name, 0, $pos);
$name = substr($name, ($pos+1));
$data = "<{$name}";
foreach ($attrs as $key => $value) {
$data .= " {$key}=\"{$value}\"";
}
/* Handle HTML "empty" elements (ie:
,