QueryPath  2.1.1
src/QueryPath/QueryPath.php
Go to the documentation of this file.
00001 <?php
00002 /** @file
00003  * The Query Path package provides tools for manipulating a Document Object Model.
00004  * The two major DOMs are the XML DOM and the HTML DOM. Using Query Path, you can 
00005  * build, parse, search, and modify DOM documents.
00006  *
00007  * To use Query Path, this is the only file you should need to import.
00008  *
00009  * Standard usage:
00010  * @code
00011  * <?php
00012  * require 'QueryPath/QueryPath.php';
00013  * $qp = qp('#myID', '<?xml version="1.0"?><test><foo id="myID"/></test>');
00014  * $qp->append('<new><elements/></new>')->writeHTML();
00015  * ?>
00016  * @endcode
00017  *
00018  * The above would print (formatted for readability):
00019  * @code
00020  * <?xml version="1.0"?>
00021  * <test>
00022  *  <foo id="myID">
00023  *    <new>
00024  *      <element/>
00025  *    </new>
00026  *  </foo>
00027  * </test>
00028  * @endcode
00029  *
00030  * To learn about the functions available to a Query Path object, 
00031  * see {@link QueryPath}. The {@link qp()} function is used to build
00032  * new QueryPath objects. The documentation for that function explains the
00033  * wealth of arguments that the function can take.
00034  *
00035  * Included with the source code for QueryPath is a complete set of unit tests
00036  * as well as some example files. Those are good resources for learning about
00037  * how to apply QueryPath's tools. The full API documentation can be generated
00038  * from these files using PHPDocumentor.
00039  *
00040  * If you are interested in building extensions for QueryParser, see the 
00041  * {@link QueryPathExtender} class. There, you will find information on adding
00042  * your own tools to QueryPath.
00043  *
00044  * QueryPath also comes with a full CSS 3 selector parser implementation. If
00045  * you are interested in reusing that in other code, you will want to start
00046  * with {@link CssEventHandler.php}, which is the event interface for the parser.
00047  *
00048  * All of the code in QueryPath is licensed under either the LGPL or an MIT-like
00049  * license (you may choose which you prefer). All of the code is Copyright, 2009
00050  * by Matt Butcher.
00051  *
00052  * @author M Butcher <matt @aleph-null.tv>
00053  * @license http://opensource.org/licenses/lgpl-2.1.php The GNU Lesser GPL (LGPL) or an MIT-like license.
00054  * @see QueryPath
00055  * @see qp()
00056  * @see http://querypath.org The QueryPath home page.
00057  * @see http://api.querypath.org An online version of the API docs.
00058  * @see http://technosophos.com For how-tos and examples.
00059  * @copyright Copyright (c) 2009, Matt Butcher.
00060  * @version 2.1.1
00061  *
00062  */
00063 
00064 /** @addtogroup querypath_core Core API
00065  * Core classes and functions for QueryPath.
00066  *
00067  * These are the classes, objects, and functions that developers who use QueryPath
00068  * are likely to use. The qp() and htmlqp() functions are the best place to start,
00069  * while most of the frequently used methods are part of the QueryPath object.
00070  */
00071 
00072 /** @addtogroup querypath_util Utilities
00073  * Utility classes for QueryPath.
00074  *
00075  * These classes add important, but less-often used features to QueryPath. Some of
00076  * these are used transparently (QueryPathIterator). Others you can use directly in your 
00077  * code (QueryPathEntities).
00078  */
00079 
00080 /*   * @namespace QueryPath
00081  * The core classes that compose QueryPath.
00082  *
00083  * The QueryPath classes contain the brunt of the QueryPath code. If you are 
00084  * interested in working with just the CSS engine, you may want to look at CssEventHandler,
00085  * which can be used without the rest of QueryPath. If you are interested in looking 
00086  * carefully at QueryPath's implementation details, then the QueryPath class is where you 
00087  * should begin. If you are interested in writing extensions, than you may want to look at
00088  * QueryPathExtension, and also at some of the simple extensions, such as QPXML.
00089  */
00090  
00091 /**
00092  * Regular expression for checking whether a string looks like XML.
00093  * @deprecated This is no longer used in QueryPath.
00094  */
00095 define('ML_EXP','/^[^<]*(<(.|\s)+>)[^>]*$/');
00096 
00097 /**
00098  * The CssEventHandler interfaces with the CSS parser.
00099  */
00100 require_once 'CssEventHandler.php';
00101 /**
00102  * The extender is used to provide support for extensions.
00103  */
00104 require_once 'QueryPathExtension.php';
00105 
00106 /**
00107  * Build a new Query Path.
00108  * This builds a new Query Path object. The new object can be used for 
00109  * reading, search, and modifying a document.
00110  *
00111  * While it is permissible to directly create new instances of a QueryPath
00112  * implementation, it is not advised. Instead, you should use this function
00113  * as a factory.
00114  *
00115  * Example:
00116  * @code
00117  * <?php
00118  * qp(); // New empty QueryPath
00119  * qp('path/to/file.xml'); // From a file
00120  * qp('<html><head></head><body></body></html>'); // From HTML or XML
00121  * qp(QueryPath::XHTML_STUB); // From a basic HTML document.
00122  * qp(QueryPath::XHTML_STUB, 'title'); // Create one from a basic HTML doc and position it at the title element.
00123  *
00124  * // Most of the time, methods are chained directly off of this call.
00125  * qp(QueryPath::XHTML_STUB, 'body')->append('<h1>Title</h1>')->addClass('body-class');
00126  * ?>
00127  * @endcode
00128  *
00129  * This function is used internally by QueryPath. Anything that modifies the
00130  * behavior of this function may also modify the behavior of common QueryPath
00131  * methods.
00132  *
00133  * <b>Types of documents that QueryPath can support</b>
00134  *
00135  *  qp() can take any of these as its first argument:
00136  *
00137  *  - A string of XML or HTML (See {@link XHTML_STUB})
00138  *  - A path on the file system or a URL
00139  *  - A {@link DOMDocument} object
00140  *  - A {@link SimpleXMLElement} object.
00141  *  - A {@link DOMNode} object.
00142  *  - An array of {@link DOMNode} objects (generally {@link DOMElement} nodes).
00143  *  - Another {@link QueryPath} object.
00144  *
00145  * Keep in mind that most features of QueryPath operate on elements. Other 
00146  * sorts of DOMNodes might not work with all features.
00147  *
00148  * <b>Supported Options</b>
00149  *  - context: A stream context object. This is used to pass context info
00150  *    to the underlying file IO subsystem.
00151  *  - encoding: A valid character encoding, such as 'utf-8' or 'ISO-8859-1'.
00152  *    The default is system-dependant, typically UTF-8. Note that this is 
00153  *    only used when creating new documents, not when reading existing content.
00154  *    (See convert_to_encoding below.)
00155  *  - parser_flags: An OR-combined set of parser flags. The flags supported
00156  *    by the DOMDocument PHP class are all supported here.
00157  *  - omit_xml_declaration: Boolean. If this is TRUE, then certain output
00158  *    methods (like {@link QueryPath::xml()}) will omit the XML declaration
00159  *    from the beginning of a document.
00160  *  - replace_entities: Boolean. If this is TRUE, then any of the insertion
00161  *    functions (before(), append(), etc.) will replace named entities with
00162  *    their decimal equivalent, and will replace un-escaped ampersands with 
00163  *    a numeric entity equivalent.
00164  *  - ignore_parser_warnings: Boolean. If this is TRUE, then E_WARNING messages
00165  *    generated by the XML parser will not cause QueryPath to throw an exception.
00166  *    This is useful when parsing
00167  *    badly mangled HTML, or when failure to find files should not result in 
00168  *    an exception. By default, this is FALSE -- that is, parsing warnings and 
00169  *    IO warnings throw exceptions.
00170  *  - convert_to_encoding: Use the MB library to convert the document to the 
00171  *    named encoding before parsing. This is useful for old HTML (set it to
00172  *    iso-8859-1 for best results). If this is not supplied, no character set 
00173  *    conversion will be performed. See {@link mb_convert_encoding()}.
00174  *    (QueryPath 1.3 and later)
00175  *  - convert_from_encoding: If 'convert_to_encoding' is set, this option can be
00176  *    used to explicitly define what character set the source document is using.
00177  *    By default, QueryPath will allow the MB library to guess the encoding.
00178  *    (QueryPath 1.3 and later)
00179  *  - strip_low_ascii: If this is set to TRUE then markup will have all low ASCII
00180  *    characters (<32) stripped out before parsing. This is good in cases where 
00181  *    icky HTML has (illegal) low characters in the document.
00182  *  - use_parser: If 'xml', Parse the document as XML. If 'html', parse the 
00183  *    document as HTML. Note that the XML parser is very strict, while the 
00184  *    HTML parser is more lenient, but does enforce some of the DTD/Schema.
00185  *    <i>By default, QueryPath autodetects the type.</i>
00186  *  - escape_xhtml_js_css_sections: XHTML needs script and css sections to be
00187  *    escaped. Yet older readers do not handle CDATA sections, and comments do not
00188  *    work properly (for numerous reasons). By default, QueryPath's *XHTML methods
00189  *    will wrap a script body with a CDATA declaration inside of C-style comments.
00190  *    If you want to change this, you can set this option with one of the 
00191  *    JS_CSS_ESCAPE_* constants, or you can write your own.
00192  *  - QueryPath_class: (ADVANCED) Use this to set the actual classname that
00193  *    {@link qp()} loads as a QueryPath instance. It is assumed that the 
00194  *    class is either {@link QueryPath} or a subclass thereof. See the test 
00195  *    cases for an example.
00196  *
00197  * @ingroup querypath_core
00198  * @param mixed $document
00199  *  A document in one of the forms listed above.
00200  * @param string $string 
00201  *  A CSS 3 selector.
00202  * @param array $options
00203  *  An associative array of options. Currently supported options are listed above.
00204  * @return QueryPath
00205  */
00206 function qp($document = NULL, $string = NULL, $options = array()) {
00207   
00208   $qpClass = isset($options['QueryPath_class']) ? $options['QueryPath_class'] : 'QueryPath';
00209   
00210   $qp = new $qpClass($document, $string, $options);
00211   return $qp;
00212 }
00213 
00214 /**
00215  * A special-purpose version of {@link qp()} designed specifically for HTML.
00216  *
00217  * XHTML (if valid) can be easily parsed by {@link qp()} with no problems. However,
00218  * because of the way that libxml handles HTML, there are several common steps that
00219  * need to be taken to reliably parse non-XML HTML documents. This function is
00220  * a convenience tool for configuring QueryPath to parse HTML.
00221  *
00222  * The following options are automatically set unless overridden:
00223  *  - ignore_parser_warnings: TRUE
00224  *  - convert_to_encoding: ISO-8859-1 (the best for the HTML parser).
00225  *  - convert_from_encoding: auto (autodetect encoding)
00226  *  - use_parser: html
00227  *
00228  * Parser warning messages are also suppressed, so if the parser emits a warning,
00229  * the application will not be notified. This is equivalent to 
00230  * calling @code@qp()@endcode.
00231  *
00232  * Warning: Character set conversions will only work if the Multi-Byte (mb) library
00233  * is installed and enabled. This is usually enabled, but not always.
00234  *
00235  * @ingroup querypath_core
00236  * @see qp()
00237  */
00238 function htmlqp($document = NULL, $selector = NULL, $options = array()) {
00239 
00240   // Need a way to force an HTML parse instead of an XML parse when the 
00241   // doctype is XHTML, since many XHTML documents are not valid XML
00242   // (because of coding errors, not by design).
00243   
00244   $options += array(
00245     'ignore_parser_warnings' => TRUE,
00246     'convert_to_encoding' => 'ISO-8859-1',
00247     'convert_from_encoding' => 'auto',
00248     //'replace_entities' => TRUE,
00249     'use_parser' => 'html',
00250     // This is stripping actually necessary low ASCII.
00251     //'strip_low_ascii' => TRUE,
00252   );
00253   return @qp($document, $selector, $options);
00254 }
00255 
00256 /**
00257  * The Query Path object is the primary tool in this library.
00258  *
00259  * To create a new Query Path, use the {@link qp()} function.
00260  *
00261  * If you are new to these documents, start at the {@link QueryPath.php} page.
00262  * There you will find a quick guide to the tools contained in this project.
00263  *
00264  * A note on serialization: QueryPath uses DOM classes internally, and those
00265  * do not serialize well at all. In addition, QueryPath may contain many
00266  * extensions, and there is no guarantee that extensions can serialize. The
00267  * moral of the story: Don't serialize QueryPath.
00268  *
00269  * @see qp()
00270  * @see QueryPath.php
00271  * @ingroup querypath_core
00272  */
00273 class QueryPath implements IteratorAggregate, Countable {
00274   
00275   /**
00276    * The version string for this version of QueryPath.
00277    *
00278    * Standard releases will be of the following form: <MAJOR>.<MINOR>[.<PATCH>][-STABILITY].
00279    *
00280    * Examples:
00281    * - 2.0
00282    * - 2.1.1
00283    * - 2.0-alpha1
00284    *
00285    * Developer releases will always be of the form dev-<DATE>.
00286    *
00287    * @since 2.0
00288    */
00289   const VERSION = '2.1.1';
00290   
00291   /**
00292    * This is a stub HTML 4.01 document.
00293    *
00294    * <b>Using {@link QueryPath::XHTML_STUB} is preferred.</b>
00295    *
00296    * This is primarily for generating legacy HTML content. Modern web applications
00297    * should use {@link QueryPath::XHTML_STUB}.
00298    *
00299    * Use this stub with the HTML familiy of methods ({@link html()}, 
00300    * {@link writeHTML()}, {@link innerHTML()}).
00301    */
00302   const HTML_STUB = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
00303   <html lang="en">
00304   <head>
00305   <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
00306   <title>Untitled</title>
00307   </head>
00308   <body></body>
00309   </html>';
00310   
00311   /**
00312    * This is a stub XHTML document.
00313    * 
00314    * Since XHTML is an XML format, you should use XML functions with this document
00315    * fragment. For example, you should use {@link xml()}, {@link innerXML()}, and 
00316    * {@link writeXML()}.
00317    * 
00318    * This can be passed into {@link qp()} to begin a new basic HTML document.
00319    *
00320    * Example:
00321    * @code
00322    * $qp = qp(QueryPath::XHTML_STUB); // Creates a new XHTML document
00323    * $qp->writeXML(); // Writes the document as well-formed XHTML.
00324    * @endcode
00325    * @since 2.0
00326    */
00327   const XHTML_STUB = '<?xml version="1.0"?>
00328   <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
00329   <html xmlns="http://www.w3.org/1999/xhtml">
00330   <head>
00331   <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
00332   <title>Untitled</title>
00333   </head>
00334   <body></body>
00335   </html>';
00336   
00337   /**
00338    * Default parser flags.
00339    *
00340    * These are flags that will be used if no global or local flags override them.
00341    * @since 2.0
00342    */
00343   const DEFAULT_PARSER_FLAGS = NULL;
00344   
00345   const JS_CSS_ESCAPE_CDATA = '\\1';
00346   const JS_CSS_ESCAPE_CDATA_CCOMMENT = '/* \\1 */';
00347   const JS_CSS_ESCAPE_CDATA_DOUBLESLASH = '// \\1';
00348   const JS_CSS_ESCAPE_NONE = '';
00349   
00350   //const IGNORE_ERRORS = 1544; //E_NOTICE | E_USER_WARNING | E_USER_NOTICE;
00351   private $errTypes = 771; //E_ERROR; | E_USER_ERROR;
00352   
00353   /**
00354    * The base DOMDocument.
00355    */
00356   protected $document = NULL;
00357   private $options = array(
00358     'parser_flags' => NULL,
00359     'omit_xml_declaration' => FALSE,
00360     'replace_entities' => FALSE,
00361     'exception_level' => 771, // E_ERROR | E_USER_ERROR | E_USER_WARNING | E_WARNING
00362     'ignore_parser_warnings' => FALSE,
00363     'escape_xhtml_js_css_sections' => self::JS_CSS_ESCAPE_CDATA_CCOMMENT,
00364   );
00365   /**
00366    * The array of matches.
00367    */
00368   protected $matches = array();
00369   /**
00370    * The last array of matches.
00371    */
00372   protected $last = array(); // Last set of matches.
00373   private $ext = array(); // Extensions array.
00374   
00375   /**
00376    * The number of current matches.
00377    *
00378    * @see count()
00379    */
00380   public $length = 0;
00381   
00382   /**
00383    * Constructor.
00384    *
00385    * This should not be called directly. Use the {@link qp()} factory function
00386    * instead.
00387    *
00388    * @param mixed $document 
00389    *   A document-like object.
00390    * @param string $string
00391    *   A CSS 3 Selector
00392    * @param array $options
00393    *   An associative array of options.
00394    * @see qp()
00395    */
00396   public function __construct($document = NULL, $string = NULL, $options = array()) {
00397     $string = trim($string);
00398     $this->options = $options + QueryPathOptions::get() + $this->options;
00399     
00400     $parser_flags = isset($options['parser_flags']) ? $options['parser_flags'] : self::DEFAULT_PARSER_FLAGS;
00401     if (!empty($this->options['ignore_parser_warnings'])) {
00402       // Don't convert parser warnings into exceptions.
00403       $this->errTypes = 257; //E_ERROR | E_USER_ERROR;
00404     }
00405     elseif (isset($this->options['exception_level'])) {
00406       // Set the error level at which exceptions will be thrown. By default, 
00407       // QueryPath will throw exceptions for 
00408       // E_ERROR | E_USER_ERROR | E_WARNING | E_USER_WARNING.
00409       $this->errTypes = $this->options['exception_level'];
00410     }
00411     
00412     // Empty: Just create an empty QP.
00413     if (empty($document)) {
00414       $this->document = isset($this->options['encoding']) ? new DOMDocument('1.0', $this->options['encoding']) : new DOMDocument();
00415       $this->setMatches(new SplObjectStorage());
00416     }
00417     // Figure out if document is DOM, HTML/XML, or a filename
00418     elseif (is_object($document)) {
00419       
00420       if ($document instanceof QueryPath) {
00421         $this->matches = $document->get(NULL, TRUE);
00422         if ($this->matches->count() > 0)
00423           $this->document = $this->getFirstMatch()->ownerDocument;
00424       }
00425       elseif ($document instanceof DOMDocument) {
00426         $this->document = $document;
00427         //$this->matches = $this->matches($document->documentElement);
00428         $this->setMatches($document->documentElement);
00429       }
00430       elseif ($document instanceof DOMNode) {
00431         $this->document = $document->ownerDocument;
00432         //$this->matches = array($document);
00433         $this->setMatches($document);
00434       }
00435       elseif ($document instanceof SimpleXMLElement) {
00436         $import = dom_import_simplexml($document);
00437         $this->document = $import->ownerDocument;
00438         //$this->matches = array($import);
00439         $this->setMatches($import);
00440       }
00441       elseif ($document instanceof SplObjectStorage) {
00442         if ($document->count() == 0) {
00443           throw new QueryPathException('Cannot initialize QueryPath from an empty SplObjectStore');
00444         }
00445         $this->matches = $document;
00446         $this->document = $this->getFirstMatch()->ownerDocument;
00447       }
00448       else {
00449         throw new QueryPathException('Unsupported class type: ' . get_class($document));
00450       }
00451     }
00452     elseif (is_array($document)) {
00453       //trigger_error('Detected deprecated array support', E_USER_NOTICE);
00454       if (!empty($document) && $document[0] instanceof DOMNode) {
00455         $found = new SplObjectStorage();
00456         foreach ($document as $item) $found->attach($item);
00457         //$this->matches = $found;
00458         $this->setMatches($found);
00459         $this->document = $this->getFirstMatch()->ownerDocument;
00460       }
00461     }
00462     elseif ($this->isXMLish($document)) {
00463       // $document is a string with XML
00464       $this->document = $this->parseXMLString($document);
00465       $this->setMatches($this->document->documentElement);
00466     }
00467     else {
00468       
00469       // $document is a filename
00470       $context = empty($options['context']) ? NULL : $options['context'];
00471       $this->document = $this->parseXMLFile($document, $parser_flags, $context);
00472       $this->setMatches($this->document->documentElement);
00473     }
00474     
00475     // Do a find if the second param was set.
00476     if (isset($string) && strlen($string) > 0) {
00477       $this->find($string);
00478     }
00479   }
00480   
00481   /**
00482    * A static function for transforming data into a Data URL.
00483    *
00484    * This can be used to create Data URLs for injection into CSS, JavaScript, or other 
00485    * non-XML/HTML content. If you are working with QP objects, you may want to use
00486    * {@link dataURL()} instead.
00487    *
00488    * @param mixed $data
00489    *  The contents to inject as the data. The value can be any one of the following:
00490    *  - A URL: If this is given, then the subsystem will read the content from that URL. THIS 
00491    *    MUST BE A FULL URL, not a relative path.
00492    *  - A string of data: If this is given, then the subsystem will encode the string.
00493    *  - A stream or file handle: If this is given, the stream's contents will be encoded
00494    *    and inserted as data.
00495    *  (Note that we make the assumption here that you would never want to set data to be
00496    *  a URL. If this is an incorrect assumption, file a bug.)
00497    * @param string $mime
00498    *  The MIME type of the document.
00499    * @param resource $context
00500    *  A valid context. Use this only if you need to pass a stream context. This is only necessary
00501    *  if $data is a URL. (See {@link stream_context_create()}).
00502    * @return 
00503    *  An encoded data URL.
00504    */
00505   public static function encodeDataURL($data, $mime = 'application/octet-stream', $context = NULL) {
00506     if (is_resource($data)) {
00507       $data = stream_get_contents($data);
00508     }
00509     elseif (filter_var($data, FILTER_VALIDATE_URL)) {
00510       $data = file_get_contents($data, FALSE, $context);
00511     }
00512     
00513     $encoded = base64_encode($data);
00514     
00515     return 'data:' . $mime . ';base64,' . $encoded;
00516   }
00517   
00518   /**
00519    * Get the effective options for the current QueryPath object.
00520    *
00521    * This returns an associative array of all of the options as set
00522    * for the current QueryPath object. This includes default options,
00523    * options directly passed in via {@link qp()} or the constructor,
00524    * an options set in the {@link QueryPathOptions} object.
00525    *
00526    * The order of merging options is this:
00527    *  - Options passed in using {@link qp()} are highest priority, and will
00528    *    override other options.
00529    *  - Options set with {@link QueryPathOptions} will override default options,
00530    *    but can be overridden by options passed into {@link qp()}.
00531    *  - Default options will be used when no overrides are present.
00532    *
00533    * This function will return the options currently used, with the above option
00534    * overriding having been calculated already.
00535    *
00536    * @return array
00537    *  An associative array of options, calculated from defaults and overridden 
00538    *  options. 
00539    * @see qp()
00540    * @see QueryPathOptions::set()
00541    * @see QueryPathOptions::merge()
00542    * @since 2.0
00543    */
00544   public function getOptions() {
00545     return $this->options;
00546   }
00547   
00548   /**
00549    * Select the root element of the document.
00550    *
00551    * This sets the current match to the document's root element. For 
00552    * practical purposes, this is the same as:
00553    * @code
00554    * qp($someDoc)->find(':root');
00555    * @endcode
00556    * However, since it doesn't invoke a parser, it has less overhead. It also 
00557    * works in cases where the QueryPath has been reduced to zero elements (a
00558    * case that is not handled by find(':root') because there is no element
00559    * whose root can be found).
00560    *
00561    * @param string $selector
00562    *  A selector. If this is supplied, QueryPath will navigate to the 
00563    *  document root and then run the query. (Added in QueryPath 2.0 Beta 2) 
00564    * @return QueryPath
00565    *  The QueryPath object, wrapping the root element (document element)
00566    *  for the current document.
00567    */
00568   public function top($selector = NULL) {
00569     $this->setMatches($this->document->documentElement);
00570     // print '=====================' . PHP_EOL;
00571     // var_dump($this->document);
00572     // print '=====================' . PHP_EOL;
00573     return !empty($selector) ? $this->find($selector) : $this;
00574   }
00575   
00576   /**
00577    * Given a CSS Selector, find matching items.
00578    *
00579    * @param string $selector
00580    *   CSS 3 Selector
00581    * @return QueryPath
00582    * @see filter()
00583    * @see is()
00584    * @todo If a find() returns zero matches, then a subsequent find() will
00585    *  also return zero matches, even if that find has a selector like :root.
00586    *  The reason for this is that the {@link QueryPathCssEventHandler} does
00587    *  not set the root of the document tree if it cannot find any elements
00588    *  from which to determine what the root is. The workaround is to use 
00589    *  {@link top()} to select the root element again.
00590    */
00591   public function find($selector) {
00592     
00593     // Optimize for ID/Class searches. These two take a long time
00594     // when a rdp is used. Using an XPath pushes work to C code.
00595     $ids = array();
00596     
00597     $regex = '/^#([\w-]+)$|^\.([\w-]+)$/'; // $1 is ID, $2 is class.
00598     //$regex = '/^#([\w-]+)$/';
00599     if (preg_match($regex, $selector, $ids) === 1) {
00600       // If $1 is a match, we have an ID.
00601       if (!empty($ids[1])) {
00602         $xpath = new DOMXPath($this->document);
00603         foreach ($this->matches as $item) {
00604           
00605           // For whatever reasons, the .// does not work correctly
00606           // if the selected element is the root element. So we have
00607           // an awful hack.
00608           if ($item->isSameNode($this->document->documentElement) ) {
00609             $xpathQuery = "//*[@id='{$ids[1]}']";
00610           }
00611           // This is the correct XPath query.
00612           else {
00613             $xpathQuery = ".//*[@id='{$ids[1]}']";
00614           }
00615           //$nl = $xpath->query("//*[@id='{$ids[1]}']", $item);
00616           //$nl = $xpath->query(".//*[@id='{$ids[1]}']", $item);
00617           $nl = $xpath->query($xpathQuery, $item);
00618           if ($nl->length > 0) {
00619             $this->setMatches($nl->item(0));
00620             break;
00621           }
00622           else {
00623             // If no match is found, we set an empty.
00624             $this->noMatches();
00625           }
00626         }
00627       }
00628       // Quick search for class values. While the XPath can't do it
00629       // all, it is faster than doing a recusive node search.
00630       else {
00631         $xpath = new DOMXPath($this->document);
00632         $found = new SplObjectStorage();
00633         foreach ($this->matches as $item) {
00634           // See comments on this in the #id code above.
00635           if ($item->isSameNode($this->document->documentElement) ) {
00636             $xpathQuery = "//*[@class]";
00637           }
00638           // This is the correct XPath query.
00639           else {
00640             $xpathQuery = ".//*[@class]";
00641           }
00642           $nl = $xpath->query($xpathQuery, $item);
00643           for ($i = 0; $i < $nl->length; ++$i) {
00644             $vals = explode(' ', $nl->item($i)->getAttribute('class'));
00645             if (in_array($ids[2], $vals)) $found->attach($nl->item($i));
00646           }
00647         }
00648         $this->setMatches($found);
00649       }
00650       
00651       return $this;
00652     }
00653     
00654     
00655     $query = new QueryPathCssEventHandler($this->matches);
00656     $query->find($selector);
00657     //$this->matches = $query->getMatches();
00658     $this->setMatches($query->getMatches());
00659     return $this;
00660   }
00661   
00662   /**
00663    * Execute an XPath query and store the results in the QueryPath.
00664    *
00665    * Most methods in this class support CSS 3 Selectors. Sometimes, though,
00666    * XPath provides a finer-grained query language. Use this to execute
00667    * XPath queries.
00668    *
00669    * Beware, though. QueryPath works best on DOM Elements, but an XPath 
00670    * query can return other nodes, strings, and values. These may not work with
00671    * other QueryPath functions (though you will be able to access the
00672    * values with {@link get()}).
00673    *
00674    * @param string $query
00675    *  An XPath query.
00676    * @param array $options
00677    *  Currently supported options are:
00678    *   - 'namespace_prefix': And XML namespace prefix to be used as the default. Used 
00679    *      in conjunction with 'namespace_uri'
00680    *   - 'namespace_uri': The URI to be used as the default namespace URI. Used 
00681    *      with 'namespace_prefix'
00682    * @return QueryPath 
00683    *  A QueryPath object wrapping the results of the query.
00684    * @see find()
00685    * @author M Butcher
00686    * @author Xavier Prud'homme
00687    */
00688   public function xpath($query, $options = array()) {
00689     $xpath = new DOMXPath($this->document);
00690     
00691     // Register a default namespace.
00692     if (!empty($options['namespace_prefix']) && !empty($options['namespace_uri'])) {
00693       $xpath->registerNamespace($options['namespace_prefix'], $options['namespace_uri']);
00694     }
00695     
00696     $found = new SplObjectStorage();
00697     foreach ($this->matches as $item) {
00698       $nl = $xpath->query($query, $item);
00699       if ($nl->length > 0) {
00700         for ($i = 0; $i < $nl->length; ++$i) $found->attach($nl->item($i));
00701       }
00702     }
00703     $this->setMatches($found);
00704     return $this;
00705   }
00706   
00707   /**
00708    * Get the number of elements currently wrapped by this object.
00709    *
00710    * Note that there is no length property on this object.
00711    *
00712    * @return int
00713    *  Number of items in the object.
00714    * @deprecated QueryPath now implements Countable, so use count().
00715    */
00716   public function size() {
00717     return $this->matches->count();
00718   }
00719   
00720   /**
00721    * Get the number of elements currently wrapped by this object.
00722    *
00723    * Since QueryPath is Countable, the PHP count() function can also
00724    * be used on a QueryPath.
00725    *
00726    * @code
00727    * <?php
00728    *  count(qp($xml, 'div'));
00729    * ?>
00730    * @endcode
00731    *
00732    * @return int
00733    *  The number of matches in the QueryPath.
00734    */
00735   public function count() {
00736     return $this->matches->count();
00737   }
00738   
00739   /**
00740    * Get one or all elements from this object.
00741    *
00742    * When called with no paramaters, this returns all objects wrapped by 
00743    * the QueryPath. Typically, these are DOMElement objects (unless you have
00744    * used {@link map()}, {@link xpath()}, or other methods that can select
00745    * non-elements).
00746    *
00747    * When called with an index, it will return the item in the QueryPath with 
00748    * that index number.
00749    *
00750    * Calling this method does not change the QueryPath (e.g. it is 
00751    * non-destructive).
00752    *
00753    * You can use qp()->get() to iterate over all elements matched. You can
00754    * also iterate over qp() itself (QueryPath implementations must be Traversable). 
00755    * In the later case, though, each item 
00756    * will be wrapped in a QueryPath object. To learn more about iterating
00757    * in QueryPath, see {@link examples/techniques.php}.
00758    *
00759    * @param int $index
00760    *   If specified, then only this index value will be returned. If this 
00761    *   index is out of bounds, a NULL will be returned.
00762    * @param boolean $asObject
00763    *   If this is TRUE, an {@link SplObjectStorage} object will be returned 
00764    *   instead of an array. This is the preferred method for extensions to use.
00765    * @return mixed
00766    *   If an index is passed, one element will be returned. If no index is
00767    *   present, an array of all matches will be returned.
00768    * @see eq()
00769    * @see SplObjectStorage
00770    */
00771   public function get($index = NULL, $asObject = FALSE) {
00772     if (isset($index)) {
00773       return ($this->size() > $index) ? $this->getNthMatch($index) : NULL;
00774     }
00775     // Retain support for legacy.
00776     if (!$asObject) {
00777       $matches = array();
00778       foreach ($this->matches as $m) $matches[] = $m;
00779       return $matches;
00780     }
00781     return $this->matches;
00782   }
00783   
00784   /**
00785    * Get the DOMDocument that we currently work with.
00786    *
00787    * This returns the current DOMDocument. Any changes made to this document will be
00788    * accessible to QueryPath, as both will share access to the same object.
00789    * 
00790    * @return DOMDocument
00791    */
00792   public function document() {
00793     return $this->document;
00794   }
00795   
00796   /**
00797    * On an XML document, load all XIncludes.
00798    *
00799    * @return QueryPath
00800    */
00801   public function xinclude() {
00802     $this->document->xinclude();
00803     return $this;
00804   }
00805   
00806   /**
00807    * Get all current elements wrapped in an array.
00808    * Compatibility function for jQuery 1.4, but identical to calling {@link get()}
00809    * with no parameters.
00810    *
00811    * @return array
00812    *  An array of DOMNodes (typically DOMElements).
00813    */
00814   public function toArray() {
00815     return $this->get();
00816   }
00817   /**
00818    * Get/set an attribute.
00819    * - If no parameters are specified, this returns an associative array of all 
00820    *   name/value pairs.
00821    * - If both $name and $value are set, then this will set the attribute name/value
00822    *   pair for all items in this object. 
00823    * - If $name is set, and is an array, then
00824    *   all attributes in the array will be set for all items in this object.
00825    * - If $name is a string and is set, then the attribute value will be returned.
00826    *
00827    * When an attribute value is retrieved, only the attribute value of the FIRST
00828    * match is returned.
00829    *
00830    * @param mixed $name
00831    *   The name of the attribute or an associative array of name/value pairs.
00832    * @param string $value
00833    *   A value (used only when setting an individual property).
00834    * @return mixed
00835    *   If this was a setter request, return the QueryPath object. If this was
00836    *   an access request (getter), return the string value.
00837    * @see removeAttr()
00838    * @see tag()
00839    * @see hasAttr()
00840    * @see hasClass()
00841    */
00842   public function attr($name = NULL, $value = NULL) {
00843     
00844     // Default case: Return all attributes as an assoc array.
00845     if (is_null($name)) {
00846       if ($this->matches->count() == 0) return NULL;
00847       $ele = $this->getFirstMatch();
00848       $buffer = array();
00849       
00850       // This does not appear to be part of the DOM
00851       // spec. Nor is it documented. But it works.
00852       foreach ($ele->attributes as $name => $attrNode) {
00853         $buffer[$name] = $attrNode->value;
00854       }
00855       return $buffer;
00856     }
00857     
00858     // multi-setter
00859     if (is_array($name)) {
00860       foreach ($name as $k => $v) {
00861         foreach ($this->matches as $m) $m->setAttribute($k, $v);
00862       }
00863       return $this;
00864     }
00865     // setter
00866     if (isset($value)) {
00867       foreach ($this->matches as $m) $m->setAttribute($name, $value);
00868       return $this;
00869     }
00870     
00871     //getter
00872     if ($this->matches->count() == 0) return NULL;
00873     
00874     // Special node type handler:
00875     if ($name == 'nodeType') {
00876       return $this->getFirstMatch()->nodeType;
00877     }
00878     
00879     // Always return first match's attr.
00880     return $this->getFirstMatch()->getAttribute($name);
00881   }
00882   /**
00883    * Check to see if the given attribute is present.
00884    *
00885    * This returns TRUE if <em>all</em> selected items have the attribute, or 
00886    * FALSE if at least one item does not have the attribute.
00887    *
00888    * @param string $attrName
00889    *  The attribute name.
00890    * @return boolean
00891    *  TRUE if all matches have the attribute, FALSE otherwise.
00892    * @since 2.0
00893    * @see attr()
00894    * @see hasClass()
00895    */
00896   public function hasAttr($attrName) {
00897     foreach ($this->matches as $match) {
00898       if (!$match->hasAttribute($attrName)) return FALSE;
00899     }
00900     return TRUE;
00901   }
00902   
00903   /**
00904    * Set/get a CSS value for the current element(s).
00905    * This sets the CSS value for each element in the QueryPath object.
00906    * It does this by setting (or getting) the style attribute (without a namespace).
00907    *
00908    * For example, consider this code:
00909    * @code
00910    * <?php
00911    * qp(HTML_STUB, 'body')->css('background-color','red')->html();
00912    * ?>
00913    * @endcode
00914    * This will return the following HTML:
00915    * @code
00916    * <body style="background-color: red"/>
00917    * @endcode
00918    *
00919    * If no parameters are passed into this function, then the current style
00920    * element will be returned unparsed. Example:
00921    * @code
00922    * <?php
00923    * qp(HTML_STUB, 'body')->css('background-color','red')->css();
00924    * ?>
00925    * @endcode
00926    * This will return the following:
00927    * @code
00928    * background-color: red
00929    * @endcode
00930    *
00931    * As of QueryPath 2.1, existing style attributes will be merged with new attributes.
00932    * (In previous versions of QueryPath, a call to css() overwrite the existing style
00933    * values).
00934    *
00935    * @param mixed $name
00936    *  If this is a string, it will be used as a CSS name. If it is an array,
00937    *  this will assume it is an array of name/value pairs of CSS rules. It will
00938    *  apply all rules to all elements in the set.
00939    * @param string $value
00940    *  The value to set. This is only set if $name is a string.
00941    * @return QueryPath
00942    */
00943   public function css($name = NULL, $value = '') {
00944     if (empty($name)) {
00945       return $this->attr('style');
00946     }
00947     
00948     // Get any existing CSS.
00949     $css = array();
00950     foreach ($this->matches as $match) {
00951       $style = $match->getAttribute('style');
00952       if (!empty($style)) {
00953         // XXX: Is this sufficient?
00954         $style_array = explode(';', $style);
00955         foreach ($style_array as $item) {
00956           $item = trim($item);
00957           
00958           // Skip empty attributes.
00959           if (strlen($item) == 0) continue;
00960           
00961           list($css_att, $css_val) = explode(':',$item, 2);
00962           $css[$css_att] = trim($css_val);
00963         }
00964       }
00965     }
00966     
00967     if (is_array($name)) {
00968       // Use array_merge instead of + to preserve order.
00969       $css = array_merge($css, $name);
00970     }
00971     else {
00972       $css[$name] = $value;
00973     }
00974     
00975     // Collapse CSS into a string.
00976     $format = '%s: %s;';
00977     $css_string = '';
00978     foreach ($css as $n => $v) {
00979       $css_string .= sprintf($format, $n, trim($v));
00980     }
00981     
00982     $this->attr('style', $css_string);
00983     return $this;
00984   }
00985   
00986   /**
00987    * Insert or retrieve a Data URL.
00988    *
00989    * When called with just $attr, it will fetch the result, attempt to decode it, and
00990    * return an array with the MIME type and the application data.
00991    *
00992    * When called with both $attr and $data, it will inject the data into all selected elements
00993    * So @code$qp->dataURL('src', file_get_contents('my.png'), 'image/png')@endcode will inject 
00994    * the given PNG image into the selected elements.
00995    *
00996    * The current implementation only knows how to encode and decode Base 64 data.
00997    *
00998    * Note that this is known *not* to work on IE 6, but should render fine in other browsers.
00999    *
01000    * @param string $attr
01001    *  The name of the attribute.
01002    * @param mixed $data
01003    *  The contents to inject as the data. The value can be any one of the following:
01004    *  - A URL: If this is given, then the subsystem will read the content from that URL. THIS 
01005    *    MUST BE A FULL URL, not a relative path.
01006    *  - A string of data: If this is given, then the subsystem will encode the string.
01007    *  - A stream or file handle: If this is given, the stream's contents will be encoded
01008    *    and inserted as data.
01009    *  (Note that we make the assumption here that you would never want to set data to be
01010    *  a URL. If this is an incorrect assumption, file a bug.)
01011    * @param string $mime
01012    *  The MIME type of the document.
01013    * @param resource $context
01014    *  A valid context. Use this only if you need to pass a stream context. This is only necessary
01015    *  if $data is a URL. (See {@link stream_context_create()}).
01016    * @return
01017    *  If this is called as a setter, this will return a QueryPath object. Otherwise, it
01018    *  will attempt to fetch data out of the attribute and return that.
01019    * @see http://en.wikipedia.org/wiki/Data:_URL
01020    * @see attr()
01021    * @since 2.1
01022    */
01023   public function dataURL($attr, $data = NULL, $mime = 'application/octet-stream', $context = NULL) {
01024     if (is_null($data)) {
01025       // Attempt to fetch the data
01026       $data = $this->attr($attr);
01027       if (empty($data) || is_array($data) || strpos($data, 'data:') !== 0) {
01028         return;
01029       }
01030       
01031       // So 1 and 2 should be MIME types, and 3 should be the base64-encoded data.
01032       $regex = '/^data:([a-zA-Z0-9]+)\/([a-zA-Z0-9]+);base64,(.*)$/';
01033       $matches = array();
01034       preg_match($regex, $data, $matches);
01035       
01036       if (!empty($matches)) {
01037         $result = array(
01038           'mime' => $matches[1] . '/' . $matches[2],
01039           'data' => base64_decode($matches[3]),
01040         );
01041         return $result;
01042       }
01043     }
01044     else {
01045       
01046       $attVal = self::encodeDataURL($data, $mime, $context);
01047       
01048       return $this->attr($attr, $attVal);
01049       
01050     }
01051   }
01052   
01053 
01054   
01055   /**
01056    * Remove the named attribute from all elements in the current QueryPath.
01057    *
01058    * This will remove any attribute with the given name. It will do this on each
01059    * item currently wrapped by QueryPath.
01060    *
01061    * As is the case in jQuery, this operation is not considered destructive.
01062    *
01063    * @param string $name
01064    *  Name of the parameter to remove.
01065    * @return QueryPath
01066    *  The QueryPath object with the same elements.
01067    * @see attr()
01068    */
01069   public function removeAttr($name) {
01070     foreach ($this->matches as $m) {
01071       //if ($m->hasAttribute($name))
01072         $m->removeAttribute($name);
01073     }
01074     return $this;
01075   }
01076   /**
01077    * Reduce the matched set to just one.
01078    *
01079    * This will take a matched set and reduce it to just one item -- the item 
01080    * at the index specified. This is a destructive operation, and can be undone
01081    * with {@link end()}.
01082    *
01083    * @param $index
01084    *  The index of the element to keep. The rest will be 
01085    *  discarded.
01086    * @return QueryPath
01087    * @see get()
01088    * @see is()
01089    * @see end()
01090    */
01091   public function eq($index) {
01092     // XXX: Might there be a more efficient way of doing this?
01093     $this->setMatches($this->getNthMatch($index));
01094     return $this;
01095   }
01096   /**
01097    * Given a selector, this checks to see if the current set has one or more matches.
01098    *
01099    * Unlike jQuery's version, this supports full selectors (not just simple ones).
01100    *
01101    * @param string $selector
01102    *   The selector to search for. As of QueryPath 2.1.1, this also supports passing a
01103    *   DOMNode object.
01104    * @return boolean
01105    *   TRUE if one or more elements match. FALSE if no match is found.
01106    * @see get()
01107    * @see eq()
01108    */
01109   public function is($selector) {
01110     
01111     if (is_object($selector)) {
01112       if ($selector instanceof DOMNode) {
01113         return count($this->matches) == 1 && $selector->isSameNode($this->get(0));
01114       }
01115       elseif ($selector instanceof Traversable) {
01116         if (count($selector) != count($this->matches)) {
01117           return FALSE;
01118         }
01119         // Without $seen, there is an edge case here if $selector contains the same object
01120         // more than once, but the counts are equal. For example, [a, a, a, a] will
01121         // pass an is() on [a, b, c, d]. We use the $seen SPLOS to prevent this.
01122         $seen = new SplObjectStorage();
01123         foreach ($selector as $item) {
01124           if (!$this->matches->contains($item) || $seen->contains($item)) {
01125             return FALSE;
01126           }
01127           $seen->attach($item);
01128         }
01129         return TRUE;
01130       }
01131       throw new Exception('Cannot compare an object to a QueryPath.');
01132       return FALSE;
01133     }
01134     
01135     foreach ($this->matches as $m) {
01136       $q = new QueryPathCssEventHandler($m);
01137       if ($q->find($selector)->getMatches()->count()) {
01138         return TRUE;
01139       }
01140     }
01141     return FALSE;
01142   }
01143   /**
01144    * Filter a list down to only elements that match the selector.
01145    * Use this, for example, to find all elements with a class, or with 
01146    * certain children.
01147    *
01148    * @param string $selector
01149    *   The selector to use as a filter.
01150    * @return QueryPath
01151    *   The QueryPath with non-matching items filtered out.
01152    * @see filterLambda()
01153    * @see filterCallback()
01154    * @see map()
01155    * @see find()
01156    * @see is()
01157    */
01158   public function filter($selector) {
01159     $found = new SplObjectStorage();
01160     foreach ($this->matches as $m) if (qp($m, NULL, $this->options)->is($selector)) $found->attach($m);
01161     $this->setMatches($found);
01162     return $this;
01163   }
01164   /**
01165    * Filter based on a lambda function.
01166    *
01167    * The function string will be executed as if it were the body of a 
01168    * function. It is passed two arguments:
01169    * - $index: The index of the item.
01170    * - $item: The current Element.
01171    * If the function returns boolean FALSE, the item will be removed from
01172    * the list of elements. Otherwise it will be kept.
01173    *
01174    * Example:
01175    * @code
01176    * qp('li')->filterLambda('qp($item)->attr("id") == "test"');
01177    * @endcode
01178    *
01179    * The above would filter down the list to only an item whose ID is
01180    * 'text'.
01181    *
01182    * @param string $fn
01183    *  Inline lambda function in a string.
01184    * @return QueryPath
01185    * @see filter()
01186    * @see map()
01187    * @see mapLambda()
01188    * @see filterCallback()
01189    */
01190   public function filterLambda($fn) {
01191     $function = create_function('$index, $item', $fn);
01192     $found = new SplObjectStorage();
01193     $i = 0;
01194     foreach ($this->matches as $item)
01195       if ($function($i++, $item) !== FALSE) $found->attach($item);
01196     
01197     $this->setMatches($found);
01198     return $this;
01199   }
01200   
01201   /**
01202    * Use regular expressions to filter based on the text content of matched elements.
01203    *
01204    * Only items that match the given regular expression will be kept. All others will
01205    * be removed.
01206    *
01207    * The regular expression is run against the <i>text content</i> (the PCDATA) of the 
01208    * elements. This is a way of filtering elements based on their content. 
01209    *
01210    * Example:
01211    * @code
01212    *  <?xml version="1.0"?>
01213    *  <div>Hello <i>World</i></div>
01214    * @endcode
01215    *
01216    * @code
01217    *  <?php
01218    *    // This will be 1.
01219    *    qp($xml, 'div')->filterPreg('/World/')->size();
01220    *  ?>
01221    * @endcode
01222    *
01223    * The return value above will be 1 because the text content of @codeqp($xml, 'div')@endcode is
01224    * @codeHello World@endcode.
01225    *
01226    * Compare this to the behavior of the <em>:contains()</em> CSS3 pseudo-class.
01227    * 
01228    * @param string $regex
01229    *  A regular expression.
01230    * @return QueryPath
01231    * @see filter()
01232    * @see filterCallback()
01233    * @see preg_match()
01234    */
01235   public function filterPreg($regex) {
01236     
01237     $found = new SplObjectStorage();
01238     
01239     foreach ($this->matches as $item) {
01240       if (preg_match($regex, $item->textContent) > 0) {
01241         $found->attach($item);
01242       }
01243     }
01244     $this->setMatches($found);
01245     
01246     return $this;
01247   }
01248   /**
01249    * Filter based on a callback function.
01250    *
01251    * A callback may be any of the following:
01252    *  - a function: 'my_func'.
01253    *  - an object/method combo: $obj, 'myMethod'
01254    *  - a class/method combo: 'MyClass', 'myMethod'
01255    * Note that classes are passed in strings. Objects are not.
01256    *
01257    * Each callback is passed to arguments:
01258    *  - $index: The index position of the object in the array.
01259    *  - $item: The item to be operated upon.
01260    *
01261    * If the callback function returns FALSE, the item will be removed from the 
01262    * set of matches. Otherwise the item will be considered a match and left alone.
01263    *
01264    * @param callback $callback.
01265    *   A callback either as a string (function) or an array (object, method OR 
01266    *   classname, method).
01267    * @return QueryPath
01268    *   Query path object augmented according to the function.
01269    * @see filter()
01270    * @see filterLambda()
01271    * @see map()
01272    * @see is()
01273    * @see find()
01274    */
01275   public function filterCallback($callback) {
01276     $found = new SplObjectStorage();
01277     $i = 0;
01278     if (is_callable($callback)) {
01279       foreach($this->matches as $item) 
01280         if (call_user_func($callback, $i++, $item) !== FALSE) $found->attach($item);
01281     }
01282     else {
01283       throw new QueryPathException('The specified callback is not callable.');
01284     }
01285     $this->setMatches($found);
01286     return $this;
01287   }
01288   /**
01289    * Filter a list to contain only items that do NOT match.
01290    *
01291    * @param string $selector
01292    *  A selector to use as a negation filter. If the filter is matched, the 
01293    *  element will be removed from the list.
01294    * @return QueryPath
01295    *  The QueryPath object with matching items filtered out.
01296    * @see find()
01297    */
01298   public function not($selector) {
01299     $found = new SplObjectStorage();
01300     if ($selector instanceof DOMElement) {
01301       foreach ($this->matches as $m) if ($m !== $selector) $found->attach($m); 
01302     }
01303     elseif (is_array($selector)) {
01304       foreach ($this->matches as $m) {
01305         if (!in_array($m, $selector, TRUE)) $found->attach($m);
01306       }
01307     }
01308     elseif ($selector instanceof SplObjectStorage) {
01309       foreach ($this->matches as $m) if ($selector->contains($m)) $found->attach($m); 
01310     }
01311     else {
01312       foreach ($this->matches as $m) if (!qp($m, NULL, $this->options)->is($selector)) $found->attach($m);
01313     }
01314     $this->setMatches($found);
01315     return $this;
01316   }
01317   /**
01318    * Get an item's index.
01319    *
01320    * Given a DOMElement, get the index from the matches. This is the 
01321    * converse of {@link get()}.
01322    *
01323    * @param DOMElement $subject
01324    *  The item to match.
01325    * 
01326    * @return mixed
01327    *  The index as an integer (if found), or boolean FALSE. Since 0 is a 
01328    *  valid index, you should use strong equality (===) to test..
01329    * @see get()
01330    * @see is()
01331    */
01332   public function index($subject) {
01333     
01334     $i = 0;
01335     foreach ($this->matches as $m) {
01336       if ($m === $subject) {
01337         return $i;
01338       }
01339       ++$i;
01340     }
01341     return FALSE;
01342   }
01343   /**
01344    * Run a function on each item in a set.
01345    *
01346    * The mapping callback can return anything. Whatever it returns will be
01347    * stored as a match in the set, though. This means that afer a map call, 
01348    * there is no guarantee that the elements in the set will behave correctly
01349    * with other QueryPath functions.
01350    *
01351    * Callback rules:
01352    * - If the callback returns NULL, the item will be removed from the array.
01353    * - If the callback returns an array, the entire array will be stored in 
01354    *   the results.
01355    * - If the callback returns anything else, it will be appended to the array 
01356    *   of matches.
01357    *
01358    * @param callback $callback
01359    *  The function or callback to use. The callback will be passed two params:
01360    *  - $index: The index position in the list of items wrapped by this object.
01361    *  - $item: The current item.
01362    *
01363    * @return QueryPath
01364    *  The QueryPath object wrapping a list of whatever values were returned
01365    *  by each run of the callback.
01366    *
01367    * @see QueryPath::get()
01368    * @see filter()
01369    * @see find()
01370    */
01371   public function map($callback) {
01372     $found = new SplObjectStorage();
01373     
01374     if (is_callable($callback)) {
01375       $i = 0;
01376       foreach ($this->matches as $item) {
01377         $c = call_user_func($callback, $i, $item);
01378         if (isset($c)) {
01379           if (is_array($c) || $c instanceof Iterable) {
01380             foreach ($c as $retval) {
01381               if (!is_object($retval)) {
01382                 $tmp = new stdClass();
01383                 $tmp->textContent = $retval;
01384                 $retval = $tmp;
01385               }
01386               $found->attach($retval);
01387             }
01388           }
01389           else {
01390             if (!is_object($c)) {
01391               $tmp = new stdClass();
01392               $tmp->textContent = $c;
01393               $c = $tmp;
01394             }
01395             $found->attach($c);
01396           }
01397         }
01398         ++$i;
01399       }
01400     }
01401     else {
01402       throw new QueryPathException('Callback is not callable.');
01403     }
01404     $this->setMatches($found, FALSE);
01405     return $this;
01406   }
01407   /**
01408    * Narrow the items in this object down to only a slice of the starting items.
01409    *
01410    * @param integer $start
01411    *  Where in the list of matches to begin the slice.
01412    * @param integer $length
01413    *  The number of items to include in the slice. If nothing is specified, the 
01414    *  all remaining matches (from $start onward) will be included in the sliced
01415    *  list.
01416    * @return QueryPath
01417    * @see array_slice()
01418    */
01419   public function slice($start, $length = 0) {
01420     $end = $length;
01421     $found = new SplObjectStorage();
01422     if ($start >= $this->size()) {
01423       $this->setMatches($found);
01424       return $this;
01425     }
01426     
01427     $i = $j = 0;
01428     foreach ($this->matches as $m) {
01429       if ($i >= $start) {
01430         if ($end > 0 && $j >= $end) {
01431           break;
01432         }
01433         $found->attach($m);
01434         ++$j;
01435       }
01436       ++$i;
01437     }
01438     
01439     $this->setMatches($found);
01440     return $this;
01441   }
01442   /**
01443    * Run a callback on each item in the list of items.
01444    *
01445    * Rules of the callback:
01446    * - A callback is passed two variables: $index and $item. (There is no 
01447    *   special treatment of $this, as there is in jQuery.)
01448    *   - You will want to pass $item by reference if it is not an
01449    *     object (DOMNodes are all objects).
01450    * - A callback that returns FALSE will stop execution of the each() loop. This
01451    *   works like break in a standard loop.
01452    * - A TRUE return value from the callback is analogous to a continue statement.
01453    * - All other return values are ignored.
01454    *
01455    * @param callback $callback
01456    *  The callback to run.
01457    * @return QueryPath
01458    *  The QueryPath.
01459    * @see eachLambda()
01460    * @see filter()
01461    * @see map()
01462    */
01463   public function each($callback) {
01464     if (is_callable($callback)) {
01465       $i = 0;
01466       foreach ($this->matches as $item) {
01467         if (call_user_func($callback, $i, $item) === FALSE) return $this;
01468         ++$i;
01469       }
01470     }
01471     else {
01472       throw new QueryPathException('Callback is not callable.');
01473     }
01474     return $this;
01475   }
01476   /**
01477    * An each() iterator that takes a lambda function.
01478    * 
01479    * @param string $lambda
01480    *  The lambda function. This will be passed ($index, &$item).
01481    * @return QueryPath
01482    *  The QueryPath object.
01483    * @see each()
01484    * @see filterLambda()
01485    * @see filterCallback()
01486    * @see map()
01487    */
01488   public function eachLambda($lambda) {
01489     $index = 0;
01490     foreach ($this->matches as $item) {
01491       $fn = create_function('$index, &$item', $lambda);
01492       if ($fn($index, $item) === FALSE) return $this;
01493       ++$index;
01494     }
01495     return $this;
01496   }
01497   /**
01498    * Insert the given markup as the last child.
01499    *
01500    * The markup will be inserted into each match in the set.
01501    *
01502    * The same element cannot be inserted multiple times into a document. DOM
01503    * documents do not allow a single object to be inserted multiple times 
01504    * into the DOM. To insert the same XML repeatedly, we must first clone
01505    * the object. This has one practical implication: Once you have inserted
01506    * an element into the object, you cannot further manipulate the original 
01507    * element and expect the changes to be replciated in the appended object.
01508    * (They are not the same -- there is no shared reference.) Instead, you
01509    * will need to retrieve the appended object and operate on that.
01510    *
01511    * @param mixed $data
01512    *  This can be either a string (the usual case), or a DOM Element.
01513    * @return QueryPath
01514    *  The QueryPath object.
01515    * @see appendTo()
01516    * @see prepend()
01517    * @throws QueryPathException
01518    *  Thrown if $data is an unsupported object type.
01519    */
01520   public function append($data) {
01521     $data = $this->prepareInsert($data);
01522     if (isset($data)) {
01523       if (empty($this->document->documentElement) && $this->matches->count() == 0) {
01524         // Then we assume we are writing to the doc root
01525         $this->document->appendChild($data);
01526         $found = new SplObjectStorage();
01527         $found->attach($this->document->documentElement);
01528         $this->setMatches($found);
01529       }
01530       else {
01531         // You can only append in item once. So in cases where we
01532         // need to append multiple times, we have to clone the node.
01533         foreach ($this->matches as $m) { 
01534           // DOMDocumentFragments are even more troublesome, as they don't
01535           // always clone correctly. So we have to clone their children.
01536           if ($data instanceof DOMDocumentFragment) {
01537             foreach ($data->childNodes as $n)
01538               $m->appendChild($n->cloneNode(TRUE));
01539           }
01540           else {
01541             // Otherwise a standard clone will do.
01542             $m->appendChild($data->cloneNode(TRUE));
01543           }
01544           
01545         }
01546       }
01547         
01548     }
01549     return $this;
01550   }
01551   /**
01552    * Append the current elements to the destination passed into the function.
01553    *
01554    * This cycles through all of the current matches and appends them to 
01555    * the context given in $destination. If a selector is provided then the 
01556    * $destination is queried (using that selector) prior to the data being
01557    * appended. The data is then appended to the found items.
01558    *
01559    * @param QueryPath $dest
01560    *  A QueryPath object that will be appended to.
01561    * @return QueryPath
01562    *  The original QueryPath, unaltered. Only the destination QueryPath will
01563    *  be modified.
01564    * @see append()
01565    * @see prependTo()
01566    * @throws QueryPathException
01567    *  Thrown if $data is an unsupported object type.
01568    */
01569   public function appendTo(QueryPath $dest) {
01570     foreach ($this->matches as $m) $dest->append($m);
01571     return $this;
01572   }
01573   /**
01574    * Insert the given markup as the first child.
01575    *
01576    * The markup will be inserted into each match in the set.
01577    *
01578    * @param mixed $data
01579    *  This can be either a string (the usual case), or a DOM Element.
01580    * @return QueryPath
01581    * @see append()
01582    * @see before()
01583    * @see after()
01584    * @see prependTo()
01585    * @throws QueryPathException
01586    *  Thrown if $data is an unsupported object type.
01587    */
01588   public function prepend($data) {
01589     $data = $this->prepareInsert($data);
01590     if (isset($data)) {
01591       foreach ($this->matches as $m) {
01592         $ins = $data->cloneNode(TRUE);
01593         if ($m->hasChildNodes())
01594           $m->insertBefore($ins, $m->childNodes->item(0));
01595         else
01596           $m->appendChild($ins);
01597       }
01598     }
01599     return $this;
01600   }
01601   /**
01602    * Take all nodes in the current object and prepend them to the children nodes of
01603    * each matched node in the passed-in QueryPath object.
01604    *
01605    * This will iterate through each item in the current QueryPath object and 
01606    * add each item to the beginning of the children of each element in the 
01607    * passed-in QueryPath object.
01608    *
01609    * @see insertBefore()
01610    * @see insertAfter()
01611    * @see prepend()
01612    * @see appendTo()
01613    * @param QueryPath $dest
01614    *  The destination QueryPath object.
01615    * @return QueryPath
01616    *  The original QueryPath, unmodified. NOT the destination QueryPath.
01617    * @throws QueryPathException
01618    *  Thrown if $data is an unsupported object type.
01619    */
01620   public function prependTo(QueryPath $dest) {
01621     foreach ($this->matches as $m) $dest->prepend($m);
01622     return $this;
01623   }
01624 
01625   /**
01626    * Insert the given data before each element in the current set of matches.
01627    *
01628    * This will take the give data (XML or HTML) and put it before each of the items that 
01629    * the QueryPath object currently contains. Contrast this with after().
01630    * 
01631    * @param mixed $data
01632    *  The data to be inserted. This can be XML in a string, a DomFragment, a DOMElement,
01633    *  or the other usual suspects. (See {@link qp()}).
01634    * @return QueryPath
01635    *  Returns the QueryPath with the new modifications. The list of elements currently
01636    *  selected will remain the same.
01637    * @see insertBefore()
01638    * @see after()
01639    * @see append()
01640    * @see prepend()
01641    * @throws QueryPathException
01642    *  Thrown if $data is an unsupported object type.
01643    */
01644   public function before($data) {
01645     $data = $this->prepareInsert($data);
01646     foreach ($this->matches as $m) {
01647       $ins = $data->cloneNode(TRUE);
01648       $m->parentNode->insertBefore($ins, $m);
01649     }
01650     
01651     return $this;
01652   }
01653   /**
01654    * Insert the current elements into the destination document.
01655    * The items are inserted before each element in the given QueryPath document.
01656    * That is, they will be siblings with the current elements.
01657    *
01658    * @param QueryPath $dest
01659    *  Destination QueryPath document.
01660    * @return QueryPath
01661    *  The current QueryPath object, unaltered. Only the destination QueryPath
01662    *  object is altered.
01663    * @see before()
01664    * @see insertAfter()
01665    * @see appendTo()
01666    * @throws QueryPathException
01667    *  Thrown if $data is an unsupported object type.
01668    */
01669   public function insertBefore(QueryPath $dest) {
01670     foreach ($this->matches as $m) $dest->before($m);
01671     return $this;
01672   }
01673   /**
01674    * Insert the contents of the current QueryPath after the nodes in the 
01675    * destination QueryPath object.
01676    *
01677    * @param QueryPath $dest
01678    *  Destination object where the current elements will be deposited.
01679    * @return QueryPath
01680    *  The present QueryPath, unaltered. Only the destination object is altered.
01681    * @see after()
01682    * @see insertBefore()
01683    * @see append()
01684    * @throws QueryPathException
01685    *  Thrown if $data is an unsupported object type.
01686    */
01687   public function insertAfter(QueryPath $dest) {
01688     foreach ($this->matches as $m) $dest->after($m);
01689     return $this;
01690   }
01691   /**
01692    * Insert the given data after each element in the current QueryPath object.
01693    *
01694    * This inserts the element as a peer to the currently matched elements.
01695    * Contrast this with {@link append()}, which inserts the data as children
01696    * of matched elements.
01697    *
01698    * @param mixed $data
01699    *  The data to be appended.
01700    * @return QueryPath
01701    *  The QueryPath object (with the items inserted).
01702    * @see before()
01703    * @see append()
01704    * @throws QueryPathException
01705    *  Thrown if $data is an unsupported object type.
01706    */
01707   public function after($data) {
01708     $data = $this->prepareInsert($data);
01709     foreach ($this->matches as $m) {
01710       $ins = $data->cloneNode(TRUE);
01711       if (isset($m->nextSibling)) 
01712         $m->parentNode->insertBefore($ins, $m->nextSibling);
01713       else
01714         $m->parentNode->appendChild($ins);
01715     }
01716     return $this;
01717   }
01718   /**
01719    * Replace the existing element(s) in the list with a new one.
01720    *
01721    * @param mixed $new
01722    *  A DOMElement or XML in a string. This will replace all elements
01723    *  currently wrapped in the QueryPath object.
01724    * @return QueryPath
01725    *  The QueryPath object wrapping <b>the items that were removed</b>.
01726    *  This remains consistent with the jQuery API.
01727    * @see append()
01728    * @see prepend()
01729    * @see before()
01730    * @see after()
01731    * @see remove()
01732    * @see replaceAll()
01733    */
01734   public function replaceWith($new) {
01735     $data = $this->prepareInsert($new);
01736     $found = new SplObjectStorage();
01737     foreach ($this->matches as $m) {
01738       $parent = $m->parentNode;
01739       $parent->insertBefore($data->cloneNode(TRUE), $m);
01740       $found->attach($parent->removeChild($m));
01741     }
01742     $this->setMatches($found);
01743     return $this;
01744   }
01745   /**
01746    * Remove the parent element from the selected node or nodes.
01747    *
01748    * This takes the given list of nodes and "unwraps" them, moving them out of their parent
01749    * node, and then deleting the parent node.
01750    *
01751    * For example, consider this:
01752    *
01753    * @code
01754    *   <root><wrapper><content/></wrapper></root>
01755    * @endcode
01756    * 
01757    * Now we can run this code:
01758    * @code
01759    *   qp($xml, 'content')->unwrap();
01760    * @endcode
01761    *
01762    * This will result in:
01763    *
01764    * @code
01765    *   <root><content/></root>
01766    * @endcode
01767    * This is the opposite of {@link wrap()}.
01768    *
01769    * <b>The root element cannot be unwrapped.</b> It has no parents.
01770    * If you attempt to use unwrap on a root element, this will throw a QueryPathException.
01771    * (You can, however, "Unwrap" a child that is a direct descendant of the root element. This
01772    * will remove the root element, and replace the child as the root element. Be careful, though.
01773    * You cannot set more than one child as a root element.)
01774    *
01775    * @return QueryPath
01776    *  The QueryPath object, with the same element(s) selected.
01777    * @throws QueryPathException
01778    *  An exception is thrown if one attempts to unwrap a root element.
01779    * @see wrap()
01780    * @since 2.1
01781    * @author mbutcher
01782    */
01783   public function unwrap() {
01784     
01785     // We do this in two loops in order to
01786     // capture the case where two matches are
01787     // under the same parent. Othwerwise we might
01788     // remove a match before we can move it.
01789     $parents = new SplObjectStorage();
01790     foreach ($this->matches as $m) {
01791       
01792       // Cannot unwrap the root element.
01793       if ($m->isSameNode($m->ownerDocument->documentElement)) {
01794         throw new QueryPathException('Cannot unwrap the root element.');
01795       }
01796       
01797       // Move children to peer of parent.
01798       $parent = $m->parentNode;
01799       $old = $parent->removeChild($m);
01800       $parent->parentNode->insertBefore($old, $parent);
01801       $parents->attach($parent);
01802     }
01803     
01804     // Now that all the children are moved, we 
01805     // remove all of the parents.
01806     foreach ($parents as $ele) {
01807       $ele->parentNode->removeChild($ele);
01808     }
01809     
01810     return $this;
01811   }
01812   /**
01813    * Wrap each element inside of the given markup.
01814    *
01815    * Markup is usually a string, but it can also be a DOMNode, a document
01816    * fragment, a SimpleXMLElement, or another QueryPath object (in which case
01817    * the first item in the list will be used.)
01818    *
01819    * @param mixed $markup 
01820    *  Markup that will wrap each element in the current list.
01821    * @return QueryPath
01822    *  The QueryPath object with the wrapping changes made.
01823    * @see wrapAll()
01824    * @see wrapInner()
01825    */
01826   public function wrap($markup) {
01827     $data = $this->prepareInsert($markup);
01828     
01829     // If the markup passed in is empty, we don't do any wrapping.
01830     if (empty($data)) {
01831       return $this;
01832     }
01833     
01834     foreach ($this->matches as $m) {
01835       $copy = $data->firstChild->cloneNode(TRUE);
01836       
01837       // XXX: Should be able to avoid doing this over and over.
01838       if ($copy->hasChildNodes()) {
01839         $deepest = $this->deepestNode($copy); 
01840         // FIXME: Does this need a different data structure?
01841         $bottom = $deepest[0];
01842       }
01843       else
01844         $bottom = $copy;
01845 
01846       $parent = $m->parentNode;
01847       $parent->insertBefore($copy, $m);
01848       $m = $parent->removeChild($m);
01849       $bottom->appendChild($m);
01850       //$parent->appendChild($copy);
01851     }
01852     return $this;  
01853   }
01854   /**
01855    * Wrap all elements inside of the given markup.
01856    *
01857    * So all elements will be grouped together under this single marked up 
01858    * item. This works by first determining the parent element of the first item
01859    * in the list. It then moves all of the matching elements under the wrapper
01860    * and inserts the wrapper where that first element was found. (This is in 
01861    * accordance with the way jQuery works.)
01862    *
01863    * Markup is usually XML in a string, but it can also be a DOMNode, a document
01864    * fragment, a SimpleXMLElement, or another QueryPath object (in which case
01865    * the first item in the list will be used.)
01866    * 
01867    * @param string $markup 
01868    *  Markup that will wrap all elements in the current list.
01869    * @return QueryPath
01870    *  The QueryPath object with the wrapping changes made.
01871    * @see wrap()
01872    * @see wrapInner()
01873    */
01874   public function wrapAll($markup) {
01875     if ($this->matches->count() == 0) return;
01876     
01877     $data = $this->prepareInsert($markup);
01878     
01879     if (empty($data)) {
01880       return $this;
01881     }
01882     
01883     if ($data->hasChildNodes()) {
01884       $deepest = $this->deepestNode($data); 
01885       // FIXME: Does this need fixing?
01886       $bottom = $deepest[0];
01887     }
01888     else
01889       $bottom = $data;
01890 
01891     $first = $this->getFirstMatch();
01892     $parent = $first->parentNode;
01893     $parent->insertBefore($data, $first);
01894     foreach ($this->matches as $m) {
01895       $bottom->appendChild($m->parentNode->removeChild($m));
01896     }
01897     return $this;
01898   }
01899   /**
01900    * Wrap the child elements of each item in the list with the given markup.
01901    *
01902    * Markup is usually a string, but it can also be a DOMNode, a document
01903    * fragment, a SimpleXMLElement, or another QueryPath object (in which case
01904    * the first item in the list will be used.)
01905    *
01906    * @param string $markup 
01907    *  Markup that will wrap children of each element in the current list.
01908    * @return QueryPath
01909    *  The QueryPath object with the wrapping changes made.
01910    * @see wrap()
01911    * @see wrapAll()
01912    */
01913   public function wrapInner($markup) {
01914     $data = $this->prepareInsert($markup);
01915     
01916     // No data? Short circuit.
01917     if (empty($data)) return $this;
01918     
01919     if ($data->hasChildNodes()) {
01920       $deepest = $this->deepestNode($data); 
01921       // FIXME: ???
01922       $bottom = $deepest[0];
01923     }
01924     else
01925       $bottom = $data;
01926       
01927     foreach ($this->matches as $m) {
01928       if ($m->hasChildNodes()) {
01929         while($m->firstChild) {
01930           $kid = $m->removeChild($m->firstChild);
01931           $bottom->appendChild($kid);
01932         }
01933       }
01934       $m->appendChild($data);
01935     }
01936     return $this; 
01937   }
01938   /**
01939    * Reduce the set of matches to the deepest child node in the tree.
01940    *
01941    * This loops through the matches and looks for the deepest child node of all of 
01942    * the matches. "Deepest", here, is relative to the nodes in the list. It is 
01943    * calculated as the distance from the starting node to the most distant child
01944    * node. In other words, it is not necessarily the farthest node from the root
01945    * element, but the farthest note from the matched element.
01946    *
01947    * In the case where there are multiple nodes at the same depth, all of the 
01948    * nodes at that depth will be included.
01949    *
01950    * @return QueryPath
01951    *  The QueryPath wrapping the single deepest node.
01952    */
01953   public function deepest() {
01954     $deepest = 0;
01955     $winner = new SplObjectStorage();
01956     foreach ($this->matches as $m) {
01957       $local_deepest = 0;
01958       $local_ele = $this->deepestNode($m, 0, NULL, $local_deepest);
01959       
01960       // Replace with the new deepest.
01961       if ($local_deepest > $deepest) {
01962         $winner = new SplObjectStorage();
01963         foreach ($local_ele as $lele) $winner->attach($lele);
01964         $deepest = $local_deepest;
01965       }
01966       // Augument with other equally deep elements.
01967       elseif ($local_deepest == $deepest) {
01968         foreach ($local_ele as $lele)
01969           $winner->attach($lele);
01970       }
01971     }
01972     $this->setMatches($winner);
01973     return $this;
01974   }
01975   
01976   /**
01977    * A depth-checking function. Typically, it only needs to be
01978    * invoked with the first parameter. The rest are used for recursion.
01979    * @see deepest();
01980    * @param DOMNode $ele
01981    *  The element.
01982    * @param int $depth
01983    *  The depth guage
01984    * @param mixed $current
01985    *  The current set.
01986    * @param DOMNode $deepest
01987    *  A reference to the current deepest node.
01988    * @return array
01989    *  Returns an array of DOM nodes.
01990    */
01991   protected function deepestNode(DOMNode $ele, $depth = 0, $current = NULL, &$deepest = NULL) {
01992     // FIXME: Should this use SplObjectStorage?
01993     if (!isset($current)) $current = array($ele);
01994     if (!isset($deepest)) $deepest = $depth;
01995     if ($ele->hasChildNodes()) {
01996       foreach ($ele->childNodes as $child) {
01997         if ($child->nodeType === XML_ELEMENT_NODE) {
01998           $current = $this->deepestNode($child, $depth + 1, $current, $deepest);
01999         }
02000       }
02001     }
02002     elseif ($depth > $deepest) {
02003       $current = array($ele);
02004       $deepest = $depth;
02005     }
02006     elseif ($depth === $deepest) {
02007       $current[] = $ele;
02008     }
02009     return $current;
02010   }
02011   
02012   /**
02013    * Prepare an item for insertion into a DOM.
02014    *
02015    * This handles a variety of boilerplate tasks that need doing before an 
02016    * indeterminate object can be inserted into a DOM tree.
02017    * - If item is a string, this is converted into a document fragment and returned.
02018    * - If item is a QueryPath, then the first item is retrieved and this call function
02019    *   is called recursivel.
02020    * - If the item is a DOMNode, it is imported into the current DOM if necessary.
02021    * - If the item is a SimpleXMLElement, it is converted into a DOM node and then
02022    *   imported.
02023    *
02024    * @param mixed $item
02025    *  Item to prepare for insert.
02026    * @return mixed
02027    *  Returns the prepared item.
02028    * @throws QueryPathException
02029    *  Thrown if the object passed in is not of a supprted object type.
02030    */
02031   protected function prepareInsert($item) {
02032     if(empty($item)) {
02033       return;
02034     }
02035     elseif (is_string($item)) {
02036       // If configured to do so, replace all entities.
02037       if ($this->options['replace_entities']) {
02038         $item = QueryPathEntities::replaceAllEntities($item);
02039       }
02040       
02041       $frag = $this->document->createDocumentFragment();
02042       try {
02043         set_error_handler(array('QueryPathParseException', 'initializeFromError'), $this->errTypes);
02044         $frag->appendXML($item);
02045       }
02046       // Simulate a finally block.
02047       catch (Exception $e) {
02048         restore_error_handler();
02049         throw $e;
02050       }
02051       restore_error_handler();
02052       return $frag;
02053     }
02054     elseif ($item instanceof QueryPath) {
02055       if ($item->size() == 0) 
02056         return;
02057         
02058       return $this->prepareInsert($item->get(0));
02059     }
02060     elseif ($item instanceof DOMNode) {
02061       if ($item->ownerDocument !== $this->document) {
02062         // Deep clone this and attach it to this document
02063         $item = $this->document->importNode($item, TRUE);
02064       }
02065       return $item;
02066     }
02067     elseif ($item instanceof SimpleXMLElement) {
02068       $element = dom_import_simplexml($item);
02069       return $this->document->importNode($element, TRUE);
02070     }
02071     // What should we do here?
02072     //var_dump($item);
02073     throw new QueryPathException("Cannot prepare item of unsupported type: " . gettype($item));
02074   }
02075   /**
02076    * The tag name of the first element in the list.
02077    *
02078    * This returns the tag name of the first element in the list of matches. If
02079    * the list is empty, an empty string will be used.
02080    *
02081    * @see replaceAll()
02082    * @see replaceWith()
02083    * @return string
02084    *  The tag name of the first element in the list.
02085    */
02086   public function tag() {
02087     return ($this->size() > 0) ? $this->getFirstMatch()->tagName : '';
02088   }
02089   /**
02090    * Remove any items from the list if they match the selector.
02091    *
02092    * In other words, each item that matches the selector will be remove 
02093    * from the DOM document. The returned QueryPath wraps the list of 
02094    * removed elements.
02095    *
02096    * If no selector is specified, this will remove all current matches from
02097    * the document.
02098    *
02099    * @param string $selector
02100    *  A CSS Selector.
02101    * @return QueryPath
02102    *  The Query path wrapping a list of removed items.
02103    * @see replaceAll()
02104    * @see replaceWith()
02105    * @see removeChildren()
02106    */
02107   public function remove($selector = NULL) {
02108     if(!empty($selector)) {
02109       // Do a non-destructive find.
02110       $query = new QueryPathCssEventHandler($this->matches);
02111       $query->find($selector);
02112       $matches = $query->getMatches();
02113     }
02114     else {
02115       $matches = $this->matches;
02116     }
02117 
02118     $found = new SplObjectStorage();
02119     foreach ($matches as $item) {
02120       // The item returned is (according to docs) different from 
02121       // the one passed in, so we have to re-store it.
02122       $found->attach($item->parentNode->removeChild($item));
02123     }
02124     
02125     // Return a clone QueryPath with just the removed items.
02126     return new QueryPath($found);
02127   }
02128   /**
02129    * This replaces everything that matches the selector with the first value
02130    * in the current list.
02131    *
02132    * This is the reverse of replaceWith.
02133    *
02134    * Unlike jQuery, QueryPath cannot assume a default document. Consequently,
02135    * you must specify the intended destination document. If it is omitted, the
02136    * present document is assumed to be tthe document. However, that can result
02137    * in undefined behavior if the selector and the replacement are not sufficiently
02138    * distinct.
02139    *
02140    * @param string $selector
02141    *  The selector.
02142    * @param DOMDocument $document
02143    *  The destination document.
02144    * @return QueryPath
02145    *  The QueryPath wrapping the modified document.
02146    * @deprecated Due to the fact that this is not a particularly friendly method,
02147    *  and that it can be easily replicated using {@see replaceWith()}, it is to be 
02148    *  considered deprecated.
02149    * @see remove()
02150    * @see replaceWith()
02151    */
02152   public function replaceAll($selector, DOMDocument $document) {
02153     $replacement = $this->size() > 0 ? $this->getFirstMatch() : $this->document->createTextNode('');
02154     
02155     $c = new QueryPathCssEventHandler($document);
02156     $c->find($selector);
02157     $temp = $c->getMatches();
02158     foreach ($temp as $item) {
02159       $node = $replacement->cloneNode();
02160       $node = $document->importNode($node);
02161       $item->parentNode->replaceChild($node, $item);
02162     }
02163     return qp($document, NULL, $this->options);
02164   }
02165   /**
02166    * Add more elements to the current set of matches.
02167    *
02168    * This begins the new query at the top of the DOM again. The results found
02169    * when running this selector are then merged into the existing results. In
02170    * this way, you can add additional elements to the existing set.
02171    *
02172    * @param string $selector
02173    *  A valid selector.
02174    * @return QueryPath
02175    *  The QueryPath object with the newly added elements.
02176    * @see append()
02177    * @see after()
02178    * @see andSelf()
02179    * @see end()
02180    */
02181   public function add($selector) {
02182     
02183     // This is destructive, so we need to set $last:
02184     $this->last = $this->matches;
02185     
02186     foreach (qp($this->document, $selector, $this->options)->get() as $item)
02187       $this->matches->attach($item);
02188     return $this;
02189   }
02190   /**
02191    * Revert to the previous set of matches.
02192    *
02193    * This will revert back to the last set of matches (before the last 
02194    * "destructive" set of operations). This undoes any change made to the set of
02195    * matched objects. Functions like find() and filter() change the 
02196    * list of matched objects. The end() function will revert back to the last set of
02197    * matched items.
02198    *
02199    * Note that functions that modify the document, but do not change the list of 
02200    * matched objects, are not "destructive". Thus, calling append('something')->end()
02201    * will not undo the append() call.
02202    *
02203    * Only one level of changes is stored. Reverting beyond that will result in 
02204    * an empty set of matches. Example:
02205    *
02206    * @code
02207    * // The line below returns the same thing as qp(document, 'p');
02208    * qp(document, 'p')->find('div')->end();
02209    * // This returns an empty array:
02210    * qp(document, 'p')->end();
02211    * // This returns an empty array:
02212    * qp(document, 'p')->find('div')->find('span')->end()->end();
02213    * @endcode
02214    *
02215    * The last one returns an empty array because only one level of changes is stored.
02216    *
02217    * @return QueryPath
02218    *  A QueryPath object reflecting the list of matches prior to the last destructive
02219    *  operation.
02220    * @see andSelf()
02221    * @see add()
02222    */
02223   public function end() {
02224     // Note that this does not use setMatches because it must set the previous
02225     // set of matches to empty array.
02226     $this->matches = $this->last;
02227     $this->last = new SplObjectStorage();
02228     return $this;
02229   }
02230   /**
02231    * Combine the current and previous set of matched objects.
02232    *
02233    * Example:
02234    *
02235    * @code
02236    * qp(document, 'p')->find('div')->andSelf();
02237    * @endcode
02238    *
02239    * The code above will contain a list of all p elements and all div elements that 
02240    * are beneath p elements.
02241    *
02242    * @see end();
02243    * @return QueryPath
02244    *  A QueryPath object with the results of the last two "destructive" operations.
02245    * @see add()
02246    * @see end()
02247    */
02248   public function andSelf() {
02249     // This is destructive, so we need to set $last:
02250     $last = $this->matches;
02251     
02252     foreach ($this->last as $item) $this->matches->attach($item);
02253     
02254     $this->last = $last;
02255     return $this;
02256   }
02257   /**
02258    * Remove all child nodes.
02259    *
02260    * This is equivalent to jQuery's empty() function. (However, empty() is a 
02261    * PHP built-in, and cannot be used as a method name.)
02262    *
02263    * @return QueryPath
02264    *  The QueryPath object with the child nodes removed.
02265    * @see replaceWith()
02266    * @see replaceAll()
02267    * @see remove()
02268    */
02269   public function removeChildren() {
02270     foreach ($this->matches as $m) {
02271       while($kid = $m->firstChild) {
02272         $m->removeChild($kid);
02273       }
02274     }
02275     return $this;
02276   }
02277   /**
02278    * Get the children of the elements in the QueryPath object.
02279    *
02280    * If a selector is provided, the list of children will be filtered through
02281    * the selector.
02282    *
02283    * @param string $selector
02284    *  A valid selector.
02285    * @return QueryPath
02286    *  A QueryPath wrapping all of the children.
02287    * @see removeChildren()
02288    * @see parent()
02289    * @see parents()
02290    * @see next()
02291    * @see prev()
02292    */
02293   public function children($selector = NULL) {
02294     $found = new SplObjectStorage();
02295     foreach ($this->matches as $m) {
02296       foreach($m->childNodes as $c) {
02297         if ($c->nodeType == XML_ELEMENT_NODE) $found->attach($c);
02298       }
02299     }
02300     if (empty($selector)) {
02301       $this->setMatches($found);
02302     }
02303     else {
02304       $this->matches = $found; // Don't buffer this. It is temporary.
02305       $this->filter($selector);
02306     }
02307     return $this;
02308   }
02309   /**
02310    * Get all child nodes (not just elements) of all items in the matched set.
02311    *
02312    * It gets only the immediate children, not all nodes in the subtree.
02313    *
02314    * This does not process iframes. Xinclude processing is dependent on the 
02315    * DOM implementation and configuration.
02316    *
02317    * @return QueryPath
02318    *  A QueryPath object wrapping all child nodes for all elements in the 
02319    *  QueryPath object.
02320    * @see find()
02321    * @see text()
02322    * @see html()
02323    * @see innerHTML()
02324    * @see xml()
02325    * @see innerXML()
02326    */
02327   public function contents() {
02328     $found = new SplObjectStorage();
02329     foreach ($this->matches as $m) {
02330       if (empty($m->childNodes)) continue; // Issue #51
02331       foreach ($m->childNodes as $c) {
02332         $found->attach($c);
02333       }
02334     }
02335     $this->setMatches($found);
02336     return $this;
02337   }
02338   /**
02339    * Get a list of siblings for elements currently wrapped by this object.
02340    *
02341    * This will compile a list of every sibling of every element in the 
02342    * current list of elements. 
02343    *
02344    * Note that if two siblings are present in the QueryPath object to begin with,
02345    * then both will be returned in the matched set, since they are siblings of each 
02346    * other. In other words,if the matches contain a and b, and a and b are siblings of 
02347    * each other, than running siblings will return a set that contains 
02348    * both a and b.
02349    *
02350    * @param string $selector
02351    *  If the optional selector is provided, siblings will be filtered through
02352    *  this expression.
02353    * @return QueryPath
02354    *  The QueryPath containing the matched siblings.
02355    * @see contents()
02356    * @see children()
02357    * @see parent()
02358    * @see parents()
02359    */
02360   public function siblings($selector = NULL) {
02361     $found = new SplObjectStorage();
02362     foreach ($this->matches as $m) {
02363       $parent = $m->parentNode;
02364       foreach ($parent->childNodes as $n) {
02365         if ($n->nodeType == XML_ELEMENT_NODE && $n !== $m) {
02366           $found->attach($n);
02367         }
02368       }
02369     }
02370     if (empty($selector)) {
02371       $this->setMatches($found);
02372     }
02373     else {
02374       $this->matches = $found; // Don't buffer this. It is temporary.
02375       $this->filter($selector);
02376     }
02377     return $this;
02378   }
02379   /**
02380    * Find the closest element matching the selector.
02381    *
02382    * This finds the closest match in the ancestry chain. It first checks the 
02383    * present element. If the present element does not match, this traverses up 
02384    * the ancestry chain (e.g. checks each parent) looking for an item that matches.
02385    *
02386    * It is provided for jQuery 1.3 compatibility.
02387    * @param string $selector
02388    *  A CSS Selector to match.
02389    * @return QueryPath
02390    *  The set of matches.
02391    * @since 2.0
02392    */
02393   public function closest($selector) {
02394     $found = new SplObjectStorage();
02395     foreach ($this->matches as $m) {
02396       
02397       if (qp($m, NULL, $this->options)->is($selector) > 0) {
02398         $found->attach($m);
02399       }
02400       else {
02401         while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) {
02402           $m = $m->parentNode;
02403           // Is there any case where parent node is not an element?
02404           if ($m->nodeType === XML_ELEMENT_NODE && qp($m, NULL, $this->options)->is($selector) > 0) {
02405             $found->attach($m);
02406             break;
02407           }
02408         }
02409       }
02410       
02411     }
02412     $this->setMatches($found);
02413     return $this;
02414   }
02415   /**
02416    * Get the immediate parent of each element in the QueryPath.
02417    *
02418    * If a selector is passed, this will return the nearest matching parent for
02419    * each element in the QueryPath.
02420    *
02421    * @param string $selector
02422    *  A valid CSS3 selector.
02423    * @return QueryPath
02424    *  A QueryPath object wrapping the matching parents.
02425    * @see children()
02426    * @see siblings()
02427    * @see parents()
02428    */
02429   public function parent($selector = NULL) {
02430     $found = new SplObjectStorage();
02431     foreach ($this->matches as $m) {
02432       while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) {
02433         $m = $m->parentNode;
02434         // Is there any case where parent node is not an element?
02435         if ($m->nodeType === XML_ELEMENT_NODE) {
02436           if (!empty($selector)) {
02437             if (qp($m, NULL, $this->options)->is($selector) > 0) {
02438               $found->attach($m);
02439               break;
02440             }
02441           }
02442           else {
02443             $found->attach($m);
02444             break;
02445           }
02446         }
02447       }
02448     }
02449     $this->setMatches($found);
02450     return $this;
02451   }
02452   /**
02453    * Get all ancestors of each element in the QueryPath.
02454    * 
02455    * If a selector is present, only matching ancestors will be retrieved.
02456    *
02457    * @see parent()
02458    * @param string $selector
02459    *  A valid CSS 3 Selector.
02460    * @return QueryPath
02461    *  A QueryPath object containing the matching ancestors.
02462    * @see siblings()
02463    * @see children()
02464    */
02465   public function parents($selector = NULL) {
02466     $found = new SplObjectStorage();
02467     foreach ($this->matches as $m) {
02468       while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) {
02469         $m = $m->parentNode;
02470         // Is there any case where parent node is not an element?
02471         if ($m->nodeType === XML_ELEMENT_NODE) {
02472           if (!empty($selector)) {
02473             if (qp($m, NULL, $this->options)->is($selector) > 0)
02474               $found->attach($m);
02475           }
02476           else 
02477             $found->attach($m);
02478         }
02479       }
02480     }
02481     $this->setMatches($found);
02482     return $this;
02483   }
02484   /**
02485    * Set or get the markup for an element.
02486    * 
02487    * If $markup is set, then the giving markup will be injected into each
02488    * item in the set. All other children of that node will be deleted, and this
02489    * new code will be the only child or children. The markup MUST BE WELL FORMED.
02490    *
02491    * If no markup is given, this will return a string representing the child 
02492    * markup of the first node.
02493    *
02494    * <b>Important:</b> This differs from jQuery's html() function. This function
02495    * returns <i>the current node</i> and all of its children. jQuery returns only
02496    * the children. This means you do not need to do things like this: 
02497    * @code$qp->parent()->html()@endcode.
02498    *
02499    * By default, this is HTML 4.01, not XHTML. Use {@link xml()} for XHTML.
02500    *
02501    * @param string $markup
02502    *  The text to insert.
02503    * @return mixed
02504    *  A string if no markup was passed, or a QueryPath if markup was passed.
02505    * @see xml()
02506    * @see text()
02507    * @see contents()
02508    */
02509   public function html($markup = NULL) {
02510     if (isset($markup)) {
02511       
02512       if ($this->options['replace_entities']) {
02513         $markup = QueryPathEntities::replaceAllEntities($markup);
02514       }
02515       
02516       // Parse the HTML and insert it into the DOM
02517       //$doc = DOMDocument::loadHTML($markup);
02518       $doc = $this->document->createDocumentFragment();
02519       $doc->appendXML($markup);
02520       $this->removeChildren();
02521       $this->append($doc);
02522       return $this;
02523     }
02524     $length = $this->size();
02525     if ($length == 0) {
02526       return NULL;
02527     }
02528     // Only return the first item -- that's what JQ does.
02529     $first = $this->getFirstMatch();
02530 
02531     // Catch cases where first item is not a legit DOM object.
02532     if (!($first instanceof DOMNode)) {
02533       return NULL;
02534     }
02535     
02536     // Added by eabrand.
02537     if(!$first->ownerDocument->documentElement) {
02538       return NULL;
02539     }
02540     
02541     if ($first instanceof DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) {
02542       return $this->document->saveHTML();
02543     }
02544     // saveHTML cannot take a node and serialize it.
02545     return $this->document->saveXML($first);
02546   }
02547   
02548   /**
02549    * Fetch the HTML contents INSIDE of the first QueryPath item.
02550    *
02551    * <b>This behaves the way jQuery's @codehtml()@endcode function behaves.</b>
02552    *
02553    * This gets all children of the first match in QueryPath. 
02554    *
02555    * Consider this fragment:
02556    * @code
02557    * <div>
02558    * test <p>foo</p> test
02559    * </div>
02560    * @endcode
02561    *
02562    * We can retrieve just the contents of this code by doing something like
02563    * this:
02564    * @code
02565    * qp($xml, 'div')->innerHTML();
02566    * @endcode
02567    *
02568    * This would return the following:
02569    * @codetest <p>foo</p> test@endcode
02570    *
02571    * @return string
02572    *  Returns a string representation of the child nodes of the first
02573    *  matched element.
02574    * @see html()
02575    * @see innerXML()
02576    * @see innerXHTML()
02577    * @since 2.0
02578    */
02579   public function innerHTML() {
02580     return $this->innerXML();
02581   } 
02582   
02583   /**
02584    * Fetch child (inner) nodes of the first match.
02585    *
02586    * This will return the children of the present match. For an example, 
02587    * see {@link innerHTML()}.
02588    *
02589    * @see innerHTML()
02590    * @see innerXML()
02591    * @return string
02592    *  Returns a string of XHTML that represents the children of the present
02593    *  node.
02594    * @since 2.0
02595    */
02596   public function innerXHTML() {
02597     $length = $this->size();
02598     if ($length == 0) {
02599       return NULL;
02600     }
02601     // Only return the first item -- that's what JQ does.
02602     $first = $this->getFirstMatch();
02603 
02604     // Catch cases where first item is not a legit DOM object.
02605     if (!($first instanceof DOMNode)) {
02606       return NULL;
02607     }
02608     elseif (!$first->hasChildNodes()) {
02609       return '';
02610     }
02611     
02612     $buffer = '';
02613     foreach ($first->childNodes as $child) {
02614       $buffer .= $this->document->saveXML($child, LIBXML_NOEMPTYTAG);
02615     }
02616     
02617     return $buffer;
02618   }
02619   
02620   /**
02621    * Fetch child (inner) nodes of the first match.
02622    *
02623    * This will return the children of the present match. For an example, 
02624    * see {@link innerHTML()}.
02625    *
02626    * @see innerHTML()
02627    * @see innerXHTML()
02628    * @return string
02629    *  Returns a string of XHTML that represents the children of the present
02630    *  node.
02631    * @since 2.0
02632    */
02633   public function innerXML() {
02634     $length = $this->size();
02635     if ($length == 0) {
02636       return NULL;
02637     }
02638     // Only return the first item -- that's what JQ does.
02639     $first = $this->getFirstMatch();
02640 
02641     // Catch cases where first item is not a legit DOM object.
02642     if (!($first instanceof DOMNode)) {
02643       return NULL;
02644     }
02645     elseif (!$first->hasChildNodes()) {
02646       return '';
02647     }
02648     
02649     $buffer = '';
02650     foreach ($first->childNodes as $child) {
02651       $buffer .= $this->document->saveXML($child);
02652     }
02653     
02654     return $buffer;
02655   }
02656   
02657   /**
02658    * Retrieve the text of each match and concatenate them with the given separator.
02659    *
02660    * This has the effect of looping through all children, retrieving their text
02661    * content, and then concatenating the text with a separator.
02662    *
02663    * @param string $sep
02664    *  The string used to separate text items. The default is a comma followed by a
02665    *  space.
02666    * @param boolean $filterEmpties
02667    *  If this is true, empty items will be ignored.
02668    * @return string
02669    *  The text contents, concatenated together with the given separator between
02670    *  every pair of items.
02671    * @see implode()
02672    * @see text()
02673    * @since 2.0
02674    */
02675   public function textImplode($sep = ', ', $filterEmpties = TRUE) {
02676     $tmp = array(); 
02677     foreach ($this->matches as $m) {
02678       $txt = $m->textContent;
02679       $trimmed = trim($txt);
02680       // If filter empties out, then we only add items that have content.
02681       if ($filterEmpties) {
02682         if (strlen($trimmed) > 0) $tmp[] = $txt;
02683       }
02684       // Else add all content, even if it's empty.
02685       else {
02686         $tmp[] = $txt;
02687       }
02688     }
02689     return implode($sep, $tmp);
02690   }
02691   /**
02692    * Get the text contents from just child elements.
02693    *
02694    * This is a specialized variant of textImplode() that implodes text for just the
02695    * child elements of the current element.
02696    *
02697    * @param string $separator
02698    *  The separator that will be inserted between found text content.
02699    * @return string
02700    *  The concatenated values of all children.
02701    */
02702   function childrenText($separator = ' ') {
02703     // Branch makes it non-destructive.
02704     return $this->branch()->xpath('descendant::text()')->textImplode($separator);
02705   }
02706   /**
02707    * Get or set the text contents of a node.
02708    * @param string $text
02709    *  If this is not NULL, this value will be set as the text of the node. It
02710    *  will replace any existing content.
02711    * @return mixed 
02712    *  A QueryPath if $text is set, or the text content if no text
02713    *  is passed in as a pram.
02714    * @see html()
02715    * @see xml()
02716    * @see contents()
02717    */
02718   public function text($text = NULL) {
02719     if (isset($text)) {
02720       $this->removeChildren();
02721       $textNode = $this->document->createTextNode($text);
02722       foreach ($this->matches as $m) $m->appendChild($textNode);
02723       return $this;
02724     }
02725     // Returns all text as one string:
02726     $buf = '';
02727     foreach ($this->matches as $m) $buf .= $m->textContent;
02728     return $buf;
02729   }
02730   /**
02731    * Get or set the text before each selected item.
02732    *
02733    * If $text is passed in, the text is inserted before each currently selected item.
02734    *
02735    * If no text is given, this will return the concatenated text after each selected element.
02736    *
02737    * @code
02738    * <?php
02739    * $xml = '<?xml version="1.0"?><root>Foo<a>Bar</a><b/></root>';
02740    * 
02741    * // This will return 'Foo'
02742    * qp($xml, 'a')->textBefore();
02743    *
02744    * // This will insert 'Baz' right before <b/>.
02745    * qp($xml, 'b')->textBefore('Baz');
02746    * ?>
02747    * @endcode
02748    *
02749    * @param string $text
02750    *  If this is set, it will be inserted before each node in the current set of
02751    *  selected items.
02752    * @return mixed
02753    *  Returns the QueryPath object if $text was set, and returns a string (possibly empty)
02754    *  if no param is passed.
02755    */
02756   public function textBefore($text = NULL) {
02757     if (isset($text)) {
02758       $textNode = $this->document->createTextNode($text);
02759       return $this->before($textNode);
02760     }
02761     $buffer = '';
02762     foreach ($this->matches as $m) {
02763       $p = $m;
02764       while (isset($p->previousSibling) && $p->previousSibling->nodeType == XML_TEXT_NODE) {
02765         $p = $p->previousSibling;
02766         $buffer .= $p->textContent;
02767       }
02768     }
02769     return $buffer;
02770   }
02771   
02772   public function textAfter($text = NULL) {
02773     if (isset($text)) {
02774       $textNode = $this->document->createTextNode($text);
02775       return $this->after($textNode);
02776     }
02777     $buffer = '';
02778     foreach ($this->matches as $m) {
02779       $n = $m;
02780       while (isset($n->nextSibling) && $n->nextSibling->nodeType == XML_TEXT_NODE) {
02781         $n = $n->nextSibling;
02782         $buffer .= $n->textContent;
02783       }
02784     }
02785     return $buffer;
02786   }
02787   
02788   /**
02789    * Set or get the value of an element's 'value' attribute.
02790    *
02791    * The 'value' attribute is common in HTML form elements. This is a 
02792    * convenience function for accessing the values. Since this is not  common
02793    * task on the server side, this method may be removed in future releases. (It 
02794    * is currently provided for jQuery compatibility.)
02795    *
02796    * If a value is provided in the params, then the value will be set for all 
02797    * matches. If no params are given, then the value of the first matched element
02798    * will be returned. This may be NULL.
02799    *
02800    * @deprecated Just use attr(). There's no reason to use this on the server.
02801    * @see attr()
02802    * @param string $value
02803    * @return mixed
02804    *  Returns a QueryPath if a string was passed in, and a string if no string
02805    *  was passed in. In the later case, an error will produce NULL.
02806    */
02807   public function val($value = NULL) {
02808     if (isset($value)) {
02809       $this->attr('value', $value);
02810       return $this;
02811     }
02812     return $this->attr('value');
02813   }
02814   /**
02815    * Set or get XHTML markup for an element or elements.
02816    * 
02817    * This differs from {@link html()} in that it processes (and produces)
02818    * strictly XML 1.0 compliant markup.
02819    *
02820    * Like {@link xml()} and {@link html()}, this functions as both a 
02821    * setter and a getter.
02822    *
02823    * This is a convenience function for fetching HTML in XML format.
02824    * It does no processing of the markup (such as schema validation).
02825    * @param string $markup
02826    *  A string containing XML data.
02827    * @return mixed
02828    *  If markup is passed in, a QueryPath is returned. If no markup is passed
02829    *  in, XML representing the first matched element is returned.
02830    * @see html()
02831    * @see innerXHTML()
02832    */
02833   public function xhtml($markup = NULL) {
02834     
02835     // XXX: This is a minor reworking of the original xml() method.
02836     // This should be refactored, probably.
02837     // See http://github.com/technosophos/querypath/issues#issue/10
02838    
02839     $omit_xml_decl = $this->options['omit_xml_declaration'];
02840     if ($markup === TRUE) {
02841       // Basically, we handle the special case where we don't
02842       // want the XML declaration to be displayed.
02843       $omit_xml_decl = TRUE;
02844     }
02845     elseif (isset($markup)) {
02846       return $this->xml($markup);
02847     }
02848     
02849     $length = $this->size();
02850     if ($length == 0) {
02851       return NULL;
02852     }
02853     
02854     // Only return the first item -- that's what JQ does.
02855     $first = $this->getFirstMatch();
02856     // Catch cases where first item is not a legit DOM object.
02857     if (!($first instanceof DOMNode)) {
02858       return NULL;
02859     }
02860     
02861     if ($first instanceof DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) {
02862       
02863       // Has the unfortunate side-effect of stripping doctype.
02864       //$text = ($omit_xml_decl ? $this->document->saveXML($first->ownerDocument->documentElement, LIBXML_NOEMPTYTAG) : $this->document->saveXML(NULL, LIBXML_NOEMPTYTAG));
02865       $text = $this->document->saveXML(NULL, LIBXML_NOEMPTYTAG);
02866     }
02867     else {
02868       $text = $this->document->saveXML($first, LIBXML_NOEMPTYTAG);
02869     }
02870     
02871     // Issue #47: Using the old trick for removing the XML tag also removed the 
02872     // doctype. So we remove it with a regex:
02873     if ($omit_xml_decl) {
02874       $text = preg_replace('/<\?xml\s[^>]*\?>/', '', $text);
02875     }
02876     
02877     // This is slightly lenient: It allows for cases where code incorrectly places content
02878     // inside of these supposedly unary elements.
02879     $unary = '/<(area|base|basefont|br|col|frame|hr|img|input|isindex|link|meta|param)(?(?=\s)([^>\/]+))><\/[^>]*>/i';
02880     $text = preg_replace($unary, '<\\1\\2 />', $text);
02881     
02882     // Experimental: Support for enclosing CDATA sections with comments to be both XML compat
02883     // and HTML 4/5 compat
02884     $cdata = '/(<!\[CDATA\[|\]\]>)/i';
02885     $replace = $this->options['escape_xhtml_js_css_sections'];
02886     $text = preg_replace($cdata, $replace, $text);
02887     
02888     return $text;
02889   }
02890   /**
02891    * Set or get the XML markup for an element or elements.
02892    *
02893    * Like {@link html()}, this functions in both a setter and a getter mode.
02894    * 
02895    * In setter mode, the string passed in will be parsed and then appended to the 
02896    * elements wrapped by this QueryPath object.When in setter mode, this parses 
02897    * the XML using the DOMFragment parser. For that reason, an XML declaration 
02898    * is not necessary.
02899    *
02900    * In getter mode, the first element wrapped by this QueryPath object will be 
02901    * converted to an XML string and returned.
02902    *
02903    * @param string $markup
02904    *  A string containing XML data.
02905    * @return mixed
02906    *  If markup is passed in, a QueryPath is returned. If no markup is passed
02907    *  in, XML representing the first matched element is returned.
02908    * @see xhtml()
02909    * @see html()
02910    * @see text()
02911    * @see content()
02912    * @see innerXML()
02913    */
02914   public function xml($markup = NULL) {
02915     $omit_xml_decl = $this->options['omit_xml_declaration'];
02916     if ($markup === TRUE) {
02917       // Basically, we handle the special case where we don't
02918       // want the XML declaration to be displayed.
02919       $omit_xml_decl = TRUE;
02920     }
02921     elseif (isset($markup)) {
02922       if ($this->options['replace_entities']) {
02923         $markup = QueryPathEntities::replaceAllEntities($markup);
02924       }
02925       $doc = $this->document->createDocumentFragment();
02926       $doc->appendXML($markup);
02927       $this->removeChildren();
02928       $this->append($doc);
02929       return $this;
02930     }
02931     $length = $this->size();
02932     if ($length == 0) {
02933       return NULL;
02934     }
02935     // Only return the first item -- that's what JQ does.
02936     $first = $this->getFirstMatch();
02937     
02938     // Catch cases where first item is not a legit DOM object.
02939     if (!($first instanceof DOMNode)) {
02940       return NULL;
02941     }
02942     
02943     if ($first instanceof DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) {
02944       
02945       return  ($omit_xml_decl ? $this->document->saveXML($first->ownerDocument->documentElement) : $this->document->saveXML());
02946     }
02947     return $this->document->saveXML($first);
02948   }
02949   /**
02950    * Send the XML document to the client.
02951    * 
02952    * Write the document to a file path, if given, or 
02953    * to stdout (usually the client).
02954    *
02955    * This prints the entire document.
02956    *
02957    * @param string $path
02958    *  The path to the file into which the XML should be written. if 
02959    *  this is NULL, data will be written to STDOUT, which is usually
02960    *  sent to the remote browser.
02961    * @param int $options
02962    *  (As of QueryPath 2.1) Pass libxml options to the saving mechanism.
02963    * @return QueryPath
02964    *  The QueryPath object, unmodified.
02965    * @see xml()
02966    * @see innerXML()
02967    * @see writeXHTML()
02968    * @throws Exception 
02969    *  In the event that a file cannot be written, an Exception will be thrown.
02970    */
02971   public function writeXML($path = NULL, $options = NULL) {
02972     if ($path == NULL) {
02973       print $this->document->saveXML(NULL, $options);
02974     }
02975     else {
02976       try {
02977         set_error_handler(array('QueryPathIOException', 'initializeFromError'));
02978         $this->document->save($path, $options);
02979       }
02980       catch (Exception $e) {
02981         restore_error_handler();
02982         throw $e;
02983       }
02984       restore_error_handler();
02985     }
02986     return $this;
02987   }
02988   /**
02989    * Writes HTML to output.
02990    *
02991    * HTML is formatted as HTML 4.01, without strict XML unary tags. This is for
02992    * legacy HTML content. Modern XHTML should be written using {@link toXHTML()}.
02993    * 
02994    * Write the document to stdout (usually the client) or to a file.
02995    *
02996    * @param string $path
02997    *  The path to the file into which the XML should be written. if 
02998    *  this is NULL, data will be written to STDOUT, which is usually
02999    *  sent to the remote browser.
03000    * @return QueryPath
03001    *  The QueryPath object, unmodified.
03002    * @see html()
03003    * @see innerHTML()
03004    * @throws Exception 
03005    *  In the event that a file cannot be written, an Exception will be thrown.
03006    */
03007   public function writeHTML($path = NULL) {
03008     if ($path == NULL) {
03009       print $this->document->saveHTML();
03010     }
03011     else {
03012       try {
03013         set_error_handler(array('QueryPathParseException', 'initializeFromError'));
03014         $this->document->saveHTMLFile($path);
03015       }
03016       catch (Exception $e) {
03017         restore_error_handler();
03018         throw $e;
03019       }
03020       restore_error_handler();
03021     }
03022     return $this;
03023   }
03024   
03025   /**
03026    * Write an XHTML file to output.
03027    *
03028    * Typically, you should use this instead of {@link writeHTML()}.
03029    *
03030    * Currently, this functions identically to {@link toXML()} <i>except that</i>
03031    * it always uses closing tags (e.g. always @code<script></script>@endcode, 
03032    * never @code<script/>@endcode). It will
03033    * write the file as well-formed XML. No XHTML schema validation is done.
03034    *
03035    * @see writeXML()
03036    * @see xml()
03037    * @see writeHTML()
03038    * @see innerXHTML()
03039    * @see xhtml()
03040    * @param string $path
03041    *  The filename of the file to write to.
03042    * @return QueryPath
03043    *  Returns the QueryPath, unmodified.
03044    * @throws Exception
03045    *  In the event that the output file cannot be written, an exception is
03046    *  thrown.
03047    * @since 2.0
03048    */
03049   public function writeXHTML($path = NULL) {
03050     return $this->writeXML($path, LIBXML_NOEMPTYTAG);
03051     /*
03052     if ($path == NULL) {
03053       print $this->document->saveXML(NULL, LIBXML_NOEMPTYTAG);
03054     }
03055     else {
03056       try {
03057         set_error_handler(array('QueryPathIOException', 'initializeFromError'));
03058         $this->document->save($path, LIBXML_NOEMPTYTAG);
03059       }
03060       catch (Exception $e) {
03061         restore_error_handler();
03062         throw $e;
03063       }
03064       restore_error_handler();
03065     }
03066     return $this;
03067     */
03068   }
03069   /**
03070    * Get the next sibling of each element in the QueryPath.
03071    *
03072    * If a selector is provided, the next matching sibling will be returned.
03073    *
03074    * @param string $selector
03075    *  A CSS3 selector.
03076    * @return QueryPath
03077    *  The QueryPath object.
03078    * @see nextAll()
03079    * @see prev()
03080    * @see children()
03081    * @see contents()
03082    * @see parent()
03083    * @see parents()
03084    */
03085   public function next($selector = NULL) {
03086     $found = new SplObjectStorage();
03087     foreach ($this->matches as $m) {
03088       while (isset($m->nextSibling)) {
03089         $m = $m->nextSibling;
03090         if ($m->nodeType === XML_ELEMENT_NODE) {
03091           if (!empty($selector)) {
03092             if (qp($m, NULL, $this->options)->is($selector) > 0) {
03093               $found->attach($m);
03094               break;
03095             }
03096           }
03097           else {
03098             $found->attach($m);
03099             break;
03100           }
03101         }
03102       }
03103     }
03104     $this->setMatches($found);
03105     return $this;
03106   }
03107   /**
03108    * Get all siblings after an element.
03109    *
03110    * For each element in the QueryPath, get all siblings that appear after
03111    * it. If a selector is passed in, then only siblings that match the 
03112    * selector will be included.
03113    *
03114    * @param string $selector 
03115    *  A valid CSS 3 selector.
03116    * @return QueryPath
03117    *  The QueryPath object, now containing the matching siblings.
03118    * @see next()
03119    * @see prevAll()
03120    * @see children()
03121    * @see siblings()
03122    */
03123   public function nextAll($selector = NULL) {
03124     $found = new SplObjectStorage();
03125     foreach ($this->matches as $m) {
03126       while (isset($m->nextSibling)) {
03127         $m = $m->nextSibling;
03128         if ($m->nodeType === XML_ELEMENT_NODE) {
03129           if (!empty($selector)) {
03130             if (qp($m, NULL, $this->options)->is($selector) > 0) {
03131               $found->attach($m);
03132             }
03133           }
03134           else {
03135             $found->attach($m);
03136           }
03137         }
03138       }
03139     }
03140     $this->setMatches($found);
03141     return $this;
03142   }
03143   /**
03144    * Get the next sibling before each element in the QueryPath.
03145    *
03146    * For each element in the QueryPath, this retrieves the previous sibling
03147    * (if any). If a selector is supplied, it retrieves the first matching 
03148    * sibling (if any is found).
03149    *
03150    * @param string $selector
03151    *  A valid CSS 3 selector.
03152    * @return QueryPath
03153    *  A QueryPath object, now containing any previous siblings that have been 
03154    *  found.
03155    * @see prevAll()
03156    * @see next()
03157    * @see siblings()
03158    * @see children()
03159    */
03160   public function prev($selector = NULL) {
03161     $found = new SplObjectStorage();
03162     foreach ($this->matches as $m) {
03163       while (isset($m->previousSibling)) {
03164         $m = $m->previousSibling;
03165         if ($m->nodeType === XML_ELEMENT_NODE) {
03166           if (!empty($selector)) {
03167             if (qp($m, NULL, $this->options)->is($selector)) {
03168               $found->attach($m);
03169               break;
03170             }
03171           }
03172           else {
03173             $found->attach($m);
03174             break;
03175           }
03176         }
03177       }
03178     }
03179     $this->setMatches($found);
03180     return $this;
03181   }
03182   /**
03183    * Get the previous siblings for each element in the QueryPath.
03184    *
03185    * For each element in the QueryPath, get all previous siblings. If a 
03186    * selector is provided, only matching siblings will be retrieved.
03187    *
03188    * @param string $selector
03189    *  A valid CSS 3 selector.
03190    * @return QueryPath
03191    *  The QueryPath object, now wrapping previous sibling elements.
03192    * @see prev()
03193    * @see nextAll()
03194    * @see siblings()
03195    * @see contents()
03196    * @see children()
03197    */
03198   public function prevAll($selector = NULL) {
03199     $found = new SplObjectStorage();
03200     foreach ($this->matches as $m) {
03201       while (isset($m->previousSibling)) {
03202         $m = $m->previousSibling;
03203         if ($m->nodeType === XML_ELEMENT_NODE) {
03204           if (!empty($selector)) {
03205             if (qp($m, NULL, $this->options)->is($selector)) {
03206               $found->attach($m);
03207             }
03208           }
03209           else {
03210             $found->attach($m);
03211           }
03212         }
03213       }
03214     }
03215     $this->setMatches($found);
03216     return $this;
03217   }
03218   /**
03219    * @deprecated Use {@link siblings()}.
03220    */
03221   public function peers($selector = NULL) {
03222     $found = new SplObjectStorage();
03223     foreach ($this->matches as $m) {
03224       foreach ($m->parentNode->childNodes as $kid) {
03225         if ($kid->nodeType == XML_ELEMENT_NODE && $m !== $kid) {
03226           if (!empty($selector)) {
03227             if (qp($kid, NULL, $this->options)->is($selector)) {
03228               $found->attach($kid);
03229             }
03230           }
03231           else {
03232             $found->attach($kid);
03233           }
03234         }
03235       }
03236     }
03237     $this->setMatches($found);
03238     return $this;
03239   }
03240   /**
03241    * Add a class to all elements in the current QueryPath.
03242    *
03243    * This searchers for a class attribute on each item wrapped by the current 
03244    * QueryPath object. If no attribute is found, a new one is added and its value
03245    * is set to $class. If a class attribute is found, then the value is appended
03246    * on to the end.
03247    *
03248    * @param string $class 
03249    *  The name of the class.
03250    * @return QueryPath
03251    *  Returns the QueryPath object.
03252    * @see css()
03253    * @see attr()
03254    * @see removeClass()
03255    * @see hasClass()
03256    */
03257   public function addClass($class) {
03258     foreach ($this->matches as $m) {
03259       if ($m->hasAttribute('class')) {
03260         $val = $m->getAttribute('class');
03261         $m->setAttribute('class', $val . ' ' . $class);
03262       }
03263       else {
03264         $m->setAttribute('class', $class);
03265       }
03266     }
03267     return $this;
03268   }
03269   /**
03270    * Remove the named class from any element in the QueryPath that has it.
03271    *
03272    * This may result in the entire class attribute being removed. If there
03273    * are other items in the class attribute, though, they will not be removed.
03274    * 
03275    * Example:
03276    * Consider this XML:
03277    * @code
03278    * <element class="first second"/>
03279    * @endcode
03280    *
03281    * Executing this fragment of code will remove only the 'first' class:
03282    * @code
03283    * qp(document, 'element')->removeClass('first');
03284    * @endcode
03285    *
03286    * The resulting XML will be:
03287    * @code
03288    * <element class="second"/>
03289    * @endcode
03290    *
03291    * To remove the entire 'class' attribute, you should use {@see removeAttr()}.
03292    *
03293    * @param string $class
03294    *  The class name to remove.
03295    * @return QueryPath
03296    *  The modified QueryPath object.
03297    * @see attr()
03298    * @see addClass()
03299    * @see hasClass()
03300    */
03301   public function removeClass($class) {
03302     foreach ($this->matches as $m) {
03303       if ($m->hasAttribute('class')) {
03304         $vals = explode(' ', $m->getAttribute('class'));
03305         if (in_array($class, $vals)) {
03306           $buf = array();
03307           foreach ($vals as $v) {
03308             if ($v != $class) $buf[] = $v;
03309           }
03310           if (count($buf) == 0)
03311             $m->removeAttribute('class');
03312           else
03313             $m->setAttribute('class', implode(' ', $buf));
03314         }
03315       }
03316     }
03317     return $this;
03318   }
03319   /**
03320    * Returns TRUE if any of the elements in the QueryPath have the specified class.
03321    *
03322    * @param string $class
03323    *  The name of the class.
03324    * @return boolean 
03325    *  TRUE if the class exists in one or more of the elements, FALSE otherwise.
03326    * @see addClass()
03327    * @see removeClass()
03328    */
03329   public function hasClass($class) {
03330     foreach ($this->matches as $m) {
03331       if ($m->hasAttribute('class')) {
03332         $vals = explode(' ', $m->getAttribute('class'));
03333         if (in_array($class, $vals)) return TRUE;
03334       }
03335     }
03336     return FALSE;
03337   }
03338 
03339   /**
03340    * Branch the base QueryPath into another one with the same matches.
03341    *
03342    * This function makes a copy of the QueryPath object, but keeps the new copy 
03343    * (initially) pointed at the same matches. This object can then be queried without
03344    * changing the original QueryPath. However, changes to the elements inside of this
03345    * QueryPath will show up in the QueryPath from which it is branched.
03346    * 
03347    * Compare this operation with {@link cloneAll()}. The cloneAll() call takes
03348    * the current QueryPath object and makes a copy of all of its matches. You continue
03349    * to operate on the same QueryPath object, but the elements inside of the QueryPath
03350    * are copies of those before the call to cloneAll().
03351    *
03352    * This, on the other hand, copies <i>the QueryPath</i>, but keeps valid 
03353    * references to the document and the wrapped elements. A new query branch is 
03354    * created, but any changes will be written back to the same document.
03355    *
03356    * In practice, this comes in handy when you want to do multiple queries on a part
03357    * of the document, but then return to a previous set of matches. (see {@link QPTPL}
03358    * for examples of this in practice).
03359    *
03360    * Example:
03361    *
03362    * @code
03363    * <?php
03364    * $qp = qp(QueryPath::HTML_STUB);
03365    * $branch = $qp->branch();
03366    * $branch->find('title')->text('Title');
03367    * $qp->find('body')->text('This is the body')->writeHTML;
03368    * ?>
03369    * @endcode
03370    *
03371    * Notice that in the code, each of the QueryPath objects is doing its own 
03372    * query. However, both are modifying the same document. The result of the above 
03373    * would look something like this:
03374    *
03375    * @code
03376    * <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
03377    * <html xmlns="http://www.w3.org/1999/xhtml">
03378    * <head>
03379    *    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
03380    *    <title>Title</title>
03381    * </head>
03382    * <body>This is the body</body>
03383    * </html>
03384    * @endcode
03385    *
03386    * Notice that while $qp and $banch were performing separate queries, they 
03387    * both modified the same document.
03388    *
03389    * In jQuery or a browser-based solution, you generally do not need a branching
03390    * function because there is (implicitly) only one document. In QueryPath, there
03391    * is no implicit document. Every document must be explicitly specified (and,
03392    * in most cases, parsed -- which is costly). Branching makes it possible to 
03393    * work on one document with multiple QueryPath objects.
03394    *
03395    * @param string $selector
03396    *  If a selector is passed in, an additional {@link find()} will be executed
03397    *  on the branch before it is returned. (Added in QueryPath 2.0.)
03398    * @return QueryPath
03399    *  A copy of the QueryPath object that points to the same set of elements that
03400    *  the original QueryPath was pointing to.
03401    * @since 1.1
03402    * @see cloneAll()
03403    * @see find()
03404    */
03405   public function branch($selector = NULL) {
03406     $temp = qp($this->matches, NULL, $this->options);
03407     if (isset($selector)) $temp->find($selector);
03408     return $temp;
03409   }
03410   /**
03411    * Perform a deep clone of each node in the QueryPath.
03412    *
03413    * This does not clone the QueryPath object, but instead clones the 
03414    * list of nodes wrapped by the QueryPath. Every element is deeply 
03415    * cloned.
03416    *
03417    * This method is analogous to jQuery's clone() method.
03418    *
03419    * This is a destructive operation, which means that end() will revert
03420    * the list back to the clone's original.
03421    * @see qp()
03422    * @return QueryPath
03423    */
03424   public function cloneAll() {
03425     $found = new SplObjectStorage();
03426     foreach ($this->matches as $m) $found->attach($m->cloneNode(TRUE));
03427     $this->setMatches($found, FALSE);
03428     return $this;
03429   }
03430   
03431   /**
03432    * Clone the QueryPath.
03433    *
03434    * This makes a deep clone of the elements inside of the QueryPath.
03435    *
03436    * This clones only the QueryPathImpl, not all of the decorators. The
03437    * clone operator in PHP should handle the cloning of the decorators.
03438    */
03439   public function __clone() {
03440     //XXX: Should we clone the document?
03441     
03442     // Make sure we clone the kids.
03443     $this->cloneAll();
03444   }  
03445 
03446   /**
03447    * Detach any items from the list if they match the selector.
03448    *
03449    * In other words, each item that matches the selector will be remove
03450    * from the DOM document. The returned QueryPath wraps the list of
03451    * removed elements.
03452    *
03453    * If no selector is specified, this will remove all current matches from
03454    * the document.
03455    *
03456    * @param string $selector
03457    *  A CSS Selector.
03458    * @return QueryPath
03459    *  The Query path wrapping a list of removed items.
03460    * @see replaceAll()
03461    * @see replaceWith()
03462    * @see removeChildren()
03463    * @since 2.1
03464    * @author eabrand
03465    */
03466   public function detach($selector = NULL) {
03467 
03468     if(!empty($selector))
03469     $this->find($selector);
03470 
03471     $found = new SplObjectStorage();
03472     $this->last = $this->matches;
03473     foreach ($this->matches as $item) {
03474       // The item returned is (according to docs) different from
03475       // the one passed in, so we have to re-store it.
03476       $found->attach($item->parentNode->removeChild($item));
03477     }
03478     $this->setMatches($found);
03479     return $this;
03480   }
03481 
03482   /**
03483    * Attach any items from the list if they match the selector.
03484    *
03485    * If no selector is specified, this will remove all current matches from
03486    * the document.
03487    *
03488    * @param QueryPath $dest
03489    *  A QueryPath Selector.
03490    * @return QueryPath
03491    *  The Query path wrapping a list of removed items.
03492    * @see replaceAll()
03493    * @see replaceWith()
03494    * @see removeChildren()
03495    * @since 2.1
03496    * @author eabrand
03497    */
03498   public function attach(QueryPath $dest) {
03499     foreach ($this->last as $m) $dest->append($m);
03500     return $this;
03501   }
03502   
03503   /**
03504    * Reduce the elements matched by QueryPath to only those which contain the given item.
03505    *
03506    * There are two ways in which this is different from jQuery's implementation:
03507    * - We allow ANY DOMNode, not just DOMElements. That means this will work on 
03508    *   processor instructions, text nodes, comments, etc.
03509    * - Unlike jQuery, this implementation of has() follows QueryPath standard behavior
03510    *   and modifies the existing object. It does not create a brand new object.
03511    * 
03512    * @param mixed $contained
03513    *   - If $contained is a CSS selector (e.g. '#foo'), this will test to see
03514    *     if the current QueryPath has any elements that contain items that match
03515    *     the selector.
03516    *   - If $contained is a DOMNode, then this will test to see if THE EXACT DOMNode
03517    *     exists in the currently matched elements. (Note that you cannot match across DOM trees, even if it is the same document.)
03518    * @since 2.1
03519    * @author eabrand
03520    * @todo It would be trivially easy to add support for iterating over an array or Iterable of DOMNodes.
03521    */
03522   public function has($contained) {
03523     $found = new SplObjectStorage();
03524     
03525     // If it's a selector, we just get all of the DOMNodes that match the selector.
03526     $nodes = array();
03527     if (is_string($contained)) {
03528       // Get the list of nodes.
03529       $nodes = $this->branch($contained)->get();
03530     }
03531     elseif ($contained instanceof DOMNode) {
03532       // Make a list with one node.
03533       $nodes = array($contained);
03534     }
03535     
03536     // Now we go through each of the nodes that we are testing. We want to find
03537     // ALL PARENTS that are in our existing QueryPath matches. Those are the
03538     // ones we add to our new matches.
03539     foreach ($nodes as $original_node) {
03540       $node = $original_node;
03541       while (!empty($node)/* && $node != $node->ownerDocument*/) {
03542         if ($this->matches->contains($node)) {
03543           $found->attach($node);
03544         }
03545         $node = $node->parentNode;
03546       }
03547     }
03548     
03549     $this->setMatches($found);
03550     return $this;
03551   }
03552 
03553   /**
03554    * Empty everything within the specified element.
03555    *
03556    * A convenience function for removeChildren(). This is equivalent to jQuery's 
03557    * empty() function. However, `empty` is a built-in in PHP, and cannot be used as a 
03558    * function name.
03559    *
03560    * @return QueryPath
03561    *  The QueryPath object with the newly emptied elements.
03562    * @see removeChildren()
03563    * @since 2.1
03564    * @author eabrand
03565    * @deprecated The removeChildren() function is the preferred method.
03566    */
03567   public function emptyElement() {
03568     $this->removeChildren();
03569     return $this;
03570   }
03571   
03572   /**
03573    * Get the even elements, so counter-intuitively 1, 3, 5, etc.
03574    *
03575    *
03576    *
03577    * @return QueryPath
03578    *  A QueryPath wrapping all of the children.
03579    * @see removeChildren()
03580    * @see parent()
03581    * @see parents()
03582    * @see next()
03583    * @see prev()
03584    * @since 2.1
03585    * @author eabrand
03586    */
03587   public function even() {
03588     $found = new SplObjectStorage();
03589     $even = false;
03590     foreach ($this->matches as $m) {
03591       if ($even && $m->nodeType == XML_ELEMENT_NODE) $found->attach($m);
03592       $even = ($even) ? false : true;
03593     }
03594     $this->setMatches($found);
03595     $this->matches = $found; // Don't buffer this. It is temporary.
03596     return $this;
03597   }
03598 
03599   /**
03600    * Get the odd elements, so counter-intuitively 0, 2, 4, etc.
03601    *
03602    *
03603    *
03604    * @return QueryPath
03605    *  A QueryPath wrapping all of the children.
03606    * @see removeChildren()
03607    * @see parent()
03608    * @see parents()
03609    * @see next()
03610    * @see prev()
03611    * @since 2.1
03612    * @author eabrand
03613    */
03614   public function odd() {
03615     $found = new SplObjectStorage();
03616     $odd = true;
03617     foreach ($this->matches as $m) {
03618       if ($odd && $m->nodeType == XML_ELEMENT_NODE) $found->attach($m);
03619       $odd = ($odd) ? false : true;
03620     }
03621     $this->setMatches($found);
03622     $this->matches = $found; // Don't buffer this. It is temporary.
03623     return $this;
03624   }
03625   
03626   /**
03627    * Get the first matching element.
03628    *
03629    *
03630    * @return QueryPath
03631    *  A QueryPath wrapping all of the children.
03632    * @see next()
03633    * @see prev()
03634    * @since 2.1
03635    * @author eabrand
03636    */
03637   public function first() {
03638     $found = new SplObjectStorage();
03639     foreach ($this->matches as $m) {
03640       if ($m->nodeType == XML_ELEMENT_NODE) {
03641         $found->attach($m);
03642         break;
03643       }
03644     }
03645     $this->setMatches($found);
03646     $this->matches = $found; // Don't buffer this. It is temporary.
03647     return $this;
03648   }
03649 
03650   /**
03651    * Get the first child of the matching element.
03652    *
03653    *
03654    * @return QueryPath
03655    *  A QueryPath wrapping all of the children.
03656    * @see next()
03657    * @see prev()
03658    * @since 2.1
03659    * @author eabrand
03660    */
03661   public function firstChild() {
03662     // Could possibly use $m->firstChild http://theserverpages.com/php/manual/en/ref.dom.php
03663     $found = new SplObjectStorage();
03664     $flag = false;
03665     foreach ($this->matches as $m) {
03666       foreach($m->childNodes as $c) {
03667         if ($c->nodeType == XML_ELEMENT_NODE) {
03668           $found->attach($c);
03669           $flag = true;
03670           break;
03671         }
03672       }
03673       if($flag) break;
03674     }
03675     $this->setMatches($found);
03676     $this->matches = $found; // Don't buffer this. It is temporary.
03677     return $this;
03678   }
03679 
03680   /**
03681    * Get the last matching element.
03682    *
03683    *
03684    * @return QueryPath
03685    *  A QueryPath wrapping all of the children.
03686    * @see next()
03687    * @see prev()
03688    * @since 2.1
03689    * @author eabrand
03690    */
03691   public function last() {
03692     $found = new SplObjectStorage();
03693     $item = null;
03694     foreach ($this->matches as $m) {
03695       if ($m->nodeType == XML_ELEMENT_NODE) {
03696         $item = $m;
03697       }
03698     }
03699     if ($item) {
03700       $found->attach($item);
03701     }
03702     $this->setMatches($found);
03703     $this->matches = $found; // Don't buffer this. It is temporary.
03704     return $this;
03705   }
03706 
03707   /**
03708    * Get the last child of the matching element.
03709    *
03710    *
03711    * @return QueryPath
03712    *  A QueryPath wrapping all of the children.
03713    * @see next()
03714    * @see prev()
03715    * @since 2.1
03716    * @author eabrand
03717    */
03718   public function lastChild() {
03719     $found = new SplObjectStorage();
03720     $item = null;
03721     foreach ($this->matches as $m) {
03722       foreach($m->childNodes as $c) {
03723         if ($c->nodeType == XML_ELEMENT_NODE) {
03724           $item = $c;
03725         }
03726       }
03727       if ($item) {
03728         $found->attach($item);
03729         $item = null;
03730       }
03731     }
03732     $this->setMatches($found);
03733     $this->matches = $found; // Don't buffer this. It is temporary.
03734     return $this;
03735   }
03736 
03737   /**
03738    * Get all siblings after an element until the selector is reached.
03739    *
03740    * For each element in the QueryPath, get all siblings that appear after
03741    * it. If a selector is passed in, then only siblings that match the
03742    * selector will be included.
03743    *
03744    * @param string $selector
03745    *  A valid CSS 3 selector.
03746    * @return QueryPath
03747    *  The QueryPath object, now containing the matching siblings.
03748    * @see next()
03749    * @see prevAll()
03750    * @see children()
03751    * @see siblings()
03752    * @since 2.1
03753    * @author eabrand
03754    */
03755   public function nextUntil($selector = NULL) {
03756     $found = new SplObjectStorage();
03757     foreach ($this->matches as $m) {
03758       while (isset($m->nextSibling)) {
03759         $m = $m->nextSibling;
03760         if ($m->nodeType === XML_ELEMENT_NODE) {
03761           if (!empty($selector)) {
03762             if (qp($m, NULL, $this->options)->is($selector) > 0) {
03763               break;
03764             }
03765             else {
03766               $found->attach($m);
03767             }
03768           }
03769           else {
03770             $found->attach($m);
03771           }
03772         }
03773       }
03774     }
03775     $this->setMatches($found);
03776     return $this;
03777   } 
03778 
03779   /**
03780    * Get the previous siblings for each element in the QueryPath
03781    * until the selector is reached.
03782    *
03783    * For each element in the QueryPath, get all previous siblings. If a
03784    * selector is provided, only matching siblings will be retrieved.
03785    *
03786    * @param string $selector
03787    *  A valid CSS 3 selector.
03788    * @return QueryPath
03789    *  The QueryPath object, now wrapping previous sibling elements.
03790    * @see prev()
03791    * @see nextAll()
03792    * @see siblings()
03793    * @see contents()
03794    * @see children()
03795    * @since 2.1
03796    * @author eabrand
03797    */
03798   public function prevUntil($selector = NULL) {
03799     $found = new SplObjectStorage();
03800     foreach ($this->matches as $m) {
03801       while (isset($m->previousSibling)) {
03802         $m = $m->previousSibling;
03803         if ($m->nodeType === XML_ELEMENT_NODE) {
03804           if (!empty($selector) && qp($m, NULL, $this->options)->is($selector))
03805           break;
03806           else
03807           $found->attach($m);
03808         }
03809       }
03810     }
03811     $this->setMatches($found);
03812     return $this;
03813   }
03814   
03815   /**
03816    * Get all ancestors of each element in the QueryPath until the selector is reached.
03817    *
03818    * If a selector is present, only matching ancestors will be retrieved.
03819    *
03820    * @see parent()
03821    * @param string $selector
03822    *  A valid CSS 3 Selector.
03823    * @return QueryPath
03824    *  A QueryPath object containing the matching ancestors.
03825    * @see siblings()
03826    * @see children()
03827    * @since 2.1
03828    * @author eabrand
03829    */
03830   public function parentsUntil($selector = NULL) {
03831     $found = new SplObjectStorage();
03832     foreach ($this->matches as $m) {
03833       while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) {
03834         $m = $m->parentNode;
03835         // Is there any case where parent node is not an element?
03836         if ($m->nodeType === XML_ELEMENT_NODE) {
03837           if (!empty($selector)) {
03838             if (qp($m, NULL, $this->options)->is($selector) > 0)
03839             break;
03840             else
03841             $found->attach($m);
03842           }
03843           else
03844           $found->attach($m);
03845         }
03846       }
03847     }
03848     $this->setMatches($found);
03849     return $this;
03850   }
03851   
03852   /////// INTERNAL FUNCTIONS ////////
03853   
03854   
03855   /**
03856    * Determine whether a given string looks like XML or not.
03857    *
03858    * Basically, this scans a portion of the supplied string, checking to see
03859    * if it has a tag-like structure. It is possible to "confuse" this, which
03860    * may subsequently result in parse errors, but in the vast majority of 
03861    * cases, this method serves as a valid inicator of whether or not the 
03862    * content looks like XML.
03863    *
03864    * Things that are intentional excluded:
03865    * - plain text with no markup.
03866    * - strings that look like filesystem paths.
03867    * 
03868    * Subclasses SHOULD NOT OVERRIDE THIS. Altering it may be altering
03869    * core assumptions about how things work. Instead, classes should 
03870    * override the constructor and pass in only one of the parsed types
03871    * that this class expects.
03872    */
03873   protected function isXMLish($string) {
03874     // Long strings will exhaust the regex engine, so we
03875     // grab a representative string.
03876     // $test = substr($string, 0, 255);
03877     return (strpos($string, '<') !== FALSE && strpos($string, '>') !== FALSE);
03878     //return preg_match(ML_EXP, $test) > 0;
03879   }
03880   
03881   private function parseXMLString($string, $flags = NULL) {
03882     
03883     $document = new DOMDocument('1.0');
03884     $lead = strtolower(substr($string, 0, 5)); // <?xml
03885     try {
03886       set_error_handler(array('QueryPathParseException', 'initializeFromError'), $this->errTypes);
03887       
03888       if (isset($this->options['convert_to_encoding'])) {
03889         // Is there another way to do this?
03890         
03891         $from_enc = isset($this->options['convert_from_encoding']) ? $this->options['convert_from_encoding'] : 'auto';
03892         $to_enc = $this->options['convert_to_encoding'];
03893         
03894         if (function_exists('mb_convert_encoding')) {
03895           $string = mb_convert_encoding($string, $to_enc, $from_enc);
03896         }
03897         
03898       }
03899       
03900       // This is to avoid cases where low ascii digits have slipped into HTML.
03901       // AFAIK, it should not adversly effect UTF-8 documents.
03902       if (!empty($this->options['strip_low_ascii'])) {
03903         $string = filter_var($string, FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_LOW);
03904       }
03905       
03906       // Allow users to override parser settings.
03907       if (empty($this->options['use_parser'])) {
03908         $useParser = '';
03909       }
03910       else {
03911         $useParser = strtolower($this->options['use_parser']);
03912       }
03913       
03914       // If HTML parser is requested, we use it.
03915       if ($useParser == 'html') {
03916         $document->loadHTML($string);
03917       }
03918       // Parse as XML if it looks like XML, or if XML parser is requested.
03919       elseif ($lead == '<?xml' || $useParser == 'xml') {
03920         if ($this->options['replace_entities']) {
03921           $string = QueryPathEntities::replaceAllEntities($string);
03922         }
03923         $document->loadXML($string, $flags);
03924       }
03925       // In all other cases, we try the HTML parser.
03926       else {
03927         $document->loadHTML($string);
03928       }
03929     }
03930     // Emulate 'finally' behavior.
03931     catch (Exception $e) {
03932       restore_error_handler();
03933       throw $e;
03934     }
03935     restore_error_handler();
03936     
03937     if (empty($document)) {
03938       throw new QueryPathParseException('Unknown parser exception.');
03939     }
03940     return $document;
03941   }
03942   
03943   /**
03944    * EXPERT: Be very, very careful using this.
03945    * A utility function for setting the current set of matches.
03946    * It makes sure the last matches buffer is set (for end() and andSelf()).
03947    * @since 2.0
03948    */
03949   public function setMatches($matches, $unique = TRUE) {
03950     // This causes a lot of overhead....
03951     //if ($unique) $matches = self::unique($matches);
03952     $this->last = $this->matches;
03953     
03954     // Just set current matches.
03955     if ($matches instanceof SplObjectStorage) {
03956       $this->matches = $matches;
03957     }
03958     // This is likely legacy code that needs conversion.
03959     elseif (is_array($matches)) {
03960       trigger_error('Legacy array detected.');
03961       $tmp = new SplObjectStorage();
03962       foreach ($matches as $m) $tmp->attach($m);
03963       $this->matches = $tmp;
03964     }
03965     // For non-arrays, try to create a new match set and 
03966     // add this object.
03967     else {
03968       $found = new SplObjectStorage();
03969       if (isset($matches)) $found->attach($matches);
03970       $this->matches = $found;
03971     }
03972     
03973     // EXPERIMENTAL: Support for qp()->length.
03974     $this->length = $this->matches->count();
03975   }
03976   
03977   /**
03978    * Set the match monitor to empty.
03979    *
03980    * This preserves history.
03981    *
03982    * @since 2.0
03983    */
03984   private function noMatches() {
03985     $this->setMatches(NULL);
03986   }
03987     
03988   /**
03989    * A utility function for retriving a match by index.
03990    *
03991    * The internal data structure used in QueryPath does not have
03992    * strong random access support, so we suppliment it with this method.
03993    */
03994   private function getNthMatch($index) {
03995     if ($index > $this->matches->count() || $index < 0) return;
03996     
03997     $i = 0;
03998     foreach ($this->matches as $m) {
03999       if ($i++ == $index) return $m;
04000     }
04001   }
04002   
04003   /**
04004    * Convenience function for getNthMatch(0).
04005    */
04006   private function getFirstMatch() {
04007     $this->matches->rewind();
04008     return $this->matches->current();
04009   }
04010   
04011   /**
04012    * Parse just a fragment of XML.
04013    * This will automatically prepend an <?xml ?> declaration before parsing.
04014    * @param string $string 
04015    *   Fragment to parse.
04016    * @return DOMDocumentFragment 
04017    *   The parsed document fragment.
04018    */
04019    /*
04020   private function parseXMLFragment($string) {
04021     $frag = $this->document->createDocumentFragment();
04022     $frag->appendXML($string);
04023     return $frag;
04024   }
04025   */
04026   
04027   /**
04028    * Parse an XML or HTML file.
04029    *
04030    * This attempts to autodetect the type of file, and then parse it.
04031    *
04032    * @param string $filename
04033    *  The file name to parse.
04034    * @param int $flags
04035    *  The OR-combined flags accepted by the DOM parser. See the PHP documentation
04036    *  for DOM or for libxml.
04037    * @param resource $context
04038    *  The stream context for the file IO. If this is set, then an alternate 
04039    *  parsing path is followed: The file is loaded by PHP's stream-aware IO
04040    *  facilities, read entirely into memory, and then handed off to 
04041    *  {@link parseXMLString()}. On large files, this can have a performance impact.
04042    * @throws QueryPathParseException 
04043    *  Thrown when a file cannot be loaded or parsed.
04044    */
04045   private function parseXMLFile($filename, $flags = NULL, $context = NULL) {
04046     
04047     // If a context is specified, we basically have to do the reading in 
04048     // two steps:
04049     if (!empty($context)) {
04050       try {
04051         set_error_handler(array('QueryPathParseException', 'initializeFromError'), $this->errTypes);
04052         $contents = file_get_contents($filename, FALSE, $context);
04053         
04054       }
04055       // Apparently there is no 'finally' in PHP, so we have to restore the error
04056       // handler this way:
04057       catch(Exception $e) {
04058         restore_error_handler();
04059         throw $e;
04060       }
04061       restore_error_handler();
04062       
04063       if ($contents == FALSE) {
04064         throw new QueryPathParseException(sprintf('Contents of the file %s could not be retrieved.', $filename));
04065       }
04066       
04067       
04068       /* This is basically unneccessary overhead, as it is not more
04069        * accurate than the existing method.
04070       if (isset($md['wrapper_type']) &&  $md['wrapper_type'] == 'http') {
04071         for ($i = 0; $i < count($md['wrapper_data']); ++$i) {
04072           if (stripos($md['wrapper_data'][$i], 'content-type:') !== FALSE) {
04073             $ct = trim(substr($md['wrapper_data'][$i], 12));
04074             if (stripos('text/html') === 0) {
04075               $this->parseXMLString($contents, $flags, 'text/html');
04076             }
04077             else {
04078               // We can't account for all of the mime types that have
04079               // an XML payload, so we set it to XML.
04080               $this->parseXMLString($contents, $flags, 'text/xml');
04081             }
04082             break;
04083           }
04084         }
04085       }
04086       */
04087       
04088       return $this->parseXMLString($contents, $flags);
04089     }
04090     
04091     $document = new DOMDocument();
04092     $lastDot = strrpos($filename, '.');
04093     
04094     $htmlExtensions = array(
04095       '.html' => 1,
04096       '.htm' => 1,
04097     );
04098     
04099     // Allow users to override parser settings.
04100     if (empty($this->options['use_parser'])) {
04101       $useParser = '';
04102     }
04103     else {
04104       $useParser = strtolower($this->options['use_parser']);
04105     }
04106     
04107     $ext = $lastDot !== FALSE ? strtolower(substr($filename, $lastDot)) : '';
04108     
04109     try {
04110       set_error_handler(array('QueryPathParseException', 'initializeFromError'), $this->errTypes);
04111       
04112       // If the parser is explicitly set to XML, use that parser.
04113       if ($useParser == 'xml') {
04114         $r = $document->load($filename, $flags);
04115       }
04116       // Otherwise, see if it looks like HTML.
04117       elseif (isset($htmlExtensions[$ext]) || $useParser == 'html') {
04118         // Try parsing it as HTML.
04119         $r = $document->loadHTMLFile($filename);
04120       }
04121       // Default to XML.
04122       else {
04123         $r = $document->load($filename, $flags);
04124       }
04125       
04126     }
04127     // Emulate 'finally' behavior.
04128     catch (Exception $e) {
04129       restore_error_handler();
04130       throw $e;
04131     }
04132     restore_error_handler();
04133     
04134     
04135     
04136     /*
04137     if ($r == FALSE) {
04138       $fmt = 'Failed to load file %s: %s (%s, %s)';
04139       $err = error_get_last();
04140       if ($err['type'] & self::IGNORE_ERRORS) {
04141         // Need to report these somehow...
04142         trigger_error($err['message'], E_USER_WARNING);
04143       }
04144       else {
04145         throw new QueryPathParseException(sprintf($fmt, $filename, $err['message'], $err['file'], $err['line']));
04146       }
04147       
04148       //throw new QueryPathParseException(sprintf($fmt, $filename, $err['message'], $err['file'], $err['line']));
04149     }
04150     */
04151     return $document;
04152   }
04153   
04154   /**
04155    * Call extension methods.
04156    *
04157    * This function is used to invoke extension methods. It searches the
04158    * registered extenstensions for a matching function name. If one is found,
04159    * it is executed with the arguments in the $arguments array.
04160    * 
04161    * @throws QueryPathException
04162    *  An exception is thrown if a non-existent method is called.
04163    */
04164   public function __call($name, $arguments) {
04165     
04166     if (!QueryPathExtensionRegistry::$useRegistry) {
04167       throw new QueryPathException("No method named $name found (Extensions disabled).");      
04168     }
04169     
04170     // Loading of extensions is deferred until the first time a
04171     // non-core method is called. This makes constructing faster, but it
04172     // may make the first invocation of __call() slower (if there are 
04173     // enough extensions.)
04174     //
04175     // The main reason for moving this out of the constructor is that most
04176     // new QueryPath instances do not use extensions. Charging qp() calls
04177     // with the additional hit is not a good idea.
04178     //
04179     // Also, this will at least limit the number of circular references.
04180     if (empty($this->ext)) {
04181       // Load the registry
04182       $this->ext = QueryPathExtensionRegistry::getExtensions($this);
04183     }
04184     
04185     // Note that an empty ext registry indicates that extensions are disabled.
04186     if (!empty($this->ext) && QueryPathExtensionRegistry::hasMethod($name)) {
04187       $owner = QueryPathExtensionRegistry::getMethodClass($name);
04188       $method = new ReflectionMethod($owner, $name);
04189       return $method->invokeArgs($this->ext[$owner], $arguments);
04190     }
04191     throw new QueryPathException("No method named $name found. Possibly missing an extension.");
04192   }
04193   
04194   /**
04195    * Dynamically generate certain properties.
04196    *
04197    * This is used primarily to increase jQuery compatibility by providing property-like
04198    * behaviors.
04199    *
04200    * Currently defined properties:
04201    *   - length: Alias of {@link size()}.
04202    */
04203    /*
04204   public function __get($name) {
04205     switch ($name) {
04206       case 'length':
04207         return $this->size();
04208       default:
04209         throw new QueryPathException('Unknown or inaccessible property "' . $name . '" (via __get())');
04210     }
04211   }
04212   */
04213   
04214   /**
04215    * Get an iterator for the matches in this object.
04216    * @return Iterable
04217    *  Returns an iterator.
04218    */
04219   public function getIterator() {
04220     $i = new QueryPathIterator($this->matches);
04221     $i->options = $this->options;
04222     return $i;
04223   }
04224 }
04225 
04226 /**
04227  * Perform various tasks on HTML/XML entities.
04228  *
04229  * @ingroup querypath_util
04230  */
04231 class QueryPathEntities {
04232   
04233   /**
04234    * This is three regexes wrapped into 1. The | divides them.
04235    * 1: Match any char-based entity. This will go in $matches[1]
04236    * 2: Match any num-based entity. This will go in $matches[2]
04237    * 3: Match any hex-based entry. This will go in $matches[3]
04238    * 4: Match any ampersand that is not an entity. This goes in $matches[4]
04239    *    This last rule will only match if one of the previous two has not already
04240    *    matched.
04241    * XXX: Are octal encodings for entities acceptable?
04242    */
04243   //protected static $regex = '/&([\w]+);|&#([\d]+);|&([\w]*[\s$]+)/m';
04244   protected static $regex = '/&([\w]+);|&#([\d]+);|&#(x[0-9a-fA-F]+);|(&)/m';
04245   
04246   /**
04247    * Replace all entities.
04248    * This will scan a string and will attempt to replace all
04249    * entities with their numeric equivalent. This will not work
04250    * with specialized entities.
04251    *
04252    * @param string $string
04253    *  The string to perform replacements on.
04254    * @return string
04255    *  Returns a string that is similar to the original one, but with 
04256    *  all entity replacements made.
04257    */
04258   public static function replaceAllEntities($string) {
04259     return preg_replace_callback(self::$regex, 'QueryPathEntities::doReplacement', $string);
04260   }
04261   
04262   /**
04263    * Callback for processing replacements.
04264    *
04265    * @param array $matches
04266    *  The regular expression replacement array.
04267    */
04268   protected static function doReplacement($matches) {
04269     // See how the regex above works out.
04270     //print_r($matches);
04271 
04272     // From count, we can tell whether we got a 
04273     // char, num, or bare ampersand.
04274     $count = count($matches);
04275     switch ($count) {
04276       case 2:
04277         // We have a character entity
04278         return '&#' . self::replaceEntity($matches[1]) . ';';
04279       case 3:
04280       case 4:
04281         // we have a numeric entity
04282         return '&#' . $matches[$count-1] . ';'; 
04283       case 5:
04284         // We have an unescaped ampersand.
04285         return '&#38;';
04286     }
04287   }
04288   
04289   /**
04290    * Lookup an entity string's numeric equivalent.
04291    *
04292    * @param string $entity
04293    *  The entity whose numeric value is needed.
04294    * @return int
04295    *  The integer value corresponding to the entity.
04296    * @author Matt Butcher
04297    * @author Ryan Mahoney
04298    */
04299   public static function replaceEntity($entity) {
04300     return self::$entity_array[$entity];
04301   }
04302   
04303   /**
04304    * Conversion mapper for entities in HTML.
04305    * Large entity conversion table. This is 
04306    * significantly broader in range than 
04307    * get_html_translation_table(HTML_ENTITIES).
04308    *
04309    * This code comes from Rhizome ({@link http://code.google.com/p/sinciput})
04310    *
04311    * @see get_html_translation_table()
04312    */
04313   private static $entity_array = array(
04314     'nbsp' => 160, 'iexcl' => 161, 'cent' => 162, 'pound' => 163, 
04315     'curren' => 164, 'yen' => 165, 'brvbar' => 166, 'sect' => 167, 
04316     'uml' => 168, 'copy' => 169, 'ordf' => 170, 'laquo' => 171, 
04317     'not' => 172, 'shy' => 173, 'reg' => 174, 'macr' => 175, 'deg' => 176, 
04318     'plusmn' => 177, 'sup2' => 178, 'sup3' => 179, 'acute' => 180, 
04319     'micro' => 181, 'para' => 182, 'middot' => 183, 'cedil' => 184, 
04320     'sup1' => 185, 'ordm' => 186, 'raquo' => 187, 'frac14' => 188, 
04321     'frac12' => 189, 'frac34' => 190, 'iquest' => 191, 'Agrave' => 192, 
04322     'Aacute' => 193, 'Acirc' => 194, 'Atilde' => 195, 'Auml' => 196, 
04323     'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199, 'Egrave' => 200, 
04324     'Eacute' => 201, 'Ecirc' => 202, 'Euml' => 203, 'Igrave' => 204, 
04325     'Iacute' => 205, 'Icirc' => 206, 'Iuml' => 207, 'ETH' => 208, 
04326     'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211, 'Ocirc' => 212, 
04327     'Otilde' => 213, 'Ouml' => 214, 'times' => 215, 'Oslash' => 216, 
04328     'Ugrave' => 217, 'Uacute' => 218, 'Ucirc' => 219, 'Uuml' => 220, 
04329     'Yacute' => 221, 'THORN' => 222, 'szlig' => 223, 'agrave' => 224, 
04330     'aacute' => 225, 'acirc' => 226, 'atilde' => 227, 'auml' => 228, 
04331     'aring' => 229, 'aelig' => 230, 'ccedil' => 231, 'egrave' => 232, 
04332     'eacute' => 233, 'ecirc' => 234, 'euml' => 235, 'igrave' => 236, 
04333     'iacute' => 237, 'icirc' => 238, 'iuml' => 239, 'eth' => 240, 
04334     'ntilde' => 241, 'ograve' => 242, 'oacute' => 243, 'ocirc' => 244, 
04335     'otilde' => 245, 'ouml' => 246, 'divide' => 247, 'oslash' => 248, 
04336     'ugrave' => 249, 'uacute' => 250, 'ucirc' => 251, 'uuml' => 252, 
04337     'yacute' => 253, 'thorn' => 254, 'yuml' => 255, 'quot' => 34, 
04338     'amp' => 38, 'lt' => 60, 'gt' => 62, 'apos' => 39, 'OElig' => 338, 
04339     'oelig' => 339, 'Scaron' => 352, 'scaron' => 353, 'Yuml' => 376, 
04340     'circ' => 710, 'tilde' => 732, 'ensp' => 8194, 'emsp' => 8195, 
04341     'thinsp' => 8201, 'zwnj' => 8204, 'zwj' => 8205, 'lrm' => 8206, 
04342     'rlm' => 8207, 'ndash' => 8211, 'mdash' => 8212, 'lsquo' => 8216, 
04343     'rsquo' => 8217, 'sbquo' => 8218, 'ldquo' => 8220, 'rdquo' => 8221, 
04344     'bdquo' => 8222, 'dagger' => 8224, 'Dagger' => 8225, 'permil' => 8240, 
04345     'lsaquo' => 8249, 'rsaquo' => 8250, 'euro' => 8364, 'fnof' => 402, 
04346     'Alpha' => 913, 'Beta' => 914, 'Gamma' => 915, 'Delta' => 916, 
04347     'Epsilon' => 917, 'Zeta' => 918, 'Eta' => 919, 'Theta' => 920, 
04348     'Iota' => 921, 'Kappa' => 922, 'Lambda' => 923, 'Mu' => 924, 'Nu' => 925, 
04349     'Xi' => 926, 'Omicron' => 927, 'Pi' => 928, 'Rho' => 929, 'Sigma' => 931,
04350     'Tau' => 932, 'Upsilon' => 933, 'Phi' => 934, 'Chi' => 935, 'Psi' => 936,
04351     'Omega' => 937, 'alpha' => 945, 'beta' => 946, 'gamma' => 947, 
04352     'delta' => 948, 'epsilon' => 949, 'zeta' => 950, 'eta' => 951, 
04353     'theta' => 952, 'iota' => 953, 'kappa' => 954, 'lambda' => 955, 
04354     'mu' => 956, 'nu' => 957, 'xi' => 958, 'omicron' => 959, 'pi' => 960, 
04355     'rho' => 961, 'sigmaf' => 962, 'sigma' => 963, 'tau' => 964, 
04356     'upsilon' => 965, 'phi' => 966, 'chi' => 967, 'psi' => 968, 
04357     'omega' => 969, 'thetasym' => 977, 'upsih' => 978, 'piv' => 982, 
04358     'bull' => 8226, 'hellip' => 8230, 'prime' => 8242, 'Prime' => 8243, 
04359     'oline' => 8254, 'frasl' => 8260, 'weierp' => 8472, 'image' => 8465, 
04360     'real' => 8476, 'trade' => 8482, 'alefsym' => 8501, 'larr' => 8592, 
04361     'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595, 'harr' => 8596, 
04362     'crarr' => 8629, 'lArr' => 8656, 'uArr' => 8657, 'rArr' => 8658, 
04363     'dArr' => 8659, 'hArr' => 8660, 'forall' => 8704, 'part' => 8706, 
04364     'exist' => 8707, 'empty' => 8709, 'nabla' => 8711, 'isin' => 8712, 
04365     'notin' => 8713, 'ni' => 8715, 'prod' => 8719, 'sum' => 8721, 
04366     'minus' => 8722, 'lowast' => 8727, 'radic' => 8730, 'prop' => 8733, 
04367     'infin' => 8734, 'ang' => 8736, 'and' => 8743, 'or' => 8744, 'cap' => 8745, 
04368     'cup' => 8746, 'int' => 8747, 'there4' => 8756, 'sim' => 8764, 
04369     'cong' => 8773, 'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801, 
04370     'le' => 8804, 'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836, 
04371     'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855, 
04372     'perp' => 8869, 'sdot' => 8901, 'lceil' => 8968, 'rceil' => 8969, 
04373     'lfloor' => 8970, 'rfloor' => 8971, 'lang' => 9001, 'rang' => 9002, 
04374     'loz' => 9674, 'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829, 
04375     'diams' => 9830
04376   );
04377 }
04378 
04379 /**
04380  * An iterator for QueryPath.
04381  *
04382  * This provides iterator support for QueryPath. You do not need to construct
04383  * a QueryPathIterator. QueryPath does this when its {@link QueryPath::getIterator()}
04384  * method is called.
04385  *
04386  * @ingroup querypath_util
04387  */
04388 class QueryPathIterator extends IteratorIterator {
04389   public $options = array();
04390   private $qp = NULL;
04391   
04392   public function current() {
04393     if (!isset($this->qp)) {
04394       $this->qp = qp(parent::current(), NULL, $this->options);
04395     }
04396     else {
04397       $splos = new SplObjectStorage();
04398       $splos->attach(parent::current());
04399       $this->qp->setMatches($splos);
04400     }
04401     return $this->qp;
04402   }
04403 }
04404 
04405 /**
04406  * Manage default options.
04407  *
04408  * This class stores the default options for QueryPath. When a new 
04409  * QueryPath object is constructed, options specified here will be 
04410  * used.
04411  *
04412  * <b>Details</b>
04413  * This class defines no options of its own. Instead, it provides a 
04414  * central tool for developers to override options set by QueryPath.
04415  * When a QueryPath object is created, it will evaluate options in the 
04416  * following order:
04417  *
04418  * - Options passed into {@link qp()} have highest priority.
04419  * - Options in {@link QueryPathOptions} (this class) have the next highest priority.
04420  * - If the option is not specified elsewhere, QueryPath will use its own defaults.
04421  *
04422  * @see qp()
04423  * @see QueryPathOptions::set()
04424  * @ingroup querypath_util
04425  */
04426 class QueryPathOptions {
04427   
04428   /**
04429    * This is the static options array.
04430    *
04431    * Use the {@link set()}, {@link get()}, and {@link merge()} to
04432    * modify this array.
04433    */
04434   static $options = array();
04435   
04436   /**
04437    * Set the default options.
04438    *
04439    * The passed-in array will be used as the default options list.
04440    *
04441    * @param array $array
04442    *  An associative array of options.
04443    */
04444   static function set($array) {
04445     self::$options = $array;
04446   }
04447   
04448   /**
04449    * Get the default options.
04450    *
04451    * Get all options currently set as default.
04452    *
04453    * @return array
04454    *  An array of options. Note that only explicitly set options are 
04455    *  returned. {@link QueryPath} defines default options which are not
04456    *  stored in this object.
04457    */
04458   static function get() {
04459     return self::$options;
04460   }
04461   
04462   /**
04463    * Merge the provided array with existing options.
04464    *
04465    * On duplicate keys, the value in $array will overwrite the 
04466    * value stored in the options.
04467    *
04468    * @param array $array
04469    *  Associative array of options to merge into the existing options.
04470    */
04471   static function merge($array) {
04472     self::$options = $array + self::$options;
04473   }
04474   
04475   /**
04476    * Returns true of the specified key is already overridden in this object.
04477    *
04478    * @param string $key
04479    *  The key to search for.
04480    */
04481   static function has($key) {
04482     return array_key_exists($key, self::$options);
04483   }
04484   
04485 }
04486 
04487 /**
04488  * Exception indicating that a problem has occured inside of a QueryPath object.
04489  *
04490  * @ingroup querypath_core
04491  */
04492 class QueryPathException extends Exception {}
04493 
04494 /**
04495  * Exception indicating that a parser has failed to parse a file.
04496  *
04497  * This will report parser warnings as well as parser errors. It should only be 
04498  * thrown, though, under error conditions.
04499  *
04500  * @ingroup querypath_core
04501  */
04502 class QueryPathParseException extends QueryPathException {
04503   const ERR_MSG_FORMAT = 'Parse error in %s on line %d column %d: %s (%d)';
04504   const WARN_MSG_FORMAT = 'Parser warning in %s on line %d column %d: %s (%d)';
04505   // trigger_error
04506   public function __construct($msg = '', $code = 0, $file = NULL, $line = NULL) {
04507 
04508     $msgs = array();
04509     foreach(libxml_get_errors() as $err) {
04510       $format = $err->level == LIBXML_ERR_WARNING ? self::WARN_MSG_FORMAT : self::ERR_MSG_FORMAT;
04511       $msgs[] = sprintf($format, $err->file, $err->line, $err->column, $err->message, $err->code);
04512     }
04513     $msg .= implode("\n", $msgs);
04514     
04515     if (isset($file)) {
04516       $msg .= ' (' . $file;
04517       if (isset($line)) $msg .= ': ' . $line;
04518       $msg .= ')';
04519     }
04520     
04521     parent::__construct($msg, $code);
04522   }
04523   
04524   public static function initializeFromError($code, $str, $file, $line, $cxt) {
04525     //printf("\n\nCODE: %s %s\n\n", $code, $str);
04526     $class = __CLASS__;
04527     throw new $class($str, $code, $file, $line);
04528   }
04529 }
04530 
04531 /**
04532  * Indicates that an input/output exception has occurred.
04533  *
04534  * @ingroup querypath_core
04535  */
04536 class QueryPathIOException extends QueryPathParseException {
04537   public static function initializeFromError($code, $str, $file, $line, $cxt) {
04538     $class = __CLASS__;
04539     throw new $class($str, $code, $file, $line);
04540   }
04541   
04542 }