'./', // string The path to check for $file in 'element' => '', // string The XML element to return 'type' => 'upload', 'encoding' => 'UTF-8', 'pointer' => 1, 'chunkSize' => 1024, 'filter' => true, 'get_cloud' => false ); /** * file * * @var string The filename being read * @access public */ public $file = ''; /** * pointer * * @var integer The current position the file is being read from * @access public */ public $reader; public $cloud = array(); public $loop = 1; public $is_404 = false; public $parser_type = false; /** * handle * * @var resource The fopen() resource * @access private */ private $handle = null; /** * reading * * @var boolean Whether the script is currently reading the file * @access private */ /** * __construct * * Builds the Chunk object * * @param string $file The filename to work with * @param array $options The options with which to parse the file * * @author Dom Hastings * @access public */ public function __construct( $file, $options = array(), $parser_type = false ) { try { // merge the options together $this->options = array_merge( $this->options, ( is_array( $options ) ? $options : array() ) ); $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption( 'chunk_size' ); // set the filename $this->file = $file; $this->parser_type = empty( $parser_type ) ? 'xmlreader' : $parser_type; $sleep = apply_filters( 'wp_all_import_shard_delay', 0 ); usleep( $sleep ); $is_html = false; $f = @fopen( $file, "rb" ); if ( is_resource( $file ) ) { while ( ! 
@feof( $f ) ) { $chunk = @fread( $f, 1024 ); if ( strpos( $chunk, "get_file_path(); $this->is_404 = true; $this->reader = new XMLReader(); @$this->reader->open( $path ); @$this->reader->setParserProperty( XMLReader::VALIDATE, false ); return; } $input = new PMXI_Input(); $import_id = $input->get( 'id', 0 ); if ( empty( $import_id ) ) { $import_id = $input->get( 'import_id', 0 ); } if ( PMXI_Plugin::getInstance()->getOption( 'force_stream_reader' ) ) { $this->parser_type = 'xmlstreamer'; } else { if ( ! empty( $import_id ) ) { $this->parser_type = empty( $parser_type ) ? 'xmlreader' : $parser_type; $import = new PMXI_Import_Record(); $import->getById( $import_id ); if ( ! $import->isEmpty() ) { $this->parser_type = empty( $import->options['xml_reader_engine'] ) ? 'xmlreader' : 'xmlstreamer'; } } else { $this->parser_type = empty( $parser_type ) ? get_option( 'wpai_parser_type', 'xmlreader' ) : $parser_type; } } if ( empty( $this->options['element'] ) or $this->options['get_cloud'] ) { $path = $this->get_file_path(); if ( $this->parser_type == 'xmlreader' ) { $reader = new XMLReader(); $reader->open( $path ); $reader->setParserProperty( XMLReader::VALIDATE, false ); while ( @$reader->read() ) { switch ( $reader->nodeType ) { case ( XMLREADER::ELEMENT ): $localName = str_replace( "_colon_", ":", $reader->localName ); if ( array_key_exists( str_replace( ":", "_", $localName ), $this->cloud ) ) { $this->cloud[ str_replace( ":", "_", $localName ) ] ++; } else { $this->cloud[ str_replace( ":", "_", $localName ) ] = 1; } break; default: break; } } unset( $reader ); } else { $CHUNK_SIZE = 1024; $streamProvider = new Prewk\XmlStringStreamer\Stream\File( $path, $CHUNK_SIZE ); $parseroptions = array( "extractContainer" => false, // Required option ); // Works like an XmlReader, and walks the XML tree node by node. Captures by node depth setting. 
$parser = new Parser\StringWalker( $parseroptions ); // Create the streamer $streamer = new XmlStringStreamer( $parser, $streamProvider ); while ( $node = $streamer->getNode() ) { // $simpleXmlNode = simplexml_load_string($node); // echo (string)$simpleXmlNode->firstName; } $this->cloud = $parser->cloud; } if ( ! empty( $this->cloud ) and empty( $this->options['element'] ) ) { arsort( $this->cloud ); $main_elements = array( 'node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post', 'book', 'item_0' ); foreach ( $this->cloud as $element_name => $value ) { if ( in_array( strtolower( $element_name ), $main_elements ) ) { $this->options['element'] = $element_name; break; } } if ( empty( $this->options['element'] ) ) { foreach ( $this->cloud as $el => $count ) { $this->options['element'] = $el; break; } } $this->options['element'] = apply_filters( 'wp_all_import_root_element', $this->options['element'], $import_id, $this->cloud ); } } $path = $this->get_file_path(); if ( $this->parser_type == 'xmlreader' ) { $this->reader = new XMLReader(); @$this->reader->open( $path ); @$this->reader->setParserProperty( XMLReader::VALIDATE, false ); } else { $parseroptions = array( "uniqueNode" => $this->options['element'] ); $CHUNK_SIZE = 1024; $streamProvider = new Prewk\XmlStringStreamer\Stream\File( $path, $CHUNK_SIZE ); $parser = new Parser\UniqueNode( $parseroptions ); $this->reader = new XmlStringStreamer( $parser, $streamProvider ); } } catch ( Throwable $e ) { if (defined('WP_DEBUG') && WP_DEBUG) { error_log('Error in PMXI_Chunk constructor: ' . 
$e->getMessage());
			}
			// Construction failed: reset to an empty state. read() checks for the
			// null reader and reports "nothing to read" instead of crashing.
			$this->options     = array();
			$this->reader      = null;
			$this->file        = '';
			$this->cloud       = array();
			$this->loop        = 1;
			$this->is_404      = false;
			$this->parser_type = false;
		}
	}

	/**
	 * get_file_path
	 *
	 * Returns the path the parser should open for $this->file. When the
	 * XMLReader engine is active and the 'filter' option plus the
	 * 'wp_all_import_is_enabled_stream_filter' hook allow it, the
	 * 'preprocessxml' stream filter is registered and the file is wrapped in a
	 * php://filter URL so every chunk is sanitised while being read. Under
	 * HHVM the filter is registered but the plain path is returned.
	 *
	 * @return string Either $this->file or a php://filter wrapper around it
	 * @access public
	 */
	public function get_file_path() {
		$is_enabled_stream_filter = apply_filters( 'wp_all_import_is_enabled_stream_filter', true );

		// empty() instead of a bare index read: the options array is emptied
		// when the constructor fails, and a missing key must just mean "off".
		$use_stream_filter = function_exists( 'stream_filter_register' )
			&& ! empty( $this->options['filter'] )
			&& $is_enabled_stream_filter
			&& $this->parser_type == 'xmlreader';

		if ( ! $use_stream_filter ) {
			return $this->file;
		}

		stream_filter_register( 'preprocessxml', 'preprocessXml_filter' );

		if ( defined( 'HHVM_VERSION' ) ) {
			return $this->file;
		}

		return 'php://filter/read=preprocessxml/resource=' . $this->file;
	}

	/**
	 * __destruct
	 *
	 * Cleans up by dropping the reader instance.
	 *
	 * @return void
	 * @author Dom Hastings
	 * @access public
	 */
	public function __destruct() {
		// release the reader (and with it the underlying file resource)
		unset( $this->reader );
	}

	/**
	 * read
	 *
	 * Reads the next available occurrence of the XML element
	 * $this->options['element'], skipping occurrences until the internal loop
	 * counter has reached $this->options['pointer'].
	 *
	 * @param boolean $debug Unused; kept for backward compatibility
	 *
	 * @return string|false The element's XML with namespace colons rewritten,
	 *                      or false when nothing (more) could be read
	 * @author Dom Hastings
	 * @access public
	 */
	public function read( $debug = false ) {
		// The constructor leaves the reader null when it fails; without this
		// guard the calls below would raise "member function on null".
		if ( ! is_object( $this->reader ) ) {
			return false;
		}

		// trim it
		$element = trim( $this->options['element'] );
		$xml     = '';

		if ( $this->parser_type == 'xmlreader' ) {
			$wanted = strtolower( $element );
			try {
				while ( @$this->reader->read() ) {
					if ( XMLReader::ELEMENT !== $this->reader->nodeType ) {
						continue;
					}
					// The stream filter encodes ":" as "_colon_"; element names are
					// compared with the namespace colon turned into "_".
					$localName = str_replace( "_colon_", ":", $this->reader->localName );
					if ( strtolower( str_replace( ":", "_", $localName ) ) != $wanted ) {
						continue;
					}
					// Not yet at the requested starting record: count it and move on.
					if ( $this->loop < $this->options['pointer'] ) {
						$this->loop ++;
						continue;
					}
					$xml = @$this->reader->readOuterXML();
					break;
				}
			} catch ( XmlImportException $e ) {
				$xml = false;
			}
		} else {
			$is_preprocess_enabled = apply_filters( 'is_xml_preprocess_enabled', true );
			while ( $xml = $this->reader->getNode() ) {
				if ( $this->loop < $this->options['pointer'] ) {
					$this->loop ++;
					continue;
				}
				if ( $is_preprocess_enabled ) {
					// the & symbol is not valid in XML, so replace it with temporary word _ampersand_
					$escaped = str_replace( ":", "_colon_", str_replace( "&", "_ampersand_", $xml ) );
					$xml     = preg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $escaped );
					// Malformed UTF-8 makes the /u pattern return null, which would
					// look like end-of-file to the caller. Fall back to the same
					// byte-wise cleanup the stream filter uses.
					if ( null === $xml && preg_last_error() == PREG_BAD_UTF8_ERROR ) {
						$xml = preg_replace( '/[^\x09\x0a\x0d\x20-\xFF]+/', ' ', $escaped );
					}
				}
				break;
			}
		}

		if ( empty( $xml ) ) {
			return false;
		}

		// Strip default-namespace declarations before undoing the placeholders.
		return self::removeColonsFromRSS( preg_replace( '%xmlns\s*=\s*([\'"]).*\1%sU', '', $xml ) );
	}

	/**
	 * removeColonsFromRSS
	 *
	 * Restores the "_colon_" placeholders and then rewrites namespace prefixes
	 * ("ns:tag") in start tags, end tags, self-closing tags and (optionally)
	 * attribute names to "ns_tag". Finally restores "_ampersand_" placeholders
	 * and hands the result to PMXI_Import_Record::preprocessXml().
	 *
	 * @param string $feed The XML fragment to normalise
	 *
	 * @return string The normalised XML fragment
	 * @access public
	 */
	public static function removeColonsFromRSS( $feed ) {

		$feed = str_replace( "_colon_", ":", $feed );

		// pull out colons from start tags
		// (<\w+):(\w+>)
		$feed = preg_replace( '/(<\w+):([\w+|\.|-]+[ |>]{1})/i', '$1_$2', $feed );

		// pull out colons from end tags
		// (<\/\w+):(\w+>)
		$feed = preg_replace( '/(<\/\w+):([\w+|\.|-]+>)/i', '$1_$2', $feed );

		$is_replace_colons = apply_filters( 'wp_all_import_replace_colons_in_attribute_names', true );
		if ( $is_replace_colons ) {
			// pull out colons from attributes
			$feed = preg_replace( '/(\s+\w+):(\w+[=]{1})/i', '$1_$2', $feed );
		}

		// pull colons from single element
		// (<\w+):(\w+\/>)
		$feed = preg_replace( '/(<\w+):([\w+|\.|-]+\/>)/i', '$1_$2', $feed );

		$is_preprocess_enabled = apply_filters( 'is_xml_preprocess_enabled', true );
		if ( $is_preprocess_enabled ) {
			// replace temporary word _ampersand_ back to & symbol
			$feed = str_replace( "_ampersand_", "&", $feed );
		}

		// Replace all standalone & symbols (those that are not part of an HTML
		// entity such as &nbsp; and not wrapped in a CDATA section) with &amp;.
		// NOTE(review): the return value is ignored, so this presumably
		// modifies $feed by reference - confirm against PMXI_Import_Record.
		PMXI_Import_Record::preprocessXml( $feed );

		return $feed;
	}
}

/**
 * Stream filter registered as 'preprocessxml'.
 *
 * Sanitises each chunk of an XML file while it is being read: "&" becomes
 * "_ampersand_", ":" becomes "_colon_", and characters outside the ranges
 * listed in the pattern below are replaced with a space.
 */
class preprocessXml_filter extends php_user_filter {

	/**
	 * Filters every bucket of the incoming brigade and passes it on.
	 *
	 * @param resource $in       Incoming bucket brigade
	 * @param resource $out      Outgoing bucket brigade
	 * @param int      $consumed Incremented by the length of each bucket processed
	 * @param bool     $closing  Whether the stream is closing (unused)
	 *
	 * @return int PSFS_PASS_ON
	 */
	#[\ReturnTypeWillChange]
	public function filter( $in, $out, &$consumed, $closing ) {
		while ( $bucket = stream_bucket_make_writeable( $in ) ) {
			$is_preprocess_enabled = apply_filters( 'is_xml_preprocess_enabled', true );
			if ( $is_preprocess_enabled ) {
				// the & symbol is not valid in XML, so replace it with temporary word _ampersand_
				$bucket->data = str_replace( "&", "_ampersand_", $bucket->data );
				$escaped      = $this->replace_colons( $bucket->data );

				$cleanXML = preg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $escaped );
				// A bucket can end in the middle of a multi-byte character, which
				// makes the /u pattern fail; retry byte-wise. This non-UTF-8
				// pattern cannot raise PREG_BAD_UTF8_ERROR, so no further
				// fallback is needed.
				if ( null === $cleanXML && preg_last_error() == PREG_BAD_UTF8_ERROR ) {
					$cleanXML = preg_replace( '/[^\x09\x0a\x0d\x20-\xFF]+/', ' ', $escaped );
				}

				$bucket->data = empty( $cleanXML ) ? $escaped : $cleanXML;
			}
			$consumed += $bucket->datalen;
			stream_bucket_append( $out, $bucket );
		}

		return PSFS_PASS_ON;
	}

	/**
	 * Encodes every ":" as the "_colon_" placeholder.
	 *
	 * @param string $data Raw chunk data
	 *
	 * @return string The chunk with colons replaced
	 */
	public function replace_colons( $data ) {
		return str_replace( ":", "_colon_", $data );
	}
}