A very simple, practical HTML parsing class for PHP (recently collected and tidied up)
WBOY
Release: 2016-07-25 09:02:04
Original
717 people have browsed it
// Usage example: load a remote page, then print the text content of the
// first <a> element matched by the relative XPath "dl/dd/a".
$parser = new XF_HtmlDom();
$firstLink = $parser
    ->loadHtml('http://dealer.bitauto.com/100040078/cars.html')
    ->find('dl/dd/a', 0);
print_r($firstLink->innertext);
// Buffer libxml parse errors instead of raising a PHP warning for every
// malformed tag. The previous setting is kept in $oldSetting.
$oldSetting = libxml_use_internal_errors(true);
libxml_clear_errors();

/**
 * -+-----------------------------------
 * |PHP5 Framework - 2011
 * |Web Site: www.iblue.cc
 * |E-mail: mejinke@gmail.com
 * |Date: 2012-10-12
 * -+-----------------------------------
 *
 * @desc HTML parser: a thin wrapper around DOMDocument/DOMXPath.
 *
 * An instance is either the document root (empty node path) or a handle on
 * one node, identified by the absolute XPath returned by getNodePath().
 * An instance with no DOMXPath attached acts as an "empty" node: text() and
 * getAttribute() return false and find() yields no results.
 *
 * @author jingke
 */
class XF_HtmlDom
{
    /** @var DOMXPath|null XPath evaluator bound to the loaded document. */
    private $_xpath = null;

    /** @var string Absolute XPath of the node this instance points at ('' = document root). */
    private $_nodePath = '';

    /**
     * @param DOMXPath|null $xpath    XPath evaluator for an already loaded document.
     * @param string        $nodePath Absolute XPath of the node to wrap.
     */
    public function __construct($xpath = null, $nodePath = '')
    {
        $this->_xpath = $xpath;
        $this->_nodePath = $nodePath;
    }

    /**
     * Load an HTML document from an http(s) URL (via cURL) or from any other
     * path/stream understood by file_get_contents().
     *
     * If the source cannot be read or is empty, an empty document is attached
     * so that later find() calls simply return no results.
     *
     * @param string $url URL or file path of the HTML document.
     * @return XF_HtmlDom $this, to allow chaining.
     */
    public function loadHtml($url)
    {
        ini_set('user_agent', 'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17 –Nexus');

        // Only a leading http:// or https:// scheme marks a remote document;
        // a local path that merely contains "http" must not be sent to cURL.
        if (preg_match('#^\s*https?://#i', $url) === 1)
        {
            $content = $this->_fetchRemote($url);
        }
        else
        {
            $content = file_get_contents($url);
        }

        $html = new DOMDocument();
        if (is_string($content) && $content !== '')
        {
            // Real-world HTML is rarely well formed: buffer libxml errors for
            // the duration of the parse, then restore the caller's setting.
            $previous = libxml_use_internal_errors(true);
            $html->loadHTML($content);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        $this->_xpath = new DOMXPath($html);
        return $this;
    }

    /**
     * Find nodes below the current node.
     *
     * From the document root the query is matched anywhere in the document
     * ("//query"); from a node it is matched relative to that node
     * ("nodePath/query").
     *
     * @param string   $query Relative XPath expression, e.g. "dl/dd/a".
     * @param int|null $index Zero-based position of the single match to return,
     *                        or null to return every match.
     * @return XF_HtmlDom[]|XF_HtmlDom Array of wrappers when $index is not numeric;
     *                        otherwise one wrapper (an empty one if nothing
     *                        exists at that position).
     */
    public function find($query, $index = null)
    {
        $wantsList = !is_numeric($index);

        if ($this->_xpath === null)
        {
            return $wantsList ? array() : new self();
        }

        // Build the expression locally: the instance's own path must not be
        // modified, otherwise a second find() would query an invalid path.
        $prefix = ($this->_nodePath == '') ? '//' : $this->_nodePath . '/';
        $nodes = $this->_xpath->query($prefix . $query);

        if ($wantsList)
        {
            $found = array();
            if ($nodes !== false)
            {
                foreach ($nodes as $node)
                {
                    $found[] = new self($this->_xpath, $node->getNodePath());
                }
            }
            return $found;
        }

        // query() yields false for a malformed expression; item() yields null
        // for an out-of-range index. Both map to the empty node.
        $node = ($nodes !== false) ? $nodes->item((int) $index) : null;
        if ($node === null)
        {
            return new self();
        }
        return new self($this->_xpath, $node->getNodePath());
    }

    /**
     * Get content
     *
     * @return string|false Text content of the current node, or false when this
     *                      instance does not point at an existing node.
     */
    public function text()
    {
        $node = $this->_currentNode();
        if ($node === null)
        {
            return false;
        }
        return $node->textContent;
    }

    /**
     * Get attribute value
     *
     * @param string $name Attribute name.
     * @return string|false Attribute value, or false when this instance does
     *                      not point at an existing element.
     */
    public function getAttribute($name)
    {
        $node = $this->_currentNode();
        // Only element nodes carry attributes.
        if (!($node instanceof DOMElement))
        {
            return false;
        }
        return $node->getAttribute($name);
    }

    /**
     * Property-style access: "innertext" maps to text(); any other name is
     * looked up as an attribute of the current node.
     *
     * @param string $name
     * @return string|false
     */
    public function __get($name)
    {
        if ($name == 'innertext')
        {
            return $this->text();
        }
        return $this->getAttribute($name);
    }

    /**
     * Resolve the node this instance points at.
     *
     * @return DOMNode|null Null when no document or path is set, or when the
     *                      path no longer matches a node.
     */
    private function _currentNode()
    {
        if ($this->_nodePath == '' || $this->_xpath === null)
        {
            return null;
        }
        $matches = $this->_xpath->query($this->_nodePath);
        if ($matches === false)
        {
            return null;
        }
        return $matches->item(0);
    }

    /**
     * Download a remote document with cURL, following redirects.
     *
     * @param string $url http(s) URL.
     * @return string|false Response body, or false on failure (a warning is raised).
     */
    private function _fetchRemote($url)
    {
        $browserAgent = 'Mozilla/5.0 (Windows NT 5.1; rv:6.0) Gecko/20100101 Firefox/6.0';

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_REFERER, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, $browserAgent);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

        $body = curl_exec($ch);
        if ($body === false)
        {
            trigger_error('XF_HtmlDom: failed to fetch ' . $url . ': ' . curl_error($ch), E_USER_WARNING);
        }
        curl_close($ch);

        return $body;
    }
}
Copy code
|
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Latest Articles by Author
-
2024-10-22 09:46:29
-
2024-10-13 13:53:41
-
2024-10-12 12:15:51
-
2024-10-11 22:47:31
-
2024-10-11 19:36:51
-
2024-10-11 15:50:41
-
2024-10-11 15:07:41
-
2024-10-11 14:21:21
-
2024-10-11 12:59:11
-
2024-10-11 12:17:31