HTML Parser Class
Jun 21, 2016, 09:05 AM
This is an HTML parser class, used to parse HTML and XML. One of the unique features of this class is that it supports the innerHTML property.
<?php
/**
* HTML/XML Parser Class
*
* This is a helper class that is used to parse HTML and XML. A unique feature of this parsing class
* is the fact that it includes support for innerHTML (which isn't easy to do).
*
* @author Dennis Pallett
* @copyright Dennis Pallett 2006
* @package HTML_Parser
* @version 1.0
*/
/**
 * Helper class that parses well-formed HTML/XML with PHP's XML (SAX) parser.
 *
 * parse() returns a tree of element records. Each record is an array with the
 * keys 'name' (upper-cased by case folding), 'attr', 'level', and optionally
 * 'tagData' (non-blank text), 'innerhtml' (re-serialised markup between the
 * element's open and close tags) and 'children' (nested element records).
 *
 * innerHTML is recovered from $_outline: a copy of the re-serialised markup in
 * which the tags of still-open elements are prefixed with their nesting level,
 * so the matching open/close pair can be located with a regular expression.
 */
class HTML_Parser {
    // Private properties
    var $_parser;
    var $_tags = array();      // currently open element record per nesting level
    var $_html;                // re-serialised markup of the current document
    var $output = array();     // stack of open element records; holds the root when done
    var $strXmlData;           // return value of the last xml_parse() call
    var $_level = 0;           // current nesting depth (root element is level 1)
    var $_outline;             // $_html plus level markers on open elements
    var $_tagcount = array();
    var $xml_error = false;
    var $xml_error_code;
    var $xml_error_string;
    var $xml_error_line_number;

    /**
     * Returns the markup re-serialised during the most recent parse() call.
     */
    function get_html () {
        return $this->_html;
    }

    /**
     * Parses a complete document.
     *
     * @param string $strInputXML Markup to parse.
     * @return array|false The output stack (containing the root element record
     *                     with nested 'children') on success; false on a parse
     *                     error, in which case $xml_error, $xml_error_code,
     *                     $xml_error_string and $xml_error_line_number are set.
     */
    function parse($strInputXML) {
        // Reset all per-document state so the instance can be reused and a
        // previous failed parse cannot leak into this one.
        $this->output = array();
        $this->_tags = array();
        $this->_level = 0;
        $this->_html = '';
        $this->_outline = '';
        $this->xml_error = false;
        $this->xml_error_code = null;
        $this->xml_error_string = null;
        $this->xml_error_line_number = null;

        // Translate named HTML entities into numeric ones the XML parser accepts
        $strInputXML = $this->translate_entities($strInputXML);

        $this->_parser = xml_parser_create();
        xml_parser_set_option($this->_parser, XML_OPTION_CASE_FOLDING, true);
        // Array callables bind the handlers to this object directly, without
        // xml_set_object() (deprecated as of PHP 8.4).
        xml_set_element_handler($this->_parser, array($this, 'tagOpen'), array($this, 'tagClosed'));
        xml_set_character_data_handler($this->_parser, array($this, 'tagData'));

        // The whole document is passed in one call, so mark it as the final
        // chunk; otherwise errors only detectable at end of input go unreported.
        $this->strXmlData = xml_parse($this->_parser, $strInputXML, true);

        $succeeded = (bool) $this->strXmlData;
        if (!$succeeded) {
            $this->xml_error = true;
            $this->xml_error_code = xml_get_error_code($this->_parser);
            $this->xml_error_string = xml_error_string($this->xml_error_code);
            $this->xml_error_line_number = xml_get_current_line_number($this->_parser);
        }

        // Drop the parser: it references $this through its handlers.
        $this->_parser = null;

        return $succeeded ? $this->output : false;
    }

    /**
     * Start-element handler: pushes a record for the element onto the output
     * stack and appends its open tag to the HTML and the outline.
     */
    function tagOpen($parser, $name, $attr) {
        // Increase level
        $this->_level++;

        // Create tag:
        $newtag = $this->create_tag($name, $attr);

        // Build tag
        $tag = array("name"=>$name, "attr"=>$attr, "level"=>$this->_level);

        // Add tag
        array_push($this->output, $tag);

        // Add tag to this level
        $this->_tags[$this->_level] = $tag;

        // Add to HTML
        $this->_html .= $newtag;

        // Add to outline, prefixed with the level marker used by tagClosed()
        $this->_outline .= $this->_level . $newtag;
    }

    /**
     * Serialises an open tag with lower-cased element and attribute names.
     *
     * @param string $name Element name.
     * @param array  $attr Attribute name => value pairs.
     * @return string e.g. '<a href="x">'; 'br' and 'input' are self-closed
     *                ('<br />').
     */
    function create_tag ($name, $attr) {
        # Begin with name
        $tag = '<' . strtolower($name) . ' ';

        # Create attribute list
        foreach ($attr as $key=>$val) {
            $tag .= strtolower($key) . '="' . htmlentities($val) . '" ';
        }

        # Finish tag
        $tag = trim($tag);

        switch(strtolower($name)) {
            case 'br':
            case 'input':
                $tag .= ' /';
                break;
        }

        $tag .= '>';

        return $tag;
    }

    /**
     * Character-data handler: appends text to the innermost open element's
     * 'tagData' and to the HTML/outline. Whitespace-only text is not stored in
     * 'tagData' but is still written to the HTML and the outline.
     */
    function tagData($parser, $tagData) {
        $last = count($this->output) - 1;

        // Compare against '' rather than testing truthiness, so that the
        // text "0" is not discarded.
        if ($last >= 0 && trim($tagData) !== '') {
            if (isset($this->output[$last]['tagData'])) {
                $this->output[$last]['tagData'] .= $tagData;
            } else {
                $this->output[$last]['tagData'] = $tagData;
            }
        }

        $this->_html .= htmlentities($tagData);
        $this->_outline .= htmlentities($tagData);
    }

    /**
     * End-element handler: emits the close tag, extracts the element's
     * innerHTML from the outline, and moves the finished record from the
     * output stack into its parent's 'children' list.
     */
    function tagClosed($parser, $name) {
        $closetag = '</' . strtolower($name) . '>';

        // Add to HTML and outline ('br' and 'input' were already self-closed)
        switch (strtolower($name)) {
            case 'br':
            case 'input':
                break;
            default:
                $this->_outline .= $this->_level . $closetag;
                $this->_html .= $closetag;
        }

        // Get tag that belongs to this end
        $tag = $this->_tags[$this->_level];
        $tag = $this->create_tag($tag['name'], $tag['attr']);

        // Try to get innerHTML: everything between this element's
        // level-marked open tag and its level-marked close tag.
        $regex = '%' . preg_quote($this->_level . $tag, '%') . '(.*?)' . preg_quote($this->_level . $closetag, '%') . '%is';
        preg_match($regex, $this->_outline, $matches);

        // Remove level identifiers so later siblings with the same tag at the
        // same level cannot match this element again.
        $this->_outline = str_replace($this->_level . $tag, $tag, $this->_outline);
        $this->_outline = str_replace($this->_level . $closetag, $closetag, $this->_outline);

        $last = count($this->output) - 1;

        // Add innerHTML
        if (isset($matches[1])) {
            $this->output[$last]['innerhtml'] = $matches[1];
        }

        // Fix tree: hand the finished element to its parent. The root element
        // has no parent and stays on the stack as the parse result.
        if ($last > 0) {
            $this->output[$last - 1]['children'][] = $this->output[$last];
            array_pop($this->output);
        }

        // Decrease level
        $this->_level--;
    }

    /**
     * Converts named HTML entities to numeric character references (or back).
     *
     * The XML parser only knows the predefined XML entities, so e.g. '&nbsp;'
     * must become '&#160;' before parsing. The entities for & " < > are left
     * untouched.
     *
     * @param string $xmlSource Text to convert.
     * @param bool   $reverse   When true, numeric references are converted
     *                          back to named entities.
     * @return string
     */
    function translate_entities($xmlSource, $reverse =FALSE) {
        static $literal2NumericEntity;

        if (empty($literal2NumericEntity)) {
            $literal2NumericEntity = array();

            // Request the table in UTF-8 explicitly so the keys have a known
            // encoding regardless of the configured default charset.
            $transTbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');

            foreach ($transTbl as $char => $entity) {
                if (in_array((string) $char, array('&', '"', '<', '>'), true)) continue;
                $literal2NumericEntity[$entity] = '&#' . $this->_utf8_codepoint((string) $char) . ';';
            }
        }

        if ($reverse) {
            return strtr($xmlSource, array_flip($literal2NumericEntity));
        } else {
            return strtr($xmlSource, $literal2NumericEntity);
        }
    }

    /**
     * Returns the Unicode code point of a single UTF-8 encoded character.
     * ord() alone would only return the first byte of a multi-byte character.
     */
    function _utf8_codepoint($char) {
        $lead = ord($char[0]);

        if ($lead < 0x80) {
            return $lead;
        }

        if ($lead < 0xE0) {
            $code = $lead & 0x1F;
            $extra = 1;
        } elseif ($lead < 0xF0) {
            $code = $lead & 0x0F;
            $extra = 2;
        } else {
            $code = $lead & 0x07;
            $extra = 3;
        }

        // Each continuation byte contributes its low six bits
        for ($i = 1; $i <= $extra; $i++) {
            $code = ($code << 6) | (ord($char[$i]) & 0x3F);
        }

        return $code;
    }
}
// To be used like this
// NOTE(review): $html is not defined in this snippet; presumably the caller
// sets it to the markup string to parse beforehand. Confirm before running.
$parser = new HTML_Parser;
$output = $parser->parse($html);
if ($output === false) {
    // parse() returns false on malformed input and records the details on the
    // parser object; report them instead of printing nothing.
    echo 'Parse error ' . $parser->xml_error_code
        . ' on line ' . $parser->xml_error_line_number
        . ': ' . $parser->xml_error_string . "\n";
} else {
    print_r ($output);
}
?>

Artikel Panas

Alat panas Tag

Artikel Panas

Tag artikel panas

Notepad++7.3.1
Editor kod yang mudah digunakan dan percuma

SublimeText3 versi Cina
Versi Cina, sangat mudah digunakan

Hantar Studio 13.0.1
Persekitaran pembangunan bersepadu PHP yang berkuasa

Dreamweaver CS6
Alat pembangunan web visual

SublimeText3 versi Mac
Perisian penyuntingan kod peringkat Tuhan (SublimeText3)

Topik panas

Penyelesaian: Organisasi anda memerlukan anda menukar PIN anda

Cara melaraskan tetapan sempadan tetingkap pada Windows 11: Tukar warna dan saiz

Bagaimana untuk menukar warna bar tajuk pada Windows 11?

Bagaimana untuk mendayakan atau melumpuhkan pratonton lakaran kecil bar tugas pada Windows 11

Masalah Ralat OOBELANGUAGE dalam Pembaikan Windows 11/10

Apakah perbezaan antara Huawei GT3 Pro dan GT4?

Paparkan panduan penskalaan pada Windows 11

10 Cara untuk Melaraskan Kecerahan pada Windows 11
