A detailed explanation of a PHP function for filtering HTML strings - PHP tutorial

WBOY
Release: 2016-07-13 17:40:31
Original
869 people have browsed it

/*********************************************************************
* Process description:
*   When valid parameters are passed in, filter() first uses
*   preg_match_all() to pull every occurrence of the tag $tag out of the
*   string. It then loops over those matches and uses preg_split() to split
*   each tag at its "=" signs, one per attribute="value" pair. Next it loops
*   over the list of attributes to keep, collects the pairs whose name is on
*   that list to build the replacement tag, and finally swaps the original
*   tag for the replacement with str_replace().
* Function list:
*   function filter(&$str, $tag, $keep_attribute)
*   function match($reg, &$str, $arr)
*   function show($str, $title = '', $debug = true)
* Usage example:
*   // Fetch the Sohu News home page
*   $str = @file_get_contents("http://news.sohu.com");
*   // Filter
*   filter($str, 'a', 'href,target,alt');
*   filter($str, 'p', 'align');
*   show($str, 'Filtered content');
*********************************************************************/

// Wall-clock start time (float seconds), used for the run-time report below.
$start_time = microtime(true);

// NOTE(review): this sample text contains no <a>, <p> or <font> tags -- the
// markup appears to have been lost when the listing was published -- so the
// filter() calls below have nothing to change until real HTML is put back.
$str = <<<HTML
site a
site b
site c
site d
site e

adasdfasdf


asdfasdfasdfasdf


asdfasdfasdf



asdfadsfasdf
asdfasdfadf
asdfasdf
HTML;

// Show the original string
show($str, 'Html');

/***********************************************************************************************************************************************************************/
// Filter: the second argument is the tag, the third is the comma-separated
// list of attributes that tag is allowed to keep.
filter($str, 'a', 'href,target,alt');
filter($str, 'p', 'align');
filter($str, 'font', 'color,alt');

// Show the filtered content
show($str, 'Result');

// Script run time
$run_time = microtime(true) - $start_time;
// NOTE(review): any markup that surrounded this label was lost from the
// published listing; only the visible text and line breaks are reproduced.
echo "\nScript Run Time: " . $run_time . "\n";

/**
 * Strip every attribute that is not on a keep-list from all opening tags of
 * one type inside an HTML string.
 *
 * The string is modified in place and a one-line progress message is echoed.
 * Retained attributes are emitted in keep-list order, written exactly as they
 * appeared in the source tag. Tags that cannot be parsed (for example an
 * unterminated quoted value) are left untouched rather than removed.
 *
 * Each opening tag is rebuilt here instead of through this file's match()
 * helper, because a bare `match(...)` call no longer parses on PHP 8, where
 * `match` is a reserved keyword.
 *
 * @param string       $str            HTML to filter; modified in place.
 * @param string       $tag            Tag name to process, e.g. "a"
 *                                     (matched case-insensitively).
 * @param string|array $keep_attribute Attributes to retain, given as "href",
 *                                     "href,target,alt" or
 *                                     array('href', 'target', 'alt').
 * @return void
 */
function filter(&$str, $tag, $keep_attribute)
{
    // Normalise the keep-list. explode() on a string without a comma yields a
    // one-element array, so the single-name form needs no special case (and a
    // comma at offset 0 is no longer mis-detected by a truthy strpos()).
    if (!is_array($keep_attribute)) {
        $keep_attribute = explode(',', (string) $keep_attribute);
    }
    $keep_attribute = array_map('trim', $keep_attribute);

    // Progress line; the user-facing text is unchanged.
    echo "·过滤[$tag]标签,保留属性:" . implode(',', $keep_attribute) . "\n";

    if (!is_string($str) || $str === '' || (string) $tag === '') {
        return;
    }

    // One opening tag of the requested type:
    //   1: "<tag" as written, 2: raw attribute text, 3: closer (">" or "/>").
    // preg_quote() keeps regex metacharacters in $tag literal, and the
    // look-ahead stops "<a" from also matching "<abbr" or "<a-custom".
    // Quoted values are consumed whole so a ">" inside them does not end the tag.
    $tagPattern = '/(<' . preg_quote($tag, '/') . ')(?![\w:-])'
        . '((?:"[^"]*"|\'[^\']*\'|[^>"\'])*?)'
        . '(\s*\/?>)/i';

    // One attribute: name, optionally followed by =value (quoted or bare).
    $attrPattern = '/\s+([^\s=\/"\'<>]+)(?:\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s"\']+))?/';

    if (!preg_match_all($tagPattern, $str, $tags, PREG_SET_ORDER)) {
        return;
    }

    $handled = array();
    foreach ($tags as $found) {
        list($original, $open, $inner, $close) = $found;

        // Identical tag text gets an identical replacement; do it once.
        if (isset($handled[$original])) {
            continue;
        }
        $handled[$original] = true;

        preg_match_all($attrPattern, $inner, $attributes, PREG_SET_ORDER);

        // Walk the keep-list first so output order follows the keep-list.
        $kept = '';
        foreach ($keep_attribute as $name) {
            foreach ($attributes as $attribute) {
                // HTML attribute names are case-insensitive, like the tag match.
                if (strcasecmp($attribute[1], (string) $name) === 0) {
                    $kept .= ' ' . ltrim($attribute[0]);
                }
            }
        }

        $replacement = $open . $kept . $close;
        if ($replacement !== $original) {
            $str = str_replace($original, $replacement, $str);
        }
    }
}

/**
 * Rebuild one tag so that it keeps only the listed attributes.
 *
 * NOTE(review): `match` is a reserved keyword as of PHP 8.0, so this
 * declaration (and any `match(...)` call) only parses on PHP 7 and earlier.
 * The name is kept for existing callers; plan a rename before upgrading.
 *
 * @param string $reg Pattern for preg_match(). Its first capture group must be
 *                    the tag opening (e.g. "<a"), its last group the remainder
 *                    of the tag, and each group in between one
 *                    ` name=value` attribute.
 * @param string $str The tag text to match (passed by reference, not modified).
 * @param array  $arr Names of the attributes to retain.
 * @return string The first group, then every captured piece whose name is in
 *                $arr (in $arr order), then the last group. If $reg does not
 *                match or has no capture groups, $str is returned unchanged.
 */
function match($reg, &$str, $arr)
{
    // A failed match used to fall through to undefined offsets and return an
    // empty string, which made callers delete the tag; hand it back untouched.
    if (!preg_match($reg, $str, $out)) {
        return $str;
    }

    $last = count($out) - 1;
    if ($last < 1) {
        // Pattern had no capture groups, so there is nothing to rebuild from.
        return $str;
    }

    // Collect the retained attributes, following the order of $arr.
    $keep_attribute = '';
    foreach ((array) $arr as $name) {
        foreach ($out as $piece) {
            $equals = strpos($piece, '=');
            if ($equals === false) {
                // Not a name=value piece (e.g. the "<a" opener); without this
                // an empty keep name would match it and duplicate it.
                continue;
            }

            // Text before "=" is the attribute name.
            if (trim(substr($piece, 0, $equals)) === (string) $name) {
                $keep_attribute .= $piece;
            }
        }
    }

    // Opening + retained attributes + remainder of the tag.
    return $out[1] . $keep_attribute . $out[$last];
}

/**
* Display string content
*/
function show($str,$title=,$debug = True) {
if($debug) {
if(is_array($str)) {
$str = print_r($str,True);
}
$txtRows = count(explode(" ",$str)) 1;
echo($title.: