HTML tag closing detection and repair

WBOY
Release: 2016-07-25 09:01:09
Original
1374 people have browsed it
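HTML tag closing detection and repair. The title promises more than the code delivers: it is not a complete solution and it does not use regular expressions. It is intended for HTML in which a start tag has no matching end tag, or an end tag has no matching start tag. The position at which a missing tag is inserted may need to be adjusted to suit your requirements.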
  /**
   * Scan an HTML string once and collect the tags that have no partner.
   *
   * The scan is a single left-to-right pass without regular expressions:
   *  - "<name ..." pushes a start tag onto a stack;
   *  - "</name>" pops the stack when it matches the most recent start tag,
   *    otherwise it is recorded as an unmatched closing tag;
   *  - "/>" drops the start tag that is currently being read (self-closing);
   *  - "<!-- ... -->", other "<!...>" declarations, C-style comments and
   *    quoted strings are skipped.
   *
   * Known limitation (unchanged from the original design): quote characters
   * are treated as string delimiters wherever they occur, including in text.
   *
   * @param string $str HTML source to scan.
   * @return array Four parallel lists:
   *               'pre_data'   => names of start tags still open at the end,
   *               'pre_pos'    => offset at which reading each of those names stopped,
   *               'error_data' => names of closing tags that did not match,
   *               'error_pos'  => offset of the ">" (or end of input) of each of those.
   */
  function scan_html_tags($str)
  {
      $str_len    = strlen($str);
      $pre_data   = array();
      $pre_pos    = array();
      $error_data = array();
      $error_pos  = array();
      // True while the scanner is between "<name" and the end of that tag.
      $start_flag = false;
      $i = 0;

      while ($i < $str_len) {
          $cur  = $str[$i];
          // Look-ahead is bounds-checked so a trailing "<" or "/" is harmless.
          $next = ($i + 1 < $str_len) ? $str[$i + 1] : '';

          if ($cur === '<' && $next === '!') {
              // Comments run to "-->"; any other declaration (e.g. DOCTYPE)
              // ends at the first ">" instead of swallowing the rest of the input.
              $terminator = (substr($str, $i, 4) === '<!--') ? '-->' : '>';
              $end = strpos($str, $terminator, $i + 2);
              $i = ($end === false) ? $str_len : $end + strlen($terminator);
          } elseif ($cur === '<' && $next === '/') {
              // Closing tag: read the name up to ">".
              $i += 2;
              $_tmp_str = '';
              while ($i < $str_len && $str[$i] !== '>') {
                  $_tmp_str .= $str[$i];
                  $i++;
              }
              $_tmp_str = trim($_tmp_str); // "</div >" closes "div"

              if (count($pre_data) > 0 && getLastNode($pre_data, 1) === $_tmp_str) {
                  // Matches the most recent start tag: the pair is complete.
                  array_pop($pre_data);
                  array_pop($pre_pos);
              } else {
                  // Either the start tag is missing, or an inner start tag was
                  // never closed; sort_data() tells the two cases apart later.
                  $error_data[] = $_tmp_str;
                  $error_pos[]  = $i;
              }
          } elseif ($cur === '<') {
              // Start tag: the name ends at the first whitespace character;
              // reading stops at ">", a quote or "/".
              $i++;
              $_tmp_str   = '';
              $start_flag = true;
              $space_flag = false;
              while (
                  $i < $str_len
                  && $str[$i] !== '>'
                  && $str[$i] !== "'"
                  && $str[$i] !== '"'
                  && $str[$i] !== '/'
              ) {
                  if (trim($str[$i]) === '') {
                      $space_flag = true;
                  }
                  if (!$space_flag) {
                      $_tmp_str .= $str[$i];
                  }
                  $i++;
              }
              $pre_data[] = $_tmp_str;
              $pre_pos[]  = $i;
          } elseif ($cur === '/' && $next === '>') {
              // Self-closing tag: forget the start tag that was just recorded.
              if ($start_flag) {
                  array_pop($pre_data);
                  array_pop($pre_pos);
              }
              $start_flag = false;
              // Always advance, otherwise a "/>" outside a tag loops forever.
              $i += 2;
          } elseif ($cur === '/' && $next === '*') {
              // C-style comment (inline script/style): skip past "*/".
              $end = strpos($str, '*/', $i + 2);
              $i = ($end === false) ? $str_len : $end + 2;
          } elseif ($cur === "'" || $cur === '"') {
              // Quoted string: skip past the matching quote.
              $end = strpos($str, $cur, $i + 1);
              $i = ($end === false) ? $str_len : $end + 1;
          } elseif ($cur === '>') {
              // End of a tag: a later "/>" no longer belongs to it.
              $start_flag = false;
              $i++;
          } else {
              $i++;
          }
      }

      return array(
          'pre_data'   => $pre_data,
          'pre_pos'    => $pre_pos,
          'error_data' => $error_data,
          'error_pos'  => $error_pos,
      );
  }

  // Sample input. NOTE(review): the markup of the original sample was stripped
  // when this listing was extracted and only its text survives. Replace this
  // with real HTML containing unbalanced tags to see the repair at work.
  $str = implode("\n", array(
      '',
      'content',
      'content full',
      'this is content',
      'this is content',
      'This is cont',
      'This is content',
      'this is content',
  ));
  $str_len = strlen($str);

  $scan_result = scan_html_tags($str);
  // Names of start tags that were never closed
  $pre_data = $scan_result['pre_data'];
  // Offsets at which reading of those start tags stopped
  $pre_pos = $scan_result['pre_pos'];
  // Kept for compatibility: the original pushed and popped this in lockstep,
  // so it was always empty once the scan finished.
  $last_data = array();
  // Names and offsets of closing tags that matched no start tag
  $error_data = $scan_result['error_data'];
  $error_pos = $scan_result['error_pos'];
  /**
   * Determine where the closing tag for an unclosed start tag is inserted.
   *
   * Starting inside the start tag at $pre_pos, quoted attribute values are
   * skipped until the tag's ">" is found. The insertion point is then the end
   * of the text that follows the tag: the offset of the next "<", moved back
   * over any whitespace directly in front of it, but never back into the tag.
   *
   * @param string $str     HTML source.
   * @param int    $pre_pos Offset inside the start tag (at or before its ">").
   * @return int Offset at which the closing tag should be inserted; strlen($str)
   *             when the tag or the following text runs to the end of the input.
   */
  function confirm_pre_pos($str, $pre_pos)
  {
      $str_len = strlen($str);
      $j = max(0, (int) $pre_pos);

      while ($j < $str_len) {
          $ch = $str[$j];
          if ($ch === '"' || $ch === "'") {
              // Skip a quoted attribute value, which may itself contain ">".
              $close = strpos($str, $ch, $j + 1);
              $j = ($close === false) ? $str_len : $close + 1;
          } elseif ($ch === '>') {
              $content_start = $j + 1;
              $lt = strpos($str, '<', $content_start);
              if ($lt === false) {
                  // Text runs to the end of the input: append there.
                  return $str_len;
              }
              // Insert in front of the next tag. The original stepped back one
              // character unconditionally, which split the text ("tex</p>t")
              // or, with no text at all, landed inside the tag ("<p</p>>").
              $j = $lt;
              while ($j > $content_start && trim($str[$j - 1]) === '') {
                  $j--;
              }
              return $j;
          } else {
              $j++;
          }
      }

      return $j;
  }
  /**
   * Determine where the start tag for an unmatched closing tag is inserted.
   *
   * Walks backwards from the closing tag until the ">" of the preceding tag
   * is found and returns the offset just after it. Quoted spans met on the way
   * are skipped as a whole, so a ">" inside quotes does not stop the walk.
   *
   * Known limitation: a lone quote character in the text (e.g. an apostrophe)
   * is paired with whatever quote precedes it, which can move the result
   * further back than expected.
   *
   * @param string $str     HTML source.
   * @param int    $err_pos Offset of the closing tag's ">" (or end of input).
   * @return int Offset at which the start tag should be inserted; 0 when no
   *             preceding ">" exists.
   */
  function confirm_err_pos($str, $err_pos)
  {
      // Clamp so an offset past the end cannot read outside the string.
      $j = min((int) $err_pos, strlen($str)) - 1;

      while ($j > 0) {
          $ch = $str[$j];
          if ($ch === '"' || $ch === "'") {
              // Skip backwards to the matching quote. The original compared
              // against an undefined $str_len here, so this never ran.
              $j--;
              while ($j > 0) {
                  if ($str[$j] === $ch) {
                      $j--;
                      break;
                  }
                  $j--;
              }
          } elseif ($ch === '>') {
              // Insert right after the preceding tag.
              $j++;
              break;
          } else {
              $j--;
          }
      }

      // $err_pos === 0 would otherwise yield -1.
      return max($j, 0);
  }
  /**
   * Return the $num-th element counted from the end of a list.
   *
   * $num = 1 is the last element, 2 the one before it, and so on. When the
   * list has fewer than $num elements the first element is returned.
   *
   * @param array $arr Zero-indexed list.
   * @param int   $num 1-based position from the end.
   * @return mixed|null The element, or null when the list is empty or $num < 1
   *                    (the original read an undefined offset in those cases).
   */
  function getLastNode(array $arr, $num)
  {
      $len = count($arr);
      if ($len === 0 || $num < 1) {
          return null;
      }
      if ($len > $num) {
          return $arr[$len - $num];
      }
      return $arr[0];
  }
  /**
   * Second pass over the scan result: pair up tags the stack check missed.
   *
   * A closing tag is recorded as an error whenever it does not match the most
   * recent start tag, even if its own start tag is still open further down
   * the stack. For every such closing tag this looks backwards through the
   * open start tags for one with the same name that precedes it and has not
   * been paired yet, and removes both from their lists. Whatever remains
   * afterwards really is unbalanced.
   *
   * All four arrays are modified in place; keys of surviving entries are kept,
   * so each name list stays aligned with its position list.
   *
   * @param array $pre_data   Names of unclosed start tags.
   * @param array $pre_pos    Offsets belonging to $pre_data (same keys).
   * @param array $error_data Names of unmatched closing tags.
   * @param array $error_pos  Offsets belonging to $error_data (same keys).
   * @return void
   */
  function sort_data(&$pre_data, &$pre_pos, &$error_data, &$error_pos)
  {
      // Keys of start tags already paired, used as a set for O(1) lookup.
      $paired_pre = array();
      // Keys of closing tags that found a partner.
      $paired_err = array();

      foreach ($error_data as $err_key => $name) {
          // Search from the innermost (latest) start tag outwards.
          foreach (array_reverse(array_keys($pre_data)) as $pre_key) {
              if (isset($paired_pre[$pre_key]) || $pre_data[$pre_key] !== $name) {
                  continue;
              }
              // A start tag can only be closed by a tag that comes after it.
              if (isset($pre_pos[$pre_key], $error_pos[$err_key])
                  && $pre_pos[$pre_key] >= $error_pos[$err_key]
              ) {
                  continue;
              }
              $paired_pre[$pre_key] = true;
              $paired_err[] = $err_key;
              break;
          }
      }

      // Drop the closing tags that were paired
      foreach ($paired_err as $err_key) {
          unset($error_pos[$err_key]);
          unset($error_data[$err_key]);
      }
      // Drop the start tags that were paired
      foreach (array_keys($paired_pre) as $pre_key) {
          unset($pre_data[$pre_key]);
          unset($pre_pos[$pre_key]);
      }
  }
  /**
   * Repair the markup by inserting every missing tag.
   *
   * For each unclosed start tag a closing tag is inserted at the offset
   * confirm_pre_pos() reports; for each unmatched closing tag a start tag is
   * inserted at the offset confirm_err_pos() reports.
   *
   * All offsets are computed against the unmodified input and the insertions
   * are then spliced in from left to right in a single pass. The original
   * inserted one tag at a time and tried to track how far later offsets had
   * shifted, but it compared offsets from the original string with offsets
   * from the already modified string and overwrote the shift when two tags
   * went in at the same offset, so later insertions could be misplaced.
   *
   * @param string $str        HTML source.
   * @param array  $pre_data   Names of unclosed start tags.
   * @param array  $pre_pos    Offsets belonging to $pre_data (same keys).
   * @param array  $error_data Names of unmatched closing tags.
   * @param array  $error_pos  Offsets belonging to $error_data (same keys).
   * @return string The repaired markup.
   */
  function modify_data($str, $pre_data, $pre_pos, $error_data, $error_pos)
  {
      $str_len = strlen($str);
      $insertions = array();
      $seq = 0;

      // Start tags without a closing tag: insert "</name>"
      foreach ($pre_data as $key => $value) {
          $insertions[] = array(
              'pos'  => confirm_pre_pos($str, $pre_pos[$key]),
              'rank' => 0,
              'seq'  => $seq++,
              'text' => '</' . $value . '>',
          );
      }
      // Closing tags without a start tag: insert "<name>"
      foreach ($error_data as $key => $value) {
          $insertions[] = array(
              'pos'  => confirm_err_pos($str, $error_pos[$key]),
              'rank' => 1,
              'seq'  => $seq++,
              'text' => '<' . $value . '>',
          );
      }

      // Order by offset. At the same offset closing tags go first, so an
      // element is closed before the next one is opened, and within the same
      // kind the later entry goes first so the inserted tags nest.
      usort($insertions, function ($a, $b) {
          if ($a['pos'] !== $b['pos']) {
              return ($a['pos'] < $b['pos']) ? -1 : 1;
          }
          if ($a['rank'] !== $b['rank']) {
              return ($a['rank'] < $b['rank']) ? -1 : 1;
          }
          if ($a['seq'] === $b['seq']) {
              return 0;
          }
          return ($a['seq'] > $b['seq']) ? -1 : 1;
      });

      $result = '';
      $cursor = 0;
      foreach ($insertions as $insertion) {
          // Keep the offset inside the string and never move backwards.
          $pos = min(max((int) $insertion['pos'], $cursor), $str_len);
          $result .= substr($str, $cursor, $pos - $cursor) . $insertion['text'];
          $cursor = $pos;
      }

      return $result . substr($str, $cursor);
  }
  /**
   * Insert one missing tag into the markup.
   *
   * @param string $str         HTML source.
   * @param string $insert_data Tag name to insert.
   * @param int    $pos         Offset recorded by the scan for this tag.
   * @param bool   $type        true: the start tag exists, insert the closing
   *                            tag "</name>" at confirm_pre_pos();
   *                            false: the closing tag exists, insert the start
   *                            tag "<name>" at confirm_err_pos().
   * @return array 'str'        => the markup with the tag inserted,
   *               'pos'        => the offset at which it was inserted,
   *               'move_count' => number of bytes inserted.
   */
  function insert_data($str, $insert_data, $pos, $type)
  {
      if ($type) {
          // Start tag present: add the closing tag. The original listing had
          // an empty string here, so nothing was ever inserted for this case
          // although move_count still reported strlen(name) + 3.
          $pos = confirm_pre_pos($str, $pos);
          $mid_str = '</' . $insert_data . '>';
      } else {
          // Closing tag present: add the start tag.
          $pos = confirm_err_pos($str, $pos);
          $mid_str = '<' . $insert_data . '>';
      }

      // Equals strlen(name) + 3 for a closing tag and + 2 for a start tag.
      $move_count = strlen($mid_str);
      $str = substr($str, 0, $pos) . $mid_str . substr($str, $pos);

      return array('str' => $str, 'pos' => $pos, 'move_count' => $move_count);
  }
  // Pair up the tags the stack check missed, insert what is still missing,
  // and print the repaired markup.
  sort_data($pre_data, $pre_pos, $error_data, $error_pos);
  $new_str = modify_data($str, $pre_data, $pre_pos, $error_data, $error_pos);
  echo $new_str;

  // Debug helpers, left disabled: dump the scan result and print the five
  // characters in front of each recorded offset.
  // print_r($pre_data);
  // print_r($pre_pos);
  // print_r($error_data);
  // print_r($error_pos);
  // echo strlen($str);
  // foreach($pre_pos as $value){
  //     $value = confirm_pre_pos($str, $value);
  //     for($i=$value-5; $i<=$value; $i++) {
  //         echo $str[$i];
  //     }
  //     echo "\n";
  // }
  // foreach($error_pos as $value){
  //     for($i=$value-5; $i<=$value; $i++) {
  //         echo $str[$i];
  //     }
  //     echo "\n";
  // }
  • ?>
  • 复制代码


    source:php.cn
    Statement of this Website
    The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
    Popular Tutorials
    More>
    Latest Downloads
    More>
    Web Effects
    Website Source Code
    Website Materials
    Front End Template