Home > Backend Development > PHP Tutorial > php正则替换处理HTML页面的方法_php技巧

php正则替换处理HTML页面的方法_php技巧

WBOY
Release: 2016-05-16 20:13:39
Original
844 people have browsed it

本文实例讲述了php正则替换处理HTML页面的方法。分享给大家供大家参考。具体如下:

<&#63;php
if(!defined('BASEPATH')) exit('No direct script access allowed');
 /**
  * HTML替换处理类,考虑如下几种替换
  * 1. img src : '/<img (.+&#63;)src=([\'\" ])&#63;(.+&#63;)([  alt="php正则替换处理HTML页面的方法_php技巧" >]+&#63;)/i'
  * 2. a href : '/<a(.+&#63;)href=([\'\" ])&#63;(.+&#63;)([ >]+&#63;)/i'
  * 3. ifram.src : '/<iframe(.+&#63;)src=([\'\" ])&#63;(.+&#63;)([ >]+&#63;)/i'
  * 4. frame src : '/<frame(.+&#63;)src=([\'\" ])&#63;(.+&#63;)([ >]+&#63;)/i'
  * 5. js  : '/window.open([( ]+&#63;)([\'" ]+&#63;)(.+&#63;)([ )+&#63;])/i'
  * 6. css  : '/background(.+&#63;)url([( ])([\'" ]+&#63;)(.+&#63;)([ )+&#63;])/i'
  */
 class Myreplace {
  private $moudle_array = array('udata','tdata','tresult','dresult');
  private $content;
  private $relative_dirname;
  private $projectid;
  private $moudle;
  function __construct() {
   $this->CI = &get_instance ();
  }
  /**
   * 替换
   * @param string $content HTML内容
   * @param string $relative 相对路径
   * @param int  $projectid 项目id
   * @moudle string $moudle  模板标识: udata,tdata,tresult,dresult
   */
  public function my_replace($content,$relative,$projectid,$moudle) {
   $this->content = $content;
   $this->relative_dirname = $relative;
   $this->projectid = $projectid;
   if(in_array(strtolower($moudle),$this->moudle_array))
    $this->moudle = $moudle;
   else exit;
   switch($this->moudle) {
    case 'udata':
     $this->CI->load->model('mupload_data','model');
     break;
    case 'tdata':
     $this->CI->load->model('taskdata','model');
     break;
    case 'tresult':
     $this->CI->load->model('taskresult','model');
     break;
    case 'dresult':
     $this->CI->load->model('dmsresult','model');
     break;
    default:
     break;
   }
   $pattern = '/<img (.+&#63;)src=([\'\" ])&#63;(.+&#63;)([  alt="php正则替换处理HTML页面的方法_php技巧" >]+&#63;)/i';
   $content = preg_replace_callback( $pattern, array($this, 'image_replace') , $content );
   $pattern = '/<a(.+&#63;)href=([\'\" ])&#63;(.+&#63;)([ >]+&#63;)/i';
   $content = preg_replace_callback( $pattern, array($this, 'html_replace') , $content );
   $pattern = '/<iframe(.+&#63;)src=([\'\" ])&#63;(.+&#63;)([ >]+&#63;)/i';
   $content = preg_replace_callback( $pattern, array($this, 'iframe_replace') , $content );
   $pattern = '/<frame(.+&#63;)src=([\'\" ])&#63;(.+&#63;)([ >]+&#63;)/i'; 
   $content = preg_replace_callback( $pattern, array($this, 'frame_replace'), $content );
   $pattern = '/window.open([( ]+&#63;)([\'" ]+&#63;)(.+&#63;)([ )]+&#63;)/i';
   $content = preg_replace_callback( $pattern, array($this, 'js_replace'), $content );
   $pattern = '/background(.+&#63;)url([( ])([\'" ]+&#63;)(.+&#63;)([ )+&#63;])/i';
   $content = preg_replace_callback( $pattern, array($this, 'css_replace'), $content);
   return $content;
  }
  private function image_replace($matches) {
   if(count($matches) < 4) return '';
   if( empty($matches[3]) ) return '';
   $matches[3] = rtrim($matches[3],'\'"/');
   //获取图片的id
   $parent_dir_num = substr_count( $matches[3], '../');
   $relative_dirname = $this->relative_dirname;
   for($i=0; $i<$parent_dir_num; $i++) {
    $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
   }
   $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[3],'./');
   $image_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
   //输出
   if( !empty($image_id) ) {
    if($this->moudle == 'dresult') {
     return "<img ".$matches[1]."src=".$matches[2].$this- alt="php正则替换处理HTML页面的方法_php技巧" >CI->config->item("base_url")."cdms/".$this->moudle."/readpic/$image_id&#63;pid=".$this->projectid .$matches[2]. $matches[4];
    } else {
     return "<img ".$matches[1]."src=".$matches[2].$this- alt="php正则替换处理HTML页面的方法_php技巧" >CI->config->item("base_url")."cdms/".$this->moudle."/picfile/$image_id&#63;pid=".$this->projectid .$matches[2]. $matches[4];
    }
   } else {
    return "<img".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];
   }
  }
  private function html_replace( $matches ) {
   if(count($matches) < 4) return '';
   if( empty($matches[3]) ) return '';
   //如果href的链接($matches[3])以http或www或mailto开始,则不进行处理
   //if(preg_match('/^[http|www|mailto](.+&#63;)/i',$matches[3])) 
   // return "<a".$matches[1]."href=".$matches[2].$matches[3].$matches[4];
   $matches[3] = rtrim($matches[3],'\'"/');
   //处理锚点
   if(substr_count($matches[3],'#')>0) 
    $matches[3] = substr($matches[3],0,strrpos($matches[3],'#'));
   //获取html的id
   $parent_dir_num = substr_count( $matches[3], '../');
   $relative_dirname = $this->relative_dirname;
   for($i=0; $i<$parent_dir_num; $i++) {
    $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
   }
   $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[3],'./');
   $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
   //输出
   if( !empty($txtfile_id ) ) {
    if($this->moudle == 'dresult') {
     return "<a".$matches[1]."href=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id&#63;pid=".$this->projectid .$matches[2].$matches[4];
    } else {
     return "<a".$matches[1]."href=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id&#63;pid=".$this->projectid .$matches[2].$matches[4];
    }
   } else {
    return "<a".$matches[1]."href=".$matches[2].$matches[3].$matches[2].$matches[4];
   }
  }
  private function iframe_replace( $matches ) {
   if(count($matches) < 4) return '';
   if( empty($matches[3]) ) return '';
   $matches[3] = rtrim($matches[3],'\'"/');
   //处理锚点
   if(substr_count($matches[3],'#')>0) 
    $matches[3] = substr($matches[3],0,strrpos($matches[3],'#'));
   //获取html的id
   $parent_dir_num = substr_count( $matches[3], '../');
   $relative_dirname = $this->relative_dirname;
   for($i=0; $i<$parent_dir_num; $i++) {
    $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
   }
   $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[3],'./');
   $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
   //输出
   if( !empty($txtfile_id ) ) {
    if($this->moudle == 'dresult') {  
     return "<iframe".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id&#63;pid=".$this->projectid .$matches[2].$matches[4];
    } else {
     return "<iframe".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id&#63;pid=".$this->projectid .$matches[2].$matches[4];
    }
   } else {
    return "<iframe".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];
   }
  }
  private function frame_replace( $matches ) {   
   if(count($matches) < 4) return '';
   if( empty($matches[3]) ) return '';
   $matches[3] = rtrim($matches[3],'\'"/');
   //处理锚点
   if(substr_count($matches[3],'#')>0) 
    $matches[3] = substr($matches[3],0,strrpos($matches[3],'#'));
   //获取html的id
   $parent_dir_num = substr_count( $matches[3], '../');
   $relative_dirname = $this->relative_dirname;
   for($i=0; $i<$parent_dir_num; $i++) {
    $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
   }
   $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[3],'./');
   $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
   //输出
   if( !empty($txtfile_id ) ) {
    if($this->moudle == 'dresult') { 
     return "<frame".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id&#63;pid=".$this->projectid.$matches[2].$matches[4];
    } else {
     return "<frame".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id&#63;pid=".$this->projectid.$matches[2].$matches[4];
    }
   } else {
    return "<frame".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];
   }
  }
  private function js_replace( $matches ){
   if(count($matches) < 4) return '';
   if( empty($matches[3]) ) return '';
   //处理链接
   $arr_html = split(',',$matches[3]);
   $href = $arr_html[0];
   $other = '';
   for($i=0; $i<count($arr_html); $i++)
    $other = $arr_html[$i].", ";
   $other = rtrim($other,"\, ");
   $href =rtrim($href,'\'\"');
   //处理锚点
   if(substr_count($href,'#')>0) 
    return "window.open".$matches[1].$matches[2].$matches[3].$matches[4];;
   //获取html的id
   $parent_dir_num = substr_count( $href, '../');
   $relative_dirname = $this->relative_dirname;
   for($i=0; $i<$parent_dir_num; $i++) {
    $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
   }
   $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($href,'./');
   $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
   //输出
   if( !empty($txtfile_id ) ) {
    if($this->moudle == 'dresult') { 
     return "window.open".$matches[1].$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id&#63;pid=".$this->projectid.$matches[2].','.$other.$matches[4];
    } else {
     return "window.open".$matches[1].$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id&#63;pid=".$this->projectid.$matches[2].','.$other.$matches[4];
    }
   } else {
    return "window.open".$matches[1].$matches[2].$matches[3].$matches[4];
   }
  }
  private function css_replace( $matches ) {
   if(count($matches) < 5) return '';
   if( empty($matches[4]) ) return '';
    
   $matches[4] = rtrim($matches[4],'\'"/');
   //获取图片的id
   $parent_dir_num = substr_count( $matches[4], '../');
   $relative_dirname = $this->relative_dirname;
   for($i=0; $i<$parent_dir_num; $i++) {
    $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
   }
   $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[4],'./');
   $image_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
   //输出
   if( !empty($image_id) ) {
    if($this->moudle == 'dresult') {
     return "background".$matches[1]."url".$matches[2].$matches[3].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readpic/$image_id&#63;pid=".$this->projectid .$matches[3]. $matches[5];
    } else {
     return "background".$matches[1]."url".$matches[2].$matches[3].$this->CI->config->item("base_url")."cdms/".$this->moudle."/picfile/$image_id&#63;pid=".$this->projectid .$matches[3]. $matches[5];
    }
   } else {
    return "background".$matches[1]."url".$matches[2].$matches[3].$matches[4].$matches[3].$matches[5];
   }
  }
 }
/* End of Myreplace.php */
/* Location: /application/libraries/Myreplace.php */

Copy after login

希望本文所述对大家的php程序设计有所帮助。

Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template