How to parse all URL addresses in a string in php_PHP tutorial-PHP Tutorial-php.cn

How to parse all URL addresses in a string in php_PHP tutorial

WBOY

Release: 2016-07-13 09:58:19

Original

797 people have browsed it

How to parse all URL addresses in a string with PHP

The details are as follows:

/**
 * Collects the href values of every <a> tag found in an HTML string.
 *
 * Links whose href starts with "javascript:" or "#" are skipped.
 *
 * @param string $html        The HTML of the page.
 * @param string $current_url The full URL the HTML came from (only needed for $repath).
 * @param bool   $repath      When true, converts "../", "./", "/", "//" and plain
 *                            relative links into full URLs based on $current_url.
 *
 * @return array List of link strings in document order. When $repath is true but
 *               $current_url has no scheme/host, the links are returned unchanged.
 */
function pageLinks($html, $current_url = "", $repath = false){

$links = array();

// Group 1 is the opening quote, group 2 the href value up to the matching quote.
if(preg_match_all('/<a\s(?:[^>]*?\s)?href=(["\'])(?!javascript:|#)((?:(?!\1).)+)\1/i', $html, $matches)){

$links = $matches[2];

}

if(!$repath || count($links) === 0 || strlen($current_url) === 0){

return $links;

}

$base = parse_url($current_url);

// Without a scheme and host there is nothing to resolve against.
if($base === false || !isset($base["scheme"]) || !isset($base["host"])){

return $links;

}

$root = $base["scheme"] . "://" . $base["host"];

if(isset($base["port"])){

$root .= ":" . $base["port"];

}

$base_path = isset($base["path"]) ? $base["path"] : "/";

// Directory segments of the base URL: drop the last path component (file name
// or the empty string after a trailing slash), then discard empty segments.
$dir_segments = explode("/", $base_path);

array_pop($dir_segments);

$dir_segments = array_values(array_filter($dir_segments, 'strlen'));

foreach($links as $k => $link){

if(preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link)){

// Already carries a scheme (http:, https:, mailto:, ...): keep as is.
$url = $link;

}elseif(preg_match('/^\/\//', $link)){

// Protocol-relative link: borrow the scheme of the base URL.
$url = $base["scheme"] . ":" . $link;

}elseif(preg_match('/^\//', $link)){

// Host-relative link.
$url = $root . $link;

}else{

// Split off any query/fragment so dots inside it are not read as path segments.
$link_path = $link;

$suffix = "";

if(preg_match('/^([^?#]*)([?#].*)$/s', $link, $parts)){

$link_path = $parts[1];

$suffix = $parts[2];

}

if($link_path === ""){

// Query-only link: applies to the base document itself.
$url = $root . $base_path . $suffix;

}else{

// Work on a per-link copy so "../" in one link cannot affect the next.
$segments = $dir_segments;

$pieces = explode("/", $link_path);

$last = count($pieces) - 1;

foreach($pieces as $i => $piece){

if($piece === "." || $piece === ".."){

if($piece === ".."){

// array_pop() on an empty array is a no-op, so we never climb above the root.
array_pop($segments);

}

if($i === $last){

// A trailing "." or ".." refers to a directory: keep the trailing slash.
$segments[] = "";

}

}else{

$segments[] = $piece;

}

}

$url = $root . "/" . implode("/", $segments) . $suffix;

}

}

$links[$k] = $url;

}

return $links;

}

// Emit plain text so the print_r() dumps below are readable in a browser.
header("content-type: text/plain");

$url = "http://www.jb51.net";

$html = file_get_contents($url);

// file_get_contents() returns false on failure; stop instead of parsing a non-string.
if($html === false){

exit("Unable to fetch " . $url);

}

// Gets links from the page:

print_r(pageLinks($html));

// Gets links from the page and formats them to a full valid url:

print_r(pageLinks($html, $url, true));

13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47

<🎜> <🎜> <🎜>// $html = the html on the page<🎜> <🎜>// $current_url = the full url that the html came from<🎜> <🎜>//(only needed for $repath)<🎜> <🎜>// $repath = converts ../ and / and // urls to full valid urls<🎜> <🎜>function pageLinks($html, $current_url = "", $repath = false){<🎜> <🎜>preg_match_all("/ <🎜>$links = array();<🎜> <🎜>if(isset($matches[2])){<🎜> <🎜>$links = $matches[2];<🎜> <🎜>}<🎜> <🎜>if($repath && count($links) > 0 && strlen($current_url) > 0){ $pathi = pathinfo($current_url); $dir = $pathi["dirname"]; $base = parse_url($current_url); $split_path = explode("/", $dir); $url = ""; foreach($links as $k => $link){ if(preg_match("/^../", $link)){ $total = substr_count($link, "../"); for($i = 0; $i < $total; $i ){ array_pop($split_path); } $url = implode("/", $split_path) . "/" . str_replace("../", "", $link); }elseif(preg_match("/^///", $link)){ $url = $base["scheme"] . ":" . $link; }elseif(preg_match("/^/|^.//", $link)){ $url = $base["scheme"] . "://" . $base["host"] . $link; }elseif(preg_match("/^[a-zA-Z0-9]/", $link)){ if(preg_match("/^http/", $link)){ $url = $link; }else{ $url = $dir . "/" . $link; } } $links[$k] = $url; } } return $links; } header("content-type: text/plain"); $url = "http://www.jb51.net"; $html = file_get_contents($url); // Gets links from the page: print_r(pageLinks($html)); // Gets links from the page and formats them to a full valid url: print_r(pageLinks($html, $url, true));

http://www.bkjia.com/PHPjc/978262.htmlwww.bkjia.comtruehttp: //www.bkjia.com/PHPjc/978262.htmlTechArticleThe method for PHP to parse all URL addresses in the string is as follows: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 4...