Home > Backend Development > PHP Tutorial > How to parse all URL addresses in a string in php_PHP tutorial

How to parse all URL addresses in a string in php_PHP tutorial

WBOY
Release: 2016-07-13 09:58:19
Original
764 people have browsed it

How PHP parses all URL addresses in a string

The details are as follows:

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

/**
 * Collects the href value of every <a> tag found in an HTML string.
 *
 * hrefs that start with "javascript:" or "#" are ignored. When $repath is
 * true and $current_url is a URL with a scheme and host, each link is
 * converted to a full URL relative to $current_url:
 *   - "//host/x"        gets the scheme of $current_url
 *   - "/x"              is resolved against the site root
 *   - "x", "./x", "../x" are resolved against the directory of $current_url
 *   - "?query"          is resolved against the path of $current_url
 *   - links that already carry a scheme (http:, https:, mailto:, ...) are
 *     returned unchanged
 * If $current_url cannot be parsed into scheme + host, the raw links are
 * returned untouched.
 *
 * @param string $html        the html on the page
 * @param string $current_url the full url that the html came from
 *                            (only needed for $repath)
 * @param bool   $repath      converts ../ and / and // urls to full valid urls
 *
 * @return array the links, in the order they appear in $html
 */
function pageLinks($html, $current_url = "", $repath = false){
    $links = array();

    // Group 1 is the opening quote, group 2 the href value. The value runs
    // up to the matching closing quote (\1), so a ' inside a "..." value
    // does not end the link early.
    $pattern = '/<a\s+(?:[^>]*?\s)?href\s*=\s*(["\'])(?!javascript:|#)((?:(?!\1).)+)\1/i';
    if(preg_match_all($pattern, (string) $html, $matches)){
        $links = $matches[2];
    }

    if(!$repath || count($links) === 0 || strlen($current_url) === 0){
        return $links;
    }

    $base = parse_url($current_url);
    if(!is_array($base) || !isset($base["scheme"]) || !isset($base["host"])){
        // Without a scheme and host there is nothing to build full urls from.
        return $links;
    }

    $root = $base["scheme"] . "://" . $base["host"];
    if(isset($base["port"])){
        $root .= ":" . $base["port"];
    }

    // Directory of the current document: its path up to the last slash.
    // A url without a path ("http://host") is treated as "/".
    $path = isset($base["path"]) ? $base["path"] : "/";
    $dir = trim(substr($path, 0, strrpos($path, "/") + 1), "/");
    $base_segments = ($dir === "") ? array() : explode("/", $dir);

    foreach($links as $k => $link){
        // Already absolute (has a scheme): keep as is.
        if($link === "" || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link)){
            continue;
        }

        // Protocol-relative: "//host/path".
        if(strncmp($link, "//", 2) === 0){
            $links[$k] = $base["scheme"] . ":" . $link;
            continue;
        }

        // Root-relative: "/path".
        if($link[0] === "/"){
            $links[$k] = $root . $link;
            continue;
        }

        // Split the path part of the link from its ?query / #fragment tail.
        $cut = strcspn($link, "?#");
        if($cut === 0){
            // Query-only link: same document, different query.
            $links[$k] = $root . $path . $link;
            continue;
        }

        // Work on a fresh copy of the base segments for every link so that
        // the "../" handling of one link cannot affect the next one.
        $parts = explode("/", substr($link, 0, $cut));
        $segments = $base_segments;
        foreach($parts as $part){
            if($part === ".."){
                array_pop($segments);
            }elseif($part !== "" && $part !== "."){
                $segments[] = $part;
            }
        }

        $resolved = "/" . implode("/", $segments);
        $last = end($parts);
        // Keep the trailing slash of directory-style links ("dir/", "..", ".").
        if($resolved !== "/" && ($last === "" || $last === "." || $last === "..")){
            $resolved .= "/";
        }

        $links[$k] = $root . $resolved . substr($link, $cut);
    }

    return $links;
}

// Demo: fetch a page and print the links found on it as plain text.
header("content-type: text/plain");

$url = "http://www.jb51.net";

$html = file_get_contents($url);

// file_get_contents() returns false when the page cannot be fetched;
// stop here instead of parsing and printing an empty result.
if($html === false){
    exit("Unable to fetch " . $url . "\n");
}

// Gets links from the page:
print_r(pageLinks($html));

// Gets links from the page and formats them to a full valid url:
print_r(pageLinks($html, $url, true));

1

2

3

4

6 7 8 9 10
11

12
13<🎜> <🎜>14<🎜> <🎜>15<🎜> <🎜>16<🎜> <🎜>17<🎜> <🎜>18<🎜> <🎜>19<🎜> <🎜>20<🎜> <🎜>21<🎜> <🎜>22<🎜> <🎜>23<🎜> <🎜>24<🎜> <🎜>25<🎜> <🎜>26<🎜> <🎜>27<🎜> <🎜>28<🎜> <🎜>29<🎜> <🎜>30<🎜> <🎜>31<🎜> <🎜>32<🎜> <🎜>33<🎜> <🎜>34<🎜> <🎜>35<🎜> <🎜>36<🎜> <🎜>37<🎜> <🎜>38<🎜> <🎜>39<🎜> <🎜>40<🎜> <🎜>41<🎜> <🎜>42<🎜> <🎜>43<🎜> <🎜>44<🎜> <🎜>45<🎜> <🎜>46<🎜> <🎜>47<🎜>
<🎜> <🎜> <🎜>// $html = the html on the page<🎜> <🎜>// $current_url = the full url that the html came from<🎜> <🎜>//(only needed for $repath)<🎜> <🎜>// $repath = converts ../ and / and // urls to full valid urls<🎜> <🎜>function pageLinks($html, $current_url = "", $repath = false){<🎜> <🎜>preg_match_all("/ <🎜>$links = array();<🎜> <🎜>if(isset($matches[2])){<🎜> <🎜>$links = $matches[2];<🎜> <🎜>}<🎜> <🎜>if($repath && count($links) > 0 && strlen($current_url) > 0){ $pathi = pathinfo($current_url); $dir = $pathi["dirname"]; $base = parse_url($current_url); $split_path = explode("/", $dir); $url = ""; foreach($links as $k => $link){ if(preg_match("/^../", $link)){ $total = substr_count($link, "../"); for($i = 0; $i < $total; $i ){ array_pop($split_path); } $url = implode("/", $split_path) . "/" . str_replace("../", "", $link); }elseif(preg_match("/^///", $link)){ $url = $base["scheme"] . ":" . $link; }elseif(preg_match("/^/|^.//", $link)){ $url = $base["scheme"] . "://" . $base["host"] . $link; }elseif(preg_match("/^[a-zA-Z0-9]/", $link)){ if(preg_match("/^http/", $link)){ $url = $link; }else{ $url = $dir . "/" . $link; } } $links[$k] = $url; } } return $links; } header("content-type: text/plain"); $url = "http://www.jb51.net"; $html = file_get_contents($url); // Gets links from the page: print_r(pageLinks($html)); // Gets links from the page and formats them to a full valid url: print_r(pageLinks($html, $url, true));

http://www.bkjia.com/PHPjc/978262.html www.bkjia.com true http://www.bkjia.com/PHPjc/978262.html TechArticle The method for PHP to parse all URL addresses in a string is as follows: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 4...
Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template