/**
 * Return the URI of the current request as reported by the server.
 *
 * $_SERVER["REQUEST_URI"] is used when it is non-empty. Otherwise the value
 * is rebuilt from $_SERVER["PHP_SELF"], with "?" and
 * $_SERVER["QUERY_STRING"] appended when the query string is non-empty.
 *
 * @return string Request path, including the query string if there is one.
 */
function get_php_url(){
    // Fast path: the server already supplies the complete request URI.
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Fallback: script path plus the optional query string.
    $currentUrl = $_SERVER["PHP_SELF"];
    if (!empty($_SERVER["QUERY_STRING"])) {
        $currentUrl = $currentUrl . "?" . $_SERVER["QUERY_STRING"];
    }
    return $currentUrl;
}
/**
 * Reduce a piece of text to a plain decimal number string.
 *
 * Full-width digits (U+FF10..U+FF19, UTF-8 encoded) are converted to ASCII
 * digits, every character other than 0-9 and "." is removed, and leading
 * zeros are stripped.
 *
 * @param string $fnum Text containing a number.
 * @return string|int The cleaned digit string, or integer 0 when nothing remains.
 */
function GetAlabNum($fnum){
    // The lookup table used to hold the ASCII digits on both sides, which made
    // the conversion loop a no-op.
    // NOTE(review): full-width digits are presumed to be the intended source
    // characters and UTF-8 input is assumed - confirm against callers.
    for ($i = 0; $i <= 9; $i++) {
        $fullWidthDigit = "\xEF\xBC" . chr(0x90 + $i);
        $fnum = str_replace($fullWidthDigit, (string) $i, $fnum);
    }

    // ereg_replace() was removed in PHP 7.0; preg_replace() with the same
    // expression produces the same result.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    // Nothing left (or the regex engine failed): report zero.
    if ($fnum === null || $fnum === '') {
        $fnum = 0;
    }
    return $fnum;
}
/**
 * Convert plain text into an HTML-safe fragment.
 *
 * Angle brackets are replaced by their HTML entities, and every run of
 * CR/LF characters becomes a single "<br/>" followed by "\r\n".
 *
 * @param string $txt Plain text.
 * @return string HTML fragment.
 */
function Text2Html($txt){
    // The source also replaced " " with an apparently identical " "; that
    // no-op is dropped. NOTE(review): if a different replacement (for example
    // a non-breaking-space entity) was intended, restore it here.

    // The entity strings in these replacements had been mangled in the
    // source, which left the angle brackets unescaped.
    $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

    // The original pattern carried the U (ungreedy) modifier, which made the
    // quantifier match one character at a time, so a single CRLF produced two
    // line breaks. A greedy match turns each run into one break.
    $txt = preg_replace("/[\r\n]+/", "<br/>\r\n", $txt);

    return $txt;
}
/**
 * Neutralise HTML markup by replacing angle brackets with their entities.
 *
 * @param string $str Text that may contain markup.
 * @return string The text with "<" and ">" replaced by "&lt;" and "&gt;".
 */
function ClearHtml($str){
    // The replacement strings had lost their entity form in the source, which
    // turned both calls into no-ops; the entities are restored here.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
/**
 * Rewrite root-relative href/src attribute values to absolute URLs.
 *
 * The scheme and host are taken from $feed_url. Only double-quoted
 * attribute values that start with a single "/" are rewritten.
 *
 * @param string $content  HTML fragment to rewrite.
 * @param string $feed_url URL whose scheme (http, https or ftp) and host form the base.
 * @return string The rewritten fragment, or $content unchanged when no scheme
 *                or host can be extracted from $feed_url.
 */
function relative_to_absolute($content, $feed_url) {
    // Scheme prefix such as "http://". This call was truncated in the source;
    // it is rebuilt from the later uses of $protocol[0].
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Host: drop the scheme, then everything from the first "/" onwards.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    // Escape "\" and "$" so the base is inserted literally by preg_replace().
    $base = addcslashes($protocol[0] . $server_url . '/', '\\$');

    // The (?!\/) lookahead leaves protocol-relative URLs ("//host/...") alone;
    // prefixing them would produce a broken address.
    $new_content = preg_replace('/href="\/(?!\/)/', 'href="' . $base, $content);
    $new_content = preg_replace('/src="\/(?!\/)/', 'src="' . $base, $new_content);

    return $new_content;
}
/**
 * Collect the targets and link texts of the anchors in an HTML fragment.
 *
 * Only anchors whose first attribute is href and whose content contains no
 * ">" character are matched.
 *
 * @param string $code HTML fragment.
 * @return array Array with two parallel lists: 'name' (link texts) and 'url' (href values).
 */
function get_all_url($code){
    // The opening part of this expression ("<a\s+href=[...") was missing from
    // the source and is restored. The quote class is written as ["\'] because
    // a "|" inside a character class is a literal bar, not an alternation.
    preg_match_all('/<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)<\/a>/i', $code, $arr);

    return array('name' => $arr[2], 'url' => $arr[1]);
}
/**
 * Return the text found between a start marker and an end marker.
 *
 * The text is taken from just after the first occurrence of $start up to the
 * next occurrence of $start or $end, whichever comes first. When $end does
 * not occur, everything up to the next $start (or the end of the string) is
 * returned.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null The enclosed text, '' when $start is not found, or
 *                     null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return null;
    }

    $pieces = explode($start, $str);

    // Start marker absent: the original read an undefined offset here, which
    // raised a warning and then yielded ''. Return '' directly instead.
    if (!isset($pieces[1])) {
        return '';
    }

    $inner = explode($end, $pieces[1]);
    return $inner[0];
}
/**
 * Turn an HTML table into a list of rows, each row one quoted, comma-separated string.
 *
 * Every cell becomes "cell" (wrapped in double quotes); the cells of a row
 * are joined with commas. All other tags and all space characters are removed.
 *
 * @param string $table HTML of the table.
 * @return array List of row strings such as '"a","b"'.
 */
function get_tr_array($table) {
    // Opening <td ...> becomes the opening quote of a cell.
    $table = preg_replace("'<td[^>]*?>'si", '"', $table);

    // The closing-tag search strings were empty in the source, so no row
    // marker was ever inserted and the result was always empty. They are
    // restored here; str_ireplace() matches the case-insensitive regexes.
    $table = str_ireplace("</td>", '",', $table);
    $table = str_ireplace("</tr>", "{tr}", $table);

    // Strip every remaining tag.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove a line break together with the whitespace that follows it.
    $table = preg_replace("'([\r\n])[\s]+'", "", $table);

    // The source applied this replacement twice with what appears to be the
    // same search string; once is sufficient.
    $table = str_replace(" ", "", $table);

    $table = explode(",{tr}", $table);
    // The piece after the last row marker is not a row.
    array_pop($table);

    return $table;
}
/**
 * Turn an HTML table into a two-dimensional array of cell texts.
 *
 * All tags and all space characters are removed from the cell contents.
 *
 * @param string $table HTML of the table.
 * @return array List of rows; each row is a list of cell strings. Empty when
 *               the input contains no closed rows.
 */
function get_td_array($table) {
    // Drop the opening table, row and cell tags.
    $table = preg_replace("'<table[^>]*?>'si", "", $table);
    $table = preg_replace("'<tr[^>]*?>'si", "", $table);
    $table = preg_replace("'<td[^>]*?>'si", "", $table);

    // The closing-tag search strings were empty in the source, so neither
    // marker was ever inserted. They are restored here; str_ireplace()
    // matches the case-insensitive regexes above.
    $table = str_ireplace("</tr>", "{tr}", $table);
    $table = str_ireplace("</td>", "{td}", $table);

    // Strip every remaining tag.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove a line break together with the whitespace that follows it.
    $table = preg_replace("'([\r\n])[\s]+'", "", $table);

    // The source applied this replacement twice with what appears to be the
    // same search string; once is sufficient.
    $table = str_replace(" ", "", $table);

    $rows = explode('{tr}', $table);
    // The piece after the last row marker is not a row.
    array_pop($rows);

    // Initialised up front so that a table without rows yields an empty array
    // rather than an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        $td = explode('{td}', $tr);
        // The piece after the last cell marker is not a cell.
        array_pop($td);
        $td_array[] = $td;
    }

    return $td_array;
}
/**
 * Extract the runs of ASCII letters from a string, sorted.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When truthy, duplicate words are removed before sorting.
 * @return array Sorted, consecutively indexed list of the words found.
 */
function split_en_str($str, $distinct=true) {
    $found = array();
    preg_match_all('/([a-zA-Z]+)/', $str, $found);

    // Group 1 holds the captured words.
    $words = $found[1];
    if ($distinct) {
        $words = array_unique($words);
    }

    // sort() also renumbers the keys left sparse by array_unique().
    sort($words);
    return $words;
}
函数描述及例子
PHP采集程序中常用的函数
查询关键字
PHP采集程序中常用的函数
<?php
/**
 * Return the URI of the current request as reported by the server.
 *
 * $_SERVER["REQUEST_URI"] is used when it is non-empty. Otherwise the value
 * is rebuilt from $_SERVER["PHP_SELF"], with "?" and
 * $_SERVER["QUERY_STRING"] appended when the query string is non-empty.
 *
 * @return string Request path, including the query string if there is one.
 */
function get_php_url(){
    // Fast path: the server already supplies the complete request URI.
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Fallback: script path plus the optional query string.
    $currentUrl = $_SERVER["PHP_SELF"];
    if (!empty($_SERVER["QUERY_STRING"])) {
        $currentUrl = $currentUrl . "?" . $_SERVER["QUERY_STRING"];
    }
    return $currentUrl;
}
/**
 * Reduce a piece of text to a plain decimal number string.
 *
 * Full-width digits (U+FF10..U+FF19, UTF-8 encoded) are converted to ASCII
 * digits, every character other than 0-9 and "." is removed, and leading
 * zeros are stripped.
 *
 * @param string $fnum Text containing a number.
 * @return string|int The cleaned digit string, or integer 0 when nothing remains.
 */
function GetAlabNum($fnum){
    // The lookup table used to hold the ASCII digits on both sides, which made
    // the conversion loop a no-op.
    // NOTE(review): full-width digits are presumed to be the intended source
    // characters and UTF-8 input is assumed - confirm against callers.
    for ($i = 0; $i <= 9; $i++) {
        $fullWidthDigit = "\xEF\xBC" . chr(0x90 + $i);
        $fnum = str_replace($fullWidthDigit, (string) $i, $fnum);
    }

    // ereg_replace() was removed in PHP 7.0; preg_replace() with the same
    // expression produces the same result.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    // Nothing left (or the regex engine failed): report zero.
    if ($fnum === null || $fnum === '') {
        $fnum = 0;
    }
    return $fnum;
}
/**
 * Convert plain text into an HTML-safe fragment.
 *
 * Angle brackets are replaced by their HTML entities, and every run of
 * CR/LF characters becomes a single "<br/>" followed by "\r\n".
 *
 * @param string $txt Plain text.
 * @return string HTML fragment.
 */
function Text2Html($txt){
    // The source also replaced " " with an apparently identical " "; that
    // no-op is dropped. NOTE(review): if a different replacement (for example
    // a non-breaking-space entity) was intended, restore it here.

    // The search and entity strings in these replacements had been mangled in
    // the source, which left the angle brackets unescaped.
    $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

    // The original pattern carried the U (ungreedy) modifier, which made the
    // quantifier match one character at a time, so a single CRLF produced two
    // line breaks. A greedy match turns each run into one break.
    $txt = preg_replace("/[\r\n]+/", "<br/>\r\n", $txt);

    return $txt;
}
/**
 * Neutralise HTML markup by replacing angle brackets with their entities.
 *
 * @param string $str Text that may contain markup.
 * @return string The text with "<" and ">" replaced by "&lt;" and "&gt;".
 */
function ClearHtml($str){
    // The replacement strings had lost their entity form in the source, which
    // turned both calls into no-ops; the entities are restored here.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
/**
 * Rewrite root-relative href/src attribute values to absolute URLs.
 *
 * The scheme and host are taken from $feed_url. Only double-quoted
 * attribute values that start with a single "/" are rewritten.
 *
 * @param string $content  HTML fragment to rewrite.
 * @param string $feed_url URL whose scheme (http, https or ftp) and host form the base.
 * @return string The rewritten fragment, or $content unchanged when no scheme
 *                or host can be extracted from $feed_url.
 */
function relative_to_absolute($content, $feed_url) {
    // Scheme prefix such as "http://". This call was truncated in the source;
    // it is rebuilt from the later uses of $protocol[0].
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Host: drop the scheme, then everything from the first "/" onwards.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    // Escape "\" and "$" so the base is inserted literally by preg_replace().
    $base = addcslashes($protocol[0] . $server_url . '/', '\\$');

    // The (?!\/) lookahead leaves protocol-relative URLs ("//host/...") alone;
    // prefixing them would produce a broken address.
    $new_content = preg_replace('/href="\/(?!\/)/', 'href="' . $base, $content);
    $new_content = preg_replace('/src="\/(?!\/)/', 'src="' . $base, $new_content);

    return $new_content;
}
/**
 * Collect the targets and link texts of the anchors in an HTML fragment.
 *
 * Only anchors whose first attribute is href and whose content contains no
 * ">" character are matched.
 *
 * @param string $code HTML fragment.
 * @return array Array with two parallel lists: 'name' (link texts) and 'url' (href values).
 */
function get_all_url($code){
    // The opening part of this expression ("<a\s+href=[...") was missing from
    // the source and is restored. The quote class is written as ["\'] because
    // a "|" inside a character class is a literal bar, not an alternation.
    preg_match_all('/<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)<\/a>/i', $code, $arr);

    return array('name' => $arr[2], 'url' => $arr[1]);
}
/**
 * Return the text found between a start marker and an end marker.
 *
 * The text is taken from just after the first occurrence of $start up to the
 * next occurrence of $start or $end, whichever comes first. When $end does
 * not occur, everything up to the next $start (or the end of the string) is
 * returned.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null The enclosed text, '' when $start is not found, or
 *                     null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return null;
    }

    $pieces = explode($start, $str);

    // Start marker absent: the original read an undefined offset here, which
    // raised a warning and then yielded ''. Return '' directly instead.
    if (!isset($pieces[1])) {
        return '';
    }

    $inner = explode($end, $pieces[1]);
    return $inner[0];
}
/**
 * Turn an HTML table into a list of rows, each row one quoted, comma-separated string.
 *
 * Every cell becomes "cell" (wrapped in double quotes); the cells of a row
 * are joined with commas. All other tags and all space characters are removed.
 *
 * @param string $table HTML of the table.
 * @return array List of row strings such as '"a","b"'.
 */
function get_tr_array($table) {
    // Opening <td ...> becomes the opening quote of a cell.
    $table = preg_replace("'<td[^>]*?>'si", '"', $table);

    // The closing-tag search strings were empty in the source, so no row
    // marker was ever inserted and the result was always empty. They are
    // restored here; str_ireplace() matches the case-insensitive regexes.
    $table = str_ireplace("</td>", '",', $table);
    $table = str_ireplace("</tr>", "{tr}", $table);

    // Strip every remaining tag.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove a line break together with the whitespace that follows it.
    $table = preg_replace("'([\r\n])[\s]+'", "", $table);

    // The source applied this replacement twice with what appears to be the
    // same search string; once is sufficient.
    $table = str_replace(" ", "", $table);

    $table = explode(",{tr}", $table);
    // The piece after the last row marker is not a row.
    array_pop($table);

    return $table;
}
/**
 * Turn an HTML table into a two-dimensional array of cell texts.
 *
 * All tags and all space characters are removed from the cell contents.
 *
 * @param string $table HTML of the table.
 * @return array List of rows; each row is a list of cell strings. Empty when
 *               the input contains no closed rows.
 */
function get_td_array($table) {
    // Drop the opening table, row and cell tags.
    $table = preg_replace("'<table[^>]*?>'si", "", $table);
    $table = preg_replace("'<tr[^>]*?>'si", "", $table);
    $table = preg_replace("'<td[^>]*?>'si", "", $table);

    // The closing-tag search strings were empty in the source, so neither
    // marker was ever inserted. They are restored here; str_ireplace()
    // matches the case-insensitive regexes above.
    $table = str_ireplace("</tr>", "{tr}", $table);
    $table = str_ireplace("</td>", "{td}", $table);

    // Strip every remaining tag.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove a line break together with the whitespace that follows it.
    $table = preg_replace("'([\r\n])[\s]+'", "", $table);

    // The source applied this replacement twice with what appears to be the
    // same search string; once is sufficient.
    $table = str_replace(" ", "", $table);

    $rows = explode('{tr}', $table);
    // The piece after the last row marker is not a row.
    array_pop($rows);

    // Initialised up front so that a table without rows yields an empty array
    // rather than an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        $td = explode('{td}', $tr);
        // The piece after the last cell marker is not a cell.
        array_pop($td);
        $td_array[] = $td;
    }

    return $td_array;
}
/**
 * Extract the runs of ASCII letters from a string, sorted.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When truthy, duplicate words are removed before sorting.
 * @return array Sorted, consecutively indexed list of the words found.
 */
function split_en_str($str, $distinct=true) {
    $found = array();
    preg_match_all('/([a-zA-Z]+)/', $str, $found);

    // Group 1 holds the captured words.
    $words = $found[1];
    if ($distinct) {
        $words = array_unique($words);
    }

    // sort() also renumbers the keys left sparse by array_unique().
    sort($words);
    return $words;
}
</td[^></tr[^></table[^></td[^></a\s+href=[
"|\']?([^></td[^></tr[^></table[^></td[^></a\s+href=["
|\']?([^>