http://www.btmao.org 为了从远程读取网页内容,我们常常需要用PHP打开网页文件,而当网页文件很大或者网页根本不存在的时候,我们发现使用fopen的方法经常会出现超时。于是我们需要一个更加稳定的方法来解决这个问题。 我们想到使用fsockopen来帮助我们完成工作。
http://www.btmao.org
为了从远程读取网页内容,我们常常需要用PHP打开网页文件,而当网页文件很大或者网页根本不存在的时候,我们发现使用fopen的方法经常会出现超时。于是我们需要一个更加稳定的方法来解决这个问题。
我们想到使用fsockopen来帮助我们完成工作。fsockopen是定义在网络函数(Network Functions)中的一个函数,它通过socket连接来打开远程资源。其函数原型如下:
resource fsockopen ( string $hostname [, int $port [, int &$errno [, string &$errstr [, float $timeout ]]]] )
我们定义了一个类叫做http,下面是详细的代码。
<?php
/**
 * Minimal HTTP/1.1 GET client built on fsockopen().
 *
 * Only plain "http://" URLs are supported. The entry point is
 * read_from_url(); the remaining methods are helpers it relies on.
 */
class http
{
    /**
     * Splits an "http://" URL into host and port and builds the GET request.
     *
     * Precondition: $url starts with "http://" (7 characters); the parsing
     * below begins at offset 7 and does not validate the scheme.
     *
     * @param string $url    Absolute URL beginning with "http://".
     * @param string $host   Output: host name taken from the URL.
     * @param int    $port   Output: port from the URL, or 80 when absent/invalid.
     * @param string $packet Output: complete request text, terminated by a blank line.
     */
    public function forge_http_packet($url, & $host, & $port, & $packet)
    {
        // The authority (host[:port]) ends at the first '/', '?' or '#',
        // or at the end of the string when the URL has no path at all.
        $authority_end = 7 + strcspn($url, '/?#', 7);
        $target = (string) substr($url, $authority_end);

        // A fragment is client-side only and must not be sent to the server.
        $fragment = strpos($target, '#');
        if ($fragment !== false) {
            $target = (string) substr($target, 0, $fragment);
        }
        if ($target === '') {
            $target = '/';
        } elseif ($target[0] === '?') {
            $target = '/' . $target;
        }

        // A colon only separates host and port when it sits inside the authority.
        $colon = strpos($url, ':', 7);
        if ($colon === false || $colon > $authority_end) {
            $host = (string) substr($url, 7, $authority_end - 7);
            $port = 80;
        } else {
            $host = (string) substr($url, 7, $colon - 7);
            $port = intval(substr($url, $colon + 1, $authority_end - $colon - 1));
            if ($port <= 0) {
                $port = 80;
            }
        }

        // Spaces are escaped as before; CR/LF are escaped too so a crafted
        // URL cannot inject extra request headers.
        $encoded_target = str_replace(
            array(' ', "\r", "\n"),
            array('%20', '%0D', '%0A'),
            $target
        );
        $host_header = ($port === 80) ? $host : $host . ':' . $port;

        $packet  = 'GET ' . $encoded_target . " HTTP/1.1\r\n";
        $packet .= "Accept: */*\r\n";
        $packet .= 'Host: ' . $host_header . "\r\n";
        $packet .= "User-Agent: OpenWebSpider\r\n";
        // read_from_url() reads until EOF, so ask the server to close the
        // connection instead of keeping it alive until the read timeout.
        $packet .= "Connection: close\r\n";
        $packet .= "\r\n";
    }

    /**
     * Decodes a body sent with "Transfer-Encoding: chunked".
     *
     * Decoding stops at the terminating zero-length chunk, or as soon as the
     * input is truncated or malformed; whatever was decoded so far is returned.
     *
     * @param string $chunked_html Raw chunked body (everything after the headers).
     * @return string The concatenated chunk payloads.
     */
    public function chunked_result($chunked_html)
    {
        $html = '';
        $offset = 0;
        $total = strlen($chunked_html);
        while ($offset < $total) {
            $line_end = strpos($chunked_html, "\r\n", $offset);
            if ($line_end === false) {
                break; // truncated: no complete chunk-size line left
            }
            $size_field = substr($chunked_html, $offset, $line_end - $offset);
            // Drop optional chunk extensions ("1a;name=value").
            $extension = strpos($size_field, ';');
            if ($extension !== false) {
                $size_field = substr($size_field, 0, $extension);
            }
            $size_field = trim($size_field);
            if ($size_field === '' || !ctype_xdigit($size_field)) {
                break; // malformed size line
            }
            $chunk_length = $this->hex2dec($size_field);
            if ($chunk_length === 0) {
                break; // last chunk; only optional trailers follow
            }
            $html .= (string) substr($chunked_html, $line_end + 2, $chunk_length);
            // Skip the size line's CRLF, the payload, and the payload's CRLF.
            $offset = $line_end + 2 + $chunk_length + 2;
        }
        return $html;
    }

    /**
     * Converts a hexadecimal string (either case) to a number.
     *
     * Characters that are not hex digits count as the digit 0, which is what
     * the original digit-by-digit loop did.
     *
     * @param string $hexstr Hexadecimal digits without a "0x" prefix.
     * @return int|float The numeric value (float if it exceeds PHP_INT_MAX).
     */
    public function hex2dec($hexstr)
    {
        $hexstr = (string) $hexstr;
        $length = strlen($hexstr);
        $num = 0;
        for ($i = 0; $i < $length; $i++) {
            $digit = strpos('0123456789abcdef', strtolower($hexstr[$i]));
            $num = $num * 16 + ($digit === false ? 0 : $digit);
        }
        return $num;
    }

    /**
     * Fetches a URL with a GET request and returns the response body.
     *
     * - Redirects (301/302/303/307/308) are followed up to $max_redirects times.
     * - For 4xx/5xx responses the status line is returned instead of the body.
     * - Chunked bodies are decoded; other bodies are returned as received,
     *   without the blank line that separates them from the headers.
     * - "" is returned when the URL is not "http://", the connection fails,
     *   the response exceeds 1,000,000 bytes, or a redirect cannot be followed.
     *
     * @param string $url           Absolute "http://" URL.
     * @param int    $max_redirects Remaining redirect hops allowed.
     * @return string Body, status line (4xx/5xx), or "" on failure.
     */
    public function read_from_url($url, $max_redirects = 5)
    {
        if (!is_string($url) || strncasecmp($url, 'http://', 7) !== 0) {
            return '';
        }
        $this->forge_http_packet($url, $host, $port, $packet);
        if ($host === '') {
            return '';
        }

        $handle = fsockopen($host, $port, $errno, $errstr, 5); // 5 s connect timeout
        if (!$handle) {
            return '';
        }

        fwrite($handle, $packet);
        stream_set_timeout($handle, 10); // 10 s per read
        $result = '';
        $too_large = false;
        while (!feof($handle)) {
            $buffer = fgets($handle, 4096);
            $info = stream_get_meta_data($handle);
            if ($buffer === false || $info['timed_out']) {
                break;
            }
            $result .= $buffer;
            if (strlen($result) > 1000000) {
                // Over ~1 MB: unlikely to be a news or list page, give up.
                $too_large = true;
                break;
            }
        }
        // Always release the socket, including on the oversize path.
        fclose($handle);
        if ($too_large) {
            return '';
        }

        $header_end = strpos($result, "\r\n\r\n");
        $header = ($header_end === false) ? $result : substr($result, 0, $header_end + 4);
        $status = preg_match('#^HTTP/\d+\.\d+\s+(\d{3})#i', $header, $match) ? (int) $match[1] : 0;

        if (in_array($status, array(301, 302, 303, 307, 308), true)) {
            if ($max_redirects <= 0) {
                return ''; // redirect loop or chain too long
            }
            // Header names are case-insensitive; search the header only.
            if (!preg_match('/^Location:[ \t]*([^\r\n]*)/mi', $header, $match)) {
                return '';
            }
            $location = trim($match[1]);
            if ($location === '') {
                return '';
            }
            if (strncmp($location, '//', 2) === 0) {
                $location = 'http:' . $location;
            } elseif ($location[0] === '/') {
                // Absolute-path redirect: resolve it against the current host.
                $location = 'http://' . $host . ($port === 80 ? '' : ':' . $port) . $location;
            }
            if (strncasecmp($location, 'http://', 7) !== 0) {
                return ''; // https or other schemes cannot be fetched over a plain socket
            }
            return $this->read_from_url($location, $max_redirects - 1);
        }

        if ($status >= 400 && $status < 600) {
            // Client/server error: hand back the status line only.
            $line_end = strpos($result, "\r\n");
            return ($line_end === false) ? '' : substr($result, 0, $line_end);
        }

        if ($header_end === false) {
            return ''; // header never completed, so there is no body to return
        }
        $body = (string) substr($result, $header_end + 4);
        if (!preg_match('/^Transfer-Encoding:[^\r\n]*\bchunked\b/mi', $header)) {
            return $body;
        }
        return $this->chunked_result($body);
    }
}
?>
只需要调用上述类中的函数read_from_url($url)便可以完成工作。