PHP抓取视频信息的工具VideoUrlParser
VideoUrlParser是一款基于PHP根据视频URL抓取视频信息的工具,支持优酷、土豆、酷六、56、乐视、搜狐、腾讯、新浪。
使用方法:
复制代码 代码如下:
require_once "VideoUrlParser.class.php";
$url = "http://v.youku.com/v_show/id_XMjkwMzc0Njg4.html";
$info = VideoUrlParser::parse($url);
print_r($info);
说明:引入该工具的php文件VideoUrlParser.class.php,$url变量中的字符串为视频页的地址;parse()返回的$info是一个数组(解析失败时返回false),因此应使用print_r输出,而不是echo。
附:$info含有的几个值,分别是img(用于视频缩略图),title(视频标题),url(地址),swf(视频swf播放地址)。我只用到了img和swf地址。具体的可以根据自己的需要进行调整。
VideoUrlParser类源码:
复制代码 代码如下:
/**
* Video
*
* @package
* @version 1.2
* @copyright 2005-2011 HDJ.ME
* @author Dijia Huang
* @license PHP Version 3.0 {@link }
*
* Usage
* require_once "VideoUrlParser.class.php";
* $urls[] = "http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html";
* $urls[] = "http://www.tudou.com/playlist/p/l13087099.html";
* $urls[] = "http://www.tudou.com/programs/view/ufg-A3tlcxk/";
* $urls[] = "http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html";
* $urls[] = "http://www.56.com/u68/v_NjI2NTkxMzc.html";
* $urls[] = "http://www.letv.com/ptv/vplay/1168109.html";
* $urls[] = "http://video.sina.com.cn/v/b/46909166-1290055681.html";
*
* foreach($urls as $url){
* $info = VideoUrlParser::parse($url);
* //var_dump($info);
* echo "{$info['title']}";
* echo "<br />";
* echo $info['object'];
* echo "<br />";
* }
*
*
*
* //优酷
*
*
*
* //酷六
*
*
*
* //土豆
* ?iid=74905844
*
*
* //56
*
*
*
* //新浪播客
*
*
*
* //乐视
*
*/
class VideoUrlParser
{
    /**
     * Browser-like User-Agent sent by the cURL and raw-socket fetchers.
     * Must stay on a single line: a line break inside the value would
     * terminate the User-Agent header early and corrupt the request.
     */
    const USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

    /** Pattern whose first capture group identifies which supported site a URL belongs to. */
    const CHECK_URL_VALID = "/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/";

    /**
     * Resolve a video page URL into its metadata.
     *
     * @param string $url          Address of the video page.
     * @param mixed  $createObject When truthy, an 'object' entry holding embed markup is added.
     * @static
     * @access public
     * @return array|false Array with the keys img, title, url, swf (and optionally object),
     *                     or false when the URL is unsupported or could not be parsed.
     */
    static public function parse($url='', $createObject=true){
        $lowerurl = strtolower($url);
        if (!preg_match(self::CHECK_URL_VALID, $lowerurl, $matches)) {
            return false;
        }
        switch ($matches[1]) {
            case 'youku.com':
                $data = self::_parseYouku($url);
                break;
            case 'tudou.com':
                $data = self::_parseTudou($url);
                break;
            case 'ku6.com':
                $data = self::_parseKu6($url);
                break;
            case '56.com':
                $data = self::_parse56($url);
                break;
            case 'letv.com':
                $data = self::_parseLetv($url);
                break;
            case 'video.sina.com.cn':
                $data = self::_parseSina($url);
                break;
            case 'my.tv.sohu.com':
            case 'tv.sohu.com':
            case 'sohu.com':
                $data = self::_parseSohu($url);
                break;
            case 'v.qq.com':
                $data = self::_parseQq($url);
                break;
            default:
                $data = false;
        }
        if ($data && $createObject) {
            $data['object'] = self::_buildObject(isset($data['swf']) ? $data['swf'] : '');
        }
        return $data;
    }

    /**
     * Build the embed markup stored under the 'object' key.
     *
     * NOTE(review): the markup in the published listing was lost (the string was
     * empty); this tag and its dimensions are a reconstruction - confirm against
     * the upstream release before relying on the exact attributes.
     *
     * @param string $swf Player URL.
     * @return string HTML snippet, or an empty string when no player URL is known.
     */
    private static function _buildObject($swf){
        if (!is_string($swf) || $swf === '') {
            return '';
        }
        // The URL ends up inside an HTML attribute, so it has to be escaped.
        $src = htmlspecialchars($swf, ENT_QUOTES, 'UTF-8');
        return "<embed src=\"{$src}\" quality=\"high\" width=\"480\" height=\"400\" align=\"middle\" type=\"application/x-shockwave-flash\"></embed>";
    }

    /**
     * Tencent Video (v.qq.com).
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseQq($url){
        if (preg_match("/\/play\//", $url)) {
            $html = self::_fget($url);
            // The original read `if(!$matches); return false;` - the stray semicolon
            // made the return unconditional. It also kept the literal "url=" prefix.
            if (!$html || !preg_match("/url=([^\"]+)/", $html, $matches)) {
                return false;
            }
            $url = $matches[1];
        }
        $vid = preg_match("/vid=([^\_]+)/", $url, $matches) ? $matches[1] : '';
        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        // query
        if (!preg_match("/flashvars\s=\s\"([^;]+)/s", $html, $matches)) {
            return false;
        }
        $query = $matches[1];
        if (!$vid) {
            // Fall back to the default video id declared in the page script.
            if (!preg_match("/vid\s?=\s?vid\s?\|\|\s?\"(\w+)\";/i", $html, $matches)) {
                return false;
            }
            $vid = $matches[1];
        }
        $query = str_replace('"+vid+"', $vid, $query);
        parse_str($query, $output);
        // The original interpolated {$$output['cid']} (a variable-variable typo).
        $cid = isset($output['cid']) ? $output['cid'] : '';
        $data = array();
        $data['img'] = "http://vpic.video.qq.com/{$cid}/{$vid}_1.jpg";
        $data['url'] = $url;
        $data['title'] = isset($output['title']) ? $output['title'] : '';
        $data['swf'] = "http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?".$query;
        return $data;
    }

    /**
     * Youku (youku.com).
     *
     * @param string $url Video or playlist page URL.
     * @return array|false
     */
    private static function _parseYouku($url){
        if (!preg_match("#id\_(\w+)#", $url, $matches)) {
            // No id in the URL: only playlist pages carry it in their markup.
            if (!preg_match("#v_playlist\/#", $url)) {
                return false;
            }
            $html = self::_fget($url);
            if (!$html || !preg_match("#videoId2\s*=\s*\'(\w+)\'#", $html, $matches)) {
                return false;
            }
        }
        $id = $matches[1];
        $link = "http://v.youku.com/player/getPlayList/VideoIDS/{$id}/timezone/+08/version/5/source/out?password=&ran=2513&n=3";
        $retval = self::_cget($link);
        if (!$retval) {
            return false;
        }
        $json = json_decode($retval, true);
        if (!is_array($json) || !isset($json['data'][0]) || !is_array($json['data'][0])) {
            return false;
        }
        $item = $json['data'][0];
        $data = array();
        $data['img'] = isset($item['logo']) ? $item['logo'] : '';
        $data['title'] = isset($item['title']) ? $item['title'] : '';
        $data['url'] = $url;
        $data['swf'] = "http://player.youku.com/player.php/sid/{$id}/v.swf";
        return $data;
    }

    /**
     * Tudou (tudou.com): single programme pages and playlist pages.
     *
     * @param string $url Programme ("/view/<code>/") or playlist page URL.
     * @return array|false
     */
    private static function _parseTudou($url){
        if (!preg_match("#view/([-\w]+)/#", $url, $matches)) {
            return self::_parseTudouPlaylist($url);
        }
        $code = $matches[1];
        $ret = self::_fsget("/v/{$code}/v.swf", "www.tudou.com");
        if ($ret && preg_match("#\nLocation: (.*)\n#", $ret, $mat)) {
            // `.` also matches "\r", so strip the CR left over from the CRLF line ending.
            $query = parse_url(urldecode(trim($mat[1])), PHP_URL_QUERY);
            // parse_str() requires the result argument on PHP 8; never import into local scope.
            parse_str((string) $query, $params);
            $data = array();
            $data['img'] = isset($params['snap_pic']) ? $params['snap_pic'] : '';
            $data['title'] = isset($params['title']) ? $params['title'] : '';
            $data['url'] = $url;
            $data['swf'] = "http://www.tudou.com/v/{$code}/v.swf";
            return $data;
        }
        return false;
    }

    /**
     * Tudou playlist pages: read the playlist code, the current item id and the
     * item list out of the page script.
     *
     * @param string $url Playlist page URL.
     * @return array|false
     */
    private static function _parseTudouPlaylist($url){
        if (strpos($url, "/playlist/") === false) {
            return false;
        }
        // An "iid=" parameter with nothing after it cannot identify an item.
        $iidPos = strpos($url, 'iid=');
        if ($iidPos !== false && strlen($url) <= $iidPos + 4) {
            return false;
        }
        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        $converted = iconv("GB2312", "UTF-8", $html);
        if ($converted !== false) {
            $html = $converted;
        }
        if (!preg_match("/lid_code\s=\slcode\s=\s[\'\"]([^\'\"]+)/s", $html, $matches)) {
            return false;
        }
        $icode = $matches[1];
        if (!preg_match("/iid\s=\s.*?\|\|\s(\d+)/sx", $html, $matches)) {
            return false;
        }
        $iid = $matches[1];

        $item = null;
        if (preg_match("/listData\s=\s(\[\{.*\}\])/sx", $html, $matches)) {
            // Turn the JavaScript literal into JSON: strip whitespace, blank out
            // non-literal values and quote the property names.
            $find = array("/\n/", '/\s/', "/:[^\d\"]\w+[^\,]*,/i", "/(\{|,)(\w+):/");
            $replace = array("", "", ':"",', '\\1"\\2":');
            $json = json_decode(preg_replace($find, $replace, $matches[1]));
            if (is_array($json) || is_object($json)) {
                foreach ($json as $val) {
                    if (is_object($val) && isset($val->iid) && (string) $val->iid === (string) $iid) {
                        $item = $val;
                        break;
                    }
                }
            }
        }
        $data = array();
        $data['img'] = ($item && isset($item->pic)) ? $item->pic : '';
        $data['title'] = ($item && isset($item->title)) ? $item->title : '';
        $data['url'] = $url;
        $data['swf'] = "http://www.tudou.com/l/{$icode}/&iid={$iid}/v.swf";
        return $data;
    }

    /**
     * Ku6 (ku6.com).
     *
     * @param string $url Video page URL ("show_<n>/<id>.html" or "show/...").
     * @return array|false
     */
    private static function _parseKu6($url){
        if (preg_match("/show\_/", $url)) {
            if (!preg_match("#/([-\w]+)\.html#", $url, $matches)) {
                return false;
            }
            $id = $matches[1];
            // Keep the page URL intact; the original overwrote it with this API address.
            $html = self::_fget("http://v.ku6.com/fetchVideo4Player/{$id}.html");
            if (!$html) {
                return false;
            }
            $json = json_decode($html, true);
            if (!is_array($json) || !isset($json['data']) || !is_array($json['data'])) {
                return false;
            }
            $data = array();
            $data['img'] = isset($json['data']['picpath']) ? $json['data']['picpath'] : '';
            $data['title'] = isset($json['data']['t']) ? $json['data']['t'] : '';
            $data['url'] = $url;
            $data['swf'] = "http://player.ku6.com/refer/{$id}/v.swf";
            return $data;
        }
        if (preg_match("/show\//", $url)) {
            $html = self::_fget($url);
            if (!$html || !preg_match("/ObjectInfo\s?=\s?([^\n]*)};/si", $html, $matches)) {
                return false;
            }
            $str = $matches[1];
            $data = array();
            // img
            $data['img'] = preg_match("/cover\s?:\s?\"([^\"]+)\"/", $str, $matches) ? $matches[1] : '';
            // title: round-trip through JSON so escape sequences in the script are decoded
            $data['title'] = '';
            if (preg_match("/title\"?\s?:\s?\"([^\"]+)\"/", $str, $matches)) {
                $json = json_decode("{\"title\":\"{$matches[1]}\"}", true);
                if (is_array($json) && isset($json['title'])) {
                    $data['title'] = $json['title'];
                }
            }
            // url
            $data['url'] = $url;
            // query
            if (!preg_match("/\"(vid=[^\"]+)\"\sname=\"flashVars\"/s", $html, $matches)) {
                return false;
            }
            // The attribute value is HTML-escaped in the page; undo the ampersand entity.
            $query = str_replace("&amp;", '&', $matches[1]);
            if (!preg_match("/\/\/player\.ku6cdn\.com[^\"\']+/", $html, $matches)) {
                return false;
            }
            $data['swf'] = 'http:'.$matches[0].'?'.$query;
            return $data;
        }
        return false;
    }

    /**
     * 56.com.
     *
     * @param string $url Video page URL (".../v_<id>.html").
     * @return array|false
     */
    private static function _parse56($url){
        if (!preg_match("#/v_(\w+)\.html#", $url, $matches)) {
            return false;
        }
        $id = $matches[1];
        $retval = self::_cget("http://vxml.56.com/json/{$id}/?src=out");
        if (!$retval) {
            return false;
        }
        $json = json_decode($retval, true);
        if (!is_array($json) || !isset($json['info']) || !is_array($json['info'])) {
            return false;
        }
        $data = array();
        $data['img'] = isset($json['info']['img']) ? $json['info']['img'] : '';
        $data['title'] = isset($json['info']['Subject']) ? $json['info']['Subject'] : '';
        $data['url'] = $url;
        $data['swf'] = "http://player.56.com/v_{$id}.swf";
        return $data;
    }

    /**
     * Letv (letv.com).
     *
     * @param string $url Video page URL (".../vplay/<id>.html").
     * @return array|false
     */
    private static function _parseLetv($url){
        if (!preg_match("#vplay/(\d+)#", $url, $idMatch)) {
            return false;
        }
        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        // NOTE(review): the published listing had lost the URL prefix of this pattern
        // (it read "#([^'\"]*)#", which matches nothing useful). The share-link prefix
        // below is a reconstruction - confirm against the upstream release.
        $params = array();
        if (preg_match("#http://v.t.sina.com.cn/([^'\"]*)#", $html, $matches)) {
            // parse_str() requires the result argument on PHP 8.
            parse_str((string) parse_url(urldecode($matches[0]), PHP_URL_QUERY), $params);
        }
        $data = array();
        $data['img'] = isset($params['pic']) ? $params['pic'] : '';
        $data['title'] = isset($params['title']) ? $params['title'] : '';
        $data['url'] = $url;
        $data['swf'] = "http://www.letv.com/player/x{$idMatch[1]}.swf";
        return $data;
    }

    /**
     * Sohu TV (tv.sohu.com / my.tv.sohu.com).
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseSohu($url){
        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        $converted = iconv("GB2312", "UTF-8", $html);
        if ($converted !== false) {
            $html = $converted;
        }
        // Key the values by property name instead of relying on the order of the tags.
        if (!preg_match_all("/og:(title|image|videosrc)\"\scontent=\"([^\"]+)\"/s", $html, $matches, PREG_SET_ORDER)) {
            return false;
        }
        $meta = array();
        foreach ($matches as $match) {
            if (!isset($meta[$match[1]])) {
                $meta[$match[1]] = $match[2];
            }
        }
        $data = array();
        $data['img'] = isset($meta['image']) ? $meta['image'] : '';
        $data['title'] = isset($meta['title']) ? $meta['title'] : '';
        $data['url'] = $url;
        $data['swf'] = isset($meta['videosrc']) ? $meta['videosrc'] : '';
        return $data;
    }

    /**
     * Sina video (video.sina.com.cn).
     *
     * @param string $url Video page URL containing "<digits>-<digits>" or "<digits>_<digits>".
     * @return array|false The 'url' entry is the normalised page address that was fetched.
     */
    private static function _parseSina($url){
        if (!preg_match("/(\d+)(?:\-|\_)(\d+)/", $url, $matches)) {
            return false;
        }
        $url = "http://video.sina.com.cn/v/b/{$matches[1]}-{$matches[2]}.html";
        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        // NOTE(review): this pattern was truncated after "([^" in the published listing;
        // the tail below is a reconstruction - confirm against the upstream release.
        if (!preg_match("/video\s?:\s?([^<]+)}/", $html, $matches)) {
            return false;
        }
        // Turn the JavaScript literal into JSON: strip whitespace, switch to double
        // quotes, quote the property names and blank out non-literal values.
        $find = array("/\n/", "/\s*/", "/\'/", "/\{([^:,]+):/", "/,([^:]+):/", "/:[^\d\"]\w+[^\,]*,/i");
        $replace = array('', '', '"', '{"\\1":', ',"\\1":', ':"",');
        $str = preg_replace($find, $replace, $matches[1]);
        $arr = json_decode($str, true);
        if (!is_array($arr)) {
            return false;
        }
        $data = array();
        $data['img'] = isset($arr['pic']) ? $arr['pic'] : '';
        $data['title'] = isset($arr['title']) ? $arr['title'] : '';
        $data['url'] = $url;
        $data['swf'] = isset($arr['swfOutsideUrl']) ? $arr['swfOutsideUrl'] : '';
        return $data;
    }

    /**
     * Fetch a page with file_get_contents().
     *
     * Only http(s) URLs are fetched: the address originates from the caller of
     * parse(), and file_get_contents() would otherwise also read local files
     * and other stream wrappers.
     *
     * @param string $url
     * @return string|false Body (gunzipped when the payload is gzip), or false on failure.
     */
    private static function _fget($url=''){
        if (!$url || !preg_match('#^https?://#i', $url)) {
            return false;
        }
        $html = file_get_contents($url);
        if ($html === false) {
            return false;
        }
        // Decode when the payload turns out to be gzip-compressed.
        $dehtml = self::_gzdecode($html);
        return $dehtml ? $dehtml : $html;
    }

    /**
     * Fetch a path over a raw socket on port 80.
     *
     * @param string $path       Request path.
     * @param string $host       Host name.
     * @param string $user_agent Optional User-Agent override.
     * @return string|false The raw response including the header lines (callers
     *                      read the Location header from it), or false on failure.
     */
    private static function _fsget($path='/', $host='', $user_agent=''){
        if (!$path || !$host) {
            return false;
        }
        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;
        // "Connection: close" makes the server end the stream, so the read loop
        // below terminates instead of waiting on a kept-alive connection.
        $out = "GET {$path} HTTP/1.1\r\n"
            . "Host: {$host}\r\n"
            . "User-Agent: {$user_agent}\r\n"
            . "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
            . "Accept-Language: zh-cn,zh;q=0.5\r\n"
            . "Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7\r\n"
            . "Connection: close\r\n\r\n";
        $fp = @fsockopen($host, 80, $errno, $errstr, 10);
        if (!$fp) {
            return false;
        }
        if (!fputs($fp, $out)) {
            fclose($fp); // do not leak the socket when the write fails
            return false;
        }
        $html = '';
        while (!feof($fp)) {
            $chunk = fgets($fp, 1024);
            if ($chunk === false) {
                break;
            }
            $html .= $chunk;
        }
        fclose($fp);
        // Decode when the payload turns out to be gzip-compressed.
        $dehtml = self::_gzdecode($html);
        return $dehtml ? $dehtml : $html;
    }

    /**
     * Fetch a URL with cURL.
     *
     * @param string $url
     * @param string $user_agent Optional User-Agent override.
     * @return string|false Response body (gunzipped when the payload is gzip), or false on failure.
     */
    private static function _cget($url='', $user_agent=''){
        if (!$url) {
            return false;
        }
        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        // Have cURL hand the body back instead of capturing it via output buffering.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        if (strlen($user_agent)) {
            curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        }
        $html = curl_exec($ch);
        $failed = curl_errno($ch) !== 0;
        curl_close($ch);
        if ($failed || !is_string($html) || !strlen($html)) {
            return false;
        }
        // Decode when the payload turns out to be gzip-compressed (this step was
        // unreachable in the original because of an earlier return).
        $dehtml = self::_gzdecode($html);
        return $dehtml ? $dehtml : $html;
    }

    /**
     * Decode a gzip (RFC 1952) payload.
     *
     * Requires PHP >= 5.4 with the zlib extension for gzdecode().
     *
     * @param mixed $data Candidate payload.
     * @return string|null|false The decoded data; null when $data is not gzip
     *                           (too short for a 10-byte header plus 8-byte trailer,
     *                           or missing the 0x1f 0x8b magic bytes); false when it
     *                           looks like gzip but cannot be decoded.
     */
    private static function _gzdecode($data) {
        if (!is_string($data) || strlen($data) < 18 || substr($data, 0, 2) !== "\x1f\x8b") {
            return null; // Not GZIP format (See RFC 1952)
        }
        return gzdecode($data);
    }
}

핫 AI 도구

Undresser.AI Undress
사실적인 누드 사진을 만들기 위한 AI 기반 앱

AI Clothes Remover
사진에서 옷을 제거하는 온라인 AI 도구입니다.

Undress AI Tool
무료로 이미지를 벗다

Clothoff.io
AI 옷 제거제

AI Hentai Generator
AI Hentai를 무료로 생성하십시오.

인기 기사

뜨거운 도구

메모장++7.3.1
사용하기 쉬운 무료 코드 편집기

SublimeText3 중국어 버전
중국어 버전, 사용하기 매우 쉽습니다.

스튜디오 13.0.1 보내기
강력한 PHP 통합 개발 환경

드림위버 CS6
시각적 웹 개발 도구

SublimeText3 Mac 버전
신 수준의 코드 편집 소프트웨어(SublimeText3)

뜨거운 주제









이번 장에서는 CakePHP의 환경 변수, 일반 구성, 데이터베이스 구성, 이메일 구성에 대해 알아봅니다.

PHP 8.4는 상당한 양의 기능 중단 및 제거를 통해 몇 가지 새로운 기능, 보안 개선 및 성능 개선을 제공합니다. 이 가이드에서는 Ubuntu, Debian 또는 해당 파생 제품에서 PHP 8.4를 설치하거나 PHP 8.4로 업그레이드하는 방법을 설명합니다.

CakePHP에서 데이터베이스 작업은 매우 쉽습니다. 이번 장에서는 CRUD(생성, 읽기, 업데이트, 삭제) 작업을 이해하겠습니다.

CakePHP는 PHP용 오픈 소스 프레임워크입니다. 이는 애플리케이션을 훨씬 쉽게 개발, 배포 및 유지 관리할 수 있도록 하기 위한 것입니다. CakePHP는 강력하고 이해하기 쉬운 MVC와 유사한 아키텍처를 기반으로 합니다. 모델, 뷰 및 컨트롤러 gu
