Script to cache YouTube: new storeurl.pl



    #!/usr/bin/perl
    # ==========================================================================
    # $Rev$
    # by chudy_fernandez@yahoo.com
    # Updates at http://wiki.squid-cache.org/ConfigExamples/DynamicContent/YouTube/Discussion
    # ==========================================================================
    use strict;
    use warnings;

    # Squid reads one reply per request line, so output must not be buffered.
    $| = 1;

    # rewrite_store_url($url) -> $store_url
    #
    # Maps a request URL to the canonical "store URL" under which the object
    # should be cached, so that the same content served from many mirror /
    # CDN host names (or with per-request query noise) shares one cache entry.
    #
    # The rules are tried top to bottom and the first match wins; specific
    # sites come first, the generic CDN heuristics last.  A URL that matches
    # no rule is returned unchanged.
    #
    # Parameters:
    #   $url - the request URL as received from Squid (a plain string).
    # Returns:
    #   the store URL string (never undef).
    sub rewrite_store_url {
        my ($url) = @_;

        # ======================================================================
        # Speedtest (disabled)
        # ======================================================================
        # if ($url =~ m/^http:\/\/(.*)\/speedtest\/(.*\.(jpg|txt))\?(.*)/) {
        #     return "http://www.speedtest.net.SQUIDINTERNAL/speedtest/" . $2;
        # }

        # ======================================================================
        # Mediafire
        # ======================================================================
        if ($url =~ m/^http:\/\/199\.91\.15\d\.\d*\/\w{12}\/(\w*)\/(.*)/) {
            return "http://www.mediafire.com.SQUIDINTERNAL/" . $1 . "/" . $2;
        }
        # ======================================================================
        # Fileserve
        # ======================================================================
        elsif ($url =~ m/^http:\/\/fs\w*\.fileserve\.com\/file\/(\w*)\/[\w-]*\.\/(.*)/) {
            return "http://www.fileserve.com.SQUIDINTERNAL/" . $1 . "./" . $2;
        }
        # ======================================================================
        # Filesonic
        # ======================================================================
        elsif ($url =~ m/^http:\/\/s[0-9]*\.filesonic\.com\/download\/([0-9]*)\/(.*)/) {
            return "http://www.filesonic.com.SQUIDINTERNAL/" . $1;
        }
        # ======================================================================
        # 4shared
        # ======================================================================
        elsif ($url =~ m/^http:\/\/[a-zA-Z]{2}\d*\.4shared\.com(:8080|)\/download\/(.*)\/(.*\..*)\?.*/) {
            return "http://www.4shared.com.SQUIDINTERNAL/download/$2/$3";
        }
        # ======================================================================
        # 4shared preview
        # ======================================================================
        elsif ($url =~ m/^http:\/\/[a-zA-Z]{2}\d*\.4shared\.com(:8080|)\/img\/(\d*)\/\w*\/dlink__2Fdownload_2F(\w*)_3Ftsid_3D[\w-]*\/preview\.mp3\?sId=\w*/) {
            return "http://www.4shared.com.SQUIDINTERNAL/$2";
        }
        # ======================================================================
        # Photos-X.ak.fbcdn.net where X a-z (jpg only)
        # ======================================================================
        elsif ($url =~ m/^http:\/\/photos-[a-z](\.ak\.fbcdn\.net)(\/.*\/)(.*\.jpg)/) {
            # $2 already starts with "/", so the key contains "//"; kept as-is
            # so the keys stay identical to those produced before.
            return "http://photos" . $1 . "/" . $2 . $3;
        }
        # ======================================================================
        # YX.sphotos.ak.fbcdn.net where X 1-9, Y a-z
        # ======================================================================
        elsif ($url =~ m/^http:\/\/[a-z][0-9]\.sphotos\.ak\.fbcdn\.net\/(.*)\/(.*)/) {
            return "http://photos.ak.fbcdn.net/" . $1 . "/" . $2;
        }
        # ======================================================================
        # maps.google.com
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(cbk|mt|khm|mlt|tbn)[0-9]?(.google\.co(m|\.uk|\.id).*)/) {
            return "http://" . $1 . $2;
        }
        # ======================================================================
        # Compatibility for old cached get_video?video_id
        # ======================================================================
        elsif ($url =~ m/^http:\/\/([0-9.]{4}|.*\.youtube\.com|.*\.googlevideo\.com|.*\.video\.google\.com).*?(videoplayback\?id=.*?|video_id=.*?)\&(.*?)/) {
            my $video = $2;
            $video =~ s/video_id=/get_video?video_id=/;
            return "http://video-srv.youtube.com.SQUIDINTERNAL/" . $video;
        }
        # ======================================================================
        # Youtube fix: key on itag, algorithm, id and range only
        # ======================================================================
        elsif ($url =~ m/^http:\/\/([0-9.]{4}|.*\.youtube\.com|.*\.googlevideo\.com|.*\.video\.google\.com)\/videoplayback\?(.*)/) {
            my $p_str = $2;
            my $tag   = "";
            my $alg   = "";
            my $id    = "";
            my $range = "";
            if ($p_str =~ m/(itag=[0-9]*)/)               { $tag = "&" . $1 }
            if ($p_str =~ m/(algorithm=[a-z]*\-[a-z]*)/)  { $alg = "&" . $1 }
            if ($p_str =~ m/(id=[a-zA-Z0-9]*)/)           { $id  = "&" . $1 }
            if ($p_str =~ m/(range=[0-9\-]*)/) {
                # Reduce "range=A-B" to the bare digits with the first "-"
                # removed.
                $range = "&" . $1;
                $range =~ s/-//;
                $range =~ s/range=//;
            }
            # Each part already carries its own "&"; the extra separators are
            # kept unchanged so keys stay identical to those produced before.
            return "http://video-srv.youtube.com.SQUIDINTERNAL/" . $tag . "&" . $alg . "&" . $id . "&" . $range;
        }
        # ======================================================================
        # Google Analytic
        # ======================================================================
        elsif ($url =~ m/^http:\/\/www\.google-analytics\.com\/__utm\.gif\?.*/) {
            return "http://www.google-analytics.com/__utm.gif";
        }
        # ======================================================================
        # Cache High Latency Ads: strip per-request noise from the query
        # ======================================================================
        elsif ($url =~ m/^http:\/\/([a-z0-9.]*)(\.doubleclick\.net|\.quantserve\.com|\.googlesyndication\.com|yieldmanager|cpxinteractive)(.*)/) {
            # Copy the captures first: the substitutions below run their own
            # regex matches.
            my ($host, $network, $rest) = ($1, $2, $3);
            for ($rest) {
                s/pixel;.*/pixel/;
                s/activity;.*/activity/;
                s/(imgad[^&]*).*/$1/;
                s/;ord=[?0-9]*//;
                # Was corrupted to ";<multiplication sign>tamp=" by HTML
                # entity decoding of "&times"; restored to "&timestamp".
                s/;&timestamp=[0-9]*//;
                s/[&?]correlator=[0-9]*//;
                s/&cookie=[^&]*//;
                s/&ga_hid=[^&]*//;
                s/&ga_vid=[^&]*//;
                s/&ga_sid=[^&]*//;
                # s/&prev_slotnames=[^&]*//
                # s/&u_his=[^&]*//;
                s/&dt=[^&]*//;
                s/&dtd=[^&]*//;
                s/&lmt=[^&]*//;
                # NOTE(review): [^(%2F)] is a character class (any char except
                # "(", "%", "2", "F", ")"), not "everything up to the next
                # %2F".  Left unchanged so existing store keys do not move.
                s/(&alternate_ad_url=http%3A%2F%2F[^(%2F)]*)[^&]*/$1/;
                s/(&url=http%3A%2F%2F[^(%2F)]*)[^&]*/$1/;
                s/(&ref=http%3A%2F%2F[^(%2F)]*)[^&]*/$1/;
                s/(&cookie=http%3A%2F%2F[^(%2F)]*)[^&]*/$1/;
                s/[;&?]ord=[?0-9]*//;
                s/[;&]mpvid=[^&;]*//;
                s/&xpc=[^&]*//;
                # yieldmanager
                s/\?clickTag=[^&]*//;
                s/&u=[^&]*//;
                s/&slotname=[^&]*//;
                s/&page_slots=[^&]*//;
            }
            return "http://" . $host . $network . $rest;
        }
        # ======================================================================
        # Cache high latency ads
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(.*?)\/(ads)\?(.*?)/) {
            return "http://" . $1 . "/" . $2;
        }
        # ======================================================================
        # Specific servers start here....
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(www\.ziddu\.com.*\.[^\/]{3,4})\/(.*?)/) {
            return "http://" . $1;
        }
        # ======================================================================
        # cdn, variable 1st path
        # ======================================================================
        elsif ($url =~ /filehippo/ && $url =~ m/^http:\/\/(.*?)\.(.*?)\/(.*?)\/(.*)\.([a-z0-9]{3,4})(\?.*)?/) {
            my @parts = ($1, $2, $4, $5);
            $parts[0] =~ s/[a-z0-9]{2,5}/cdn./;
            return "http://" . $parts[0] . $parts[1] . "/" . $parts[2] . "." . $parts[3];
        }
        # ======================================================================
        # Rapidshare
        # ======================================================================
        elsif ($url =~ /rapidshare/ && $url =~ m/^http:\/\/(([A-Za-z]+[0-9-.]+)*?)([a-z]*\.[^\/]{3}\/[a-z]*\/[0-9]*)\/(.*?)\/([^\/\?\&]{4,})$/) {
            return "http://cdn." . $3 . "/SQUIDINTERNAL/" . $5;
        }
        elsif ($url =~ /maxporn/ && $url =~ m/^http:\/\/([^\/]*?)\/(.*?)\/([^\/]*?)(\?.*)?$/) {
            return "http://" . $1 . "/SQUIDINTERNAL/" . $3;
        }
        # ======================================================================
        # Sites with a variable host and middle path part; file name extension
        # of 3 or 4 characters, with or without "?" at the end
        # ======================================================================
        elsif ($url =~ /tube8|pornhub|xvideos/ && $url =~ m/^http:\/\/(([A-Za-z]+[0-9-.]+)*?(\.[a-z]*)?)\.([a-z]*[0-9]?\.[^\/]{3}\/[a-z]*)(.*?)((\/[a-z]*)?(\/[^\/]*){4}\.[^\/\?]{3,4})(\?.*)?$/) {
            return "http://cdn." . $4 . $6;
        }
        # ...specific servers end here.
        # ======================================================================
        # Photos-X.ak.fbcdn.net where X a-z (anything the jpg rule above missed)
        # ======================================================================
        elsif ($url =~ m/^http:\/\/photos-[a-z].ak.fbcdn.net\/(.*)/) {
            return "http://photos.ak.fbcdn.net/" . $1;
        }
        # ======================================================================
        # For yimg.com video
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(.*yimg.com)\/\/(.*)\/([^\/\?\&]*\/[^\/\?\&]*\.[^\/\?\&]{3,4})(\?.*)?$/) {
            return "http://cdn.yimg.com//" . $3;
        }
        # ======================================================================
        # For yimg.com doubled
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(.*?)\.yimg\.com\/(.*?)\.yimg\.com\/(.*?)\?(.*)/) {
            return "http://cdn.yimg.com/" . $3;
        }
        # ======================================================================
        # For yimg.com with &sig=
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(.*?)\.yimg\.com\/(.*)/) {
            my @parts = ($1, $2);
            $parts[0] =~ s/[a-z]+[0-9]+/cdn/;
            $parts[1] =~ s/&sig=.*//;
            return "http://" . $parts[0] . ".yimg.com/" . $parts[1];
        }
        # ======================================================================
        # Youjizz. We use only domain and filename
        # ======================================================================
        elsif ($url =~ /media[0-9]{2,5}\.youjizz/ && $url =~ m/^http:\/\/(.*)(\.[^\.\-]*?\..*?)\/(.*)\/([^\/\?\&]*)\.([^\/\?\&]{3,4})((\?|\%).*)?$/) {
            my @parts = ($1, $2, $4, $5);
            # Character class was "[a-zA-A]" (typo); corrected to "[a-zA-Z]".
            $parts[0] =~ s/(([a-zA-Z]+[0-9]+(-[a-zA-Z])?$)|(.*cdn.*)|(.*cache.*))/cdn/;
            return "http://" . $parts[0] . $parts[1] . "/" . $parts[2] . "." . $parts[3];
        }
        # ======================================================================
        # General purpose for cdn servers. Add your specific servers above.
        # ======================================================================
        elsif ($url =~ m/^http:\/\/([0-9.]*?)\/\/(.*?)\.(.*)\?(.*?)/) {
            return "http://squid-cdn-url//" . $2 . "." . $3;
        }
        # ======================================================================
        # Generic http://variable.domain.com/path/filename.ext with a 2-4
        # character extension, with or without a trailing "?..." or "%..."
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(.*)(\.[^\.\-]*?\..*?)\/(.*)\.([^\/\?\&]{2,4})((\?|\%).*)?$/) {
            my @parts = ($1, $2, $3, $4);
            # Character class was "[a-zA-A]" (typo); corrected to "[a-zA-Z]".
            $parts[0] =~ s/(([a-zA-Z]+[0-9]+(-[a-zA-Z])?$)|(.*cdn.*)|(.*cache.*))/cdn/;
            return "http://" . $parts[0] . $parts[1] . "/" . $parts[2] . "." . $parts[3];
        }
        # ======================================================================
        # Generic http://variable.domain.com/...
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(([A-Za-z]+[0-9-]+)*?|.*cdn.*|.*cache.*)\.(.*?)\.(.*?)\/(.*)$/) {
            return "http://cdn." . $3 . "." . $4 . "/" . $5;
        }
        # ======================================================================
        # Specific extensions: drop everything after the extension
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(.*?)\/(.*?)\.(jp(e?g|e|2)|gif|png|tiff?|bmp|ico|flv|on2)(.*)/) {
            return "http://" . $1 . "/" . $2 . "." . $3;
        }
        # ======================================================================
        # Everything up to the first ";" in the path
        # ======================================================================
        elsif ($url =~ m/^http:\/\/(.*?)\/(.*?)\;(.*)/) {
            return "http://" . $1 . "/" . $2;
        }

        # No rule matched: hand the URL back unchanged.  (This branch used to
        # append the literal text "sucks" to the URL.)
        return $url;
    }

    # Main loop: one request per input line, one reply per output line.
    # The first whitespace-separated field is echoed back in front of the
    # reply and the second field is the URL to rewrite; any further fields
    # are ignored.
    while (my $line = <>) {
        my @fields = split ' ', $line;
        next unless @fields;    # blank line: nothing to answer

        my $channel = $fields[0];
        my $url     = defined $fields[1] ? $fields[1] : '';
        # $url =~ s/&sig=.*//;

        print $channel . " " . rewrite_store_url($url) . "\n";
    }
    
    

    and the new include.conf

    # $Rev$
    
    #modif1 27 des 12
    # Refuse YouTube videoplayback requests that carry a range= parameter.
    # The dot in the host name is escaped so that it only matches a literal ".".
    acl range url_regex -i .*youtube\.com/videoplayback.*range=.*$
    http_access deny range
    #end modif1
    
    # Which requests are sent to the store-URL rewriter.
    # NOTE(review): the regexes below had lost their backslashes (most likely
    # stripped when the config was pasted), which turned literal "." and "?"
    # into regex operators: " .(jpg|...)?" matched any single character, so
    # store_rewrite_list matched nearly every URL, and ".php?" in dontrewrite
    # matched any URL containing "ph" preceded by any character.  The escapes
    # are restored where a literal "." or "?" is clearly intended.  Confirm
    # against your own traffic before deploying.
    acl store_rewrite_list urlpath_regex /(get_video|videoplayback\?id|videoplayback.*id) \.(jp(e?g|e|2)|gif|png|tiff?|bmp|ico|flv|wmv|3gp|mp(4|3)|exe|msi|zip|on2|mar|swf|fid)\?
    acl store_rewrite_list_domain url_regex ^http://([a-zA-Z-]+[0-9-]+)\.[A-Za-z]*\.[A-Za-z]*
    acl store_rewrite_list_domain url_regex (([a-z]{1,2}[0-9]{1,3})|([0-9]{1,3}[a-z]{1,2}))\.[a-z]*[0-9]?\.[a-z]{3}
    acl store_rewrite_list_path urlpath_regex \.(jp(e?g|e|2)|gif|png|tiff?|bmp|ico|flv|avc|zip|mp3|3gp|rar|on2|mar|exe)$
    acl store_rewrite_list_domain_CDN url_regex (khm|mt)[0-9]?\.google\.com streamate\.doublepimp\.com.*\.js\? photos-[a-z]\.ak\.fbcdn\.net \.rapidshare\.com.*/[0-9]*/.*/[^/]* ^http://(www\.ziddu\.com.*\.[^/]{3,4})/(.*) \.doubleclick\.net.* yieldmanager cpxinteractive ^http://[.a-z0-9]*\.photobucket\.com.*\.[a-z]{3}$ quantserve\.com
    
    #acl rapidurl url_regex .rapidshare.com.*/[0-9]*/[0-9]*/[^/]*
    #acl video urlpath_regex .((mpeg|ra?m|avi|mp(g|e|4)|mov|divx|asf|qt|wmv|mdv|rv|vob|asx|ogm|flv|3gp)(?.*)?)$ (get_video?|videoplayback?|videodownload?|.flv(?.*)?)
    #acl html url_regex .((html|htm|php|js|css|aspx)(?.*)?)$ .com/$ .com$
    #acl images urlpath_regex .((jp(e?g|e|2)|gif|png|tiff?|bmp|ico)(?.*)?)$
    
    # Never rewrite these: redbot.org, video requests with a non-zero begin=
    # offset, PHP pages with a query string, and threadless "?r=" images.
    acl dontrewrite url_regex redbot\.org (get_video|videoplayback\?id|videoplayback.*id).*begin=[1-9][0-9]* \.php\? threadless.*\.jpg\?r=
    acl getmethod method GET
    
    # Access rules are evaluated top to bottom; the first match decides.
    storeurl_access deny dontrewrite
    storeurl_access deny !getmethod
    storeurl_access allow store_rewrite_list_domain_CDN
    storeurl_access allow store_rewrite_list
    storeurl_access allow store_rewrite_list_domain store_rewrite_list_path
    storeurl_access deny all
    # A single helper process handles up to 99 requests at a time, so the
    # helper must echo back the request ID that starts each input line.
    storeurl_rewrite_program /usr/local/etc/squid/storeurl.pl
    storeurl_rewrite_children 1
    storeurl_rewrite_concurrency 99
    
    # ACL matching SNMP requests that use the community string "public";
    # it is granted access by the snmp_access line further down.
    acl snmppublic snmp_community public
    # NOTE(review): "none" appears to leave the cache-manager "config" and
    # "reconfigure" actions without a password - confirm this is intended.
    cachemgr_passwd none config reconfigure
    
    #work around for fragment videos of msn
    # Denies any request whose URL matches QualityLevel.*Fragment.
    acl msnvideo url_regex QualityLevel.*Fragment
    http_access deny msnvideo
    
    #always_direct allow html
    #cache_peer localhost parent 4001 0 carp login=PASS name=backend-1
    max_stale 10 years
    
    # Pulls in a separate file; its contents are not shown here.
    include /usr/local/etc/squid/refresh.conf
    
    #acl shoutcast rep_header X-HTTP09-First-Line ^ICY.[0-9]
    #upgrade_http0.9 deny shoutcast
    # Replies whose Server header starts with "Apache" get the
    # broken_vary_encoding treatment.
    acl apache rep_header Server ^Apache
    broken_vary_encoding allow apache
    
    #read_ahead_gap 0 KB
    
    #ie_refresh on
    reload_into_ims on
    
    # Keep query strings in logged URLs.
    strip_query_terms off
    # NOTE(review): the "localnet" ACL is not defined in this fragment;
    # presumably it comes from the main squid.conf - verify.
    deny_info TCP_RESET localnet
    negative_dns_ttl 1 second
    negative_ttl 1 second
    # SNMP agent port; access is limited to the snmppublic ACL defined above.
    snmp_port 3401
    snmp_access allow snmppublic all
    maximum_single_addr_tries 2
    retry_on_error on
    n_aiops_threads 64
    #request_header_max_size 128 KB
    #reply_header_max_size 128 KB
    #range_offset_limit 10 MB
    vary_ignore_expire on
    #client_db off # this needs to be on for acl maxconn to work
    ipcache_size 4096
    fqdncache_size 20
    #tcp_recv_bufsize 64 KB
    pipeline_prefetch on
    #half_closed_clients off
    
    # 0x10 no delay, 0x08 throughput, 0x04 reliability
    # 0x10       10000    (minimize delay)           Use delay metric
    # 0x08       01000    (maximize throughput)      Use default metric
    # 0x04       00100    (maximize reliability)     Use reliability metric
    # 0x02       00010    (minimize monetary cost)   Use cost metric
    # dscp squidtos+ECN
    # 56 0xE0 11100000
    # 48 0xc0 11000000
    # 08 0x20 00100000
    # 32 0x80 10000000
    # 16 0x40 01000000
    #tcp_outgoing_tos 0x03 video
    #tcp_outgoing_tos 0xb8 html
    #tcp_outgoing_tos 0x20 images
    #tcp_outgoing_tos 0x02 all
    
    #zph_mode tos
    #zph_local 0xb8
    #zph_parent 0x08
    
    #acl monitor url_regex avira
    #logformat chudy %ts.tu %6tr %>a %Ss/Hs %<st %rm %ru %mt http%rv Rq[%>h] Rp[%<h]
    #access_log /var/squid/log/access2.log chudy monitor
    
    #buffered_logs on
    #download_fastest_client_speed on
    #acl text rep_header Content-Type -i text/
    #acl hit rep_header X-Cache -i hit
    #acl partial rep_header Content-Range .*
    #log_access deny partial
    #log_access deny php
    #log_access deny text
    #log_access deny hit
    #log_access deny html
    #log_access deny !getmethod
    # Page-fault warning threshold, set to 50.
    # NOTE(review): confirm the unit and the averaging window against the
    # Squid documentation for the version in use.
    high_page_fault_warning 50
    #log_access deny manager
    

    I really tried this code and it really cached YouTube,
    but I need to cache mp3, mp4, .exe - all files on the Internet.
    Can anyone help with that?


Log in to reply