Hi,
First of all, I don’t know much PHP. The script was working until Google changed something late last year. I got an updated script and finally got it to work, but last week a cron job ran it and all domains showed zero for PR.
I got a new script and am still getting zero for PR. The script is below. I put the URL value inside the script instead of including it from another file, to make it easier to troubleshoot. The only error I receive is: Notice: Undefined variable: pagerank in /home8/xxx/public_html/site/pagerank4.php on line 282
This is the line where I echo the pagerank value to the screen.
Thanks for any help.
Gibs
[php]<?php
// Show every notice/warning while troubleshooting.
// The string delimiters must be plain ASCII quotes: typographic quotes
// (as pasted by some editors/forums) are not string delimiters in PHP and
// make the arguments parse as undefined constants.
ini_set('display_errors', '1');
error_reporting(E_ALL);
// URL whose PageRank is looked up at the bottom of the script.
$url = 'www.abc.com';
/**
- File: GooglePR.class.php
- Google PageRank checker. Implements change on Google PageRank API around October 2011.
- Added bulk check which finds out domain and actual PageRank with extra site availability check.
- Bulk check takes longer to execute because of double requests, but gives more relevant data about sites being checked.
- Usage:
- include 'GooglePR.class.php';
- site exists: echo GooglePR::exists($url);
- simple check: echo GooglePR::get($url);
- link count check: echo GooglePR::links($url);
- bulk check: print_r(GooglePR::bulk($urls));
- @source Original source and credits to this forge goes to: mmncs.com/2011/10/how-to-create-your-own-google-pagerank-checker-using-php-updated-pagerank-query-url-php-code/
-
@version 0.2
*/
/**
 * Google PageRank checker built on the toolbar query endpoint.
 *
 * The four static handlers (get, bulk, exists, links) share one lazily
 * created instance; everything else is an ordinary instance method.
 *
 * NOTE(review): Google is reported to have retired the public toolbar
 * PageRank endpoint; if that is the case every lookup will come back
 * empty regardless of this code - confirm before debugging further.
 */
class GooglePR
{
    /**
     * sprintf() template for the PageRank query: first %s is the checksum
     * produced by check_hash(), second %s is the url-encoded target.
     */
    public $google_pr_api = 'http://toolbarqueries.google.com/tbr?client=navclient-auto&ch=%s&features=Rank&q=info:%s&num=100&filter=0';

    /** Timeout in seconds applied to every curl request (see init_curl). */
    public $timeout;

    /** Shared instance used by the static handlers. */
    private static $instance;

    /**
     * Return the shared instance, creating it on first use.
     * The requested timeout is applied on every call so that each static
     * handler really runs with the timeout it was given.
     */
    private static function shared($timeout)
    {
        if (!isset(self::$instance)) {
            self::$instance = new GooglePR($timeout);
        } else {
            self::$instance->timeout = $timeout;
        }
        return self::$instance;
    }

    /**
     * Strip the first 9 characters of a get_pr() response and trim the rest.
     * Presumably the response looks like "Rank_1:1:N" - verify against a
     * live response. A failed request (non-string response) yields ''.
     */
    private static function rank_from_response($response)
    {
        if (!is_string($response)) {
            return '';
        }
        return trim((string) substr($response, 9));
    }

    /**
     * Static handler: PageRank string for one url.
     *
     * @param string $url     url or host to look up
     * @param int    $timeout curl timeout in seconds
     * @return string rank, or '' when the request failed / returned nothing
     */
    public static function get($url, $timeout = 10)
    {
        return self::rank_from_response(self::shared($timeout)->get_pr($url));
    }

    /**
     * Static handler: bulk check, see bulk_pr_check().
     * The default timeout is 5 to keep the process reasonably fast.
     *
     * @param array       $urls     urls to check (keys are preserved)
     * @param int         $timeout  curl timeout in seconds
     * @param bool        $debug    echo each result line when TRUE
     * @param string|null $log_file optional log file used to store/resume results
     * @return array
     */
    public static function bulk($urls, $timeout = 5, $debug = FALSE, $log_file = NULL)
    {
        return self::shared($timeout)->bulk_pr_check($urls, $debug, $log_file);
    }

    /**
     * Static handler: does the page/site answer with 200, 302 or 304?
     *
     * @param string $url
     * @param int    $timeout curl timeout in seconds
     * @return bool
     */
    public static function exists($url, $timeout = 10)
    {
        return self::shared($timeout)->site_exists($url);
    }

    /**
     * Static handler: number of distinct outbound hosts linked from a page.
     *
     * @param string $url
     * @param int    $timeout curl timeout in seconds
     * @return int|false FALSE when the page content could not be fetched
     */
    public static function links($url, $timeout = 5)
    {
        return self::shared($timeout)->get_outbound_links($url);
    }

    /**
     * @param int $timeout curl timeout in seconds; 10 keeps the flow smooth
     *                     compared to a longer system default
     */
    public function __construct($timeout = 10)
    {
        $this->timeout = $timeout;
    }

    /**
     * Check a list of urls.
     *
     * For every url the result holds 'url', 'host_pr', 'page_pr' and
     * 'outbound'; each value stays 'na' when it could not be determined.
     * When a log file is given, urls already present in it are skipped
     * (and therefore absent from the returned array) and each new result
     * is written back, so a long run can be resumed after an interruption.
     *
     * @param array       $urls
     * @param bool        $debug    echo each log line when TRUE
     * @param string|null $log_file
     * @return array results keyed like $urls
     */
    public function bulk_pr_check($urls, $debug = FALSE, $log_file = NULL)
    {
        $prs = array();

        # load previous log content if logging is enabled; a missing or
        # unreadable file simply means there is nothing to resume from
        $log_content = '';
        if ($log_file && is_readable($log_file)) {
            $previous = file_get_contents($log_file);
            if ($previous !== FALSE) {
                $log_content = $previous;
            }
        }

        foreach ($urls as $r => $url) {
            # url already logged by an earlier run: skip it
            if ($log_file && strpos($log_content, $url.' ') !== FALSE) continue;

            $pagerank = $hostrank = $outbound = 'na';

            # parse_url() returns FALSE for malformed urls and omits 'host'
            # when the url has no scheme, so never index it blindly
            $url_info = parse_url($url);
            if (!is_array($url_info)) {
                $url_info = array();
            }
            $host = isset($url_info['host']) ? $url_info['host'] : NULL;

            # first check if site / host exists
            if ($host !== NULL && $this->site_exists($host)) {
                $hostrank = self::rank_from_response($this->get_pr($host));
                $has_path = (isset($url_info['path']) && $url_info['path'] != '' && $url_info['path'] != '/')
                    || isset($url_info['query']);

                # fetch the outbound link count between the two PageRank
                # requests so the Google queries are spaced out a little
                if (($count = $this->get_outbound_links($url)) !== FALSE) {
                    $outbound = $count;
                }

                # without a path, host and page ranks are the same thing
                if (!$has_path) {
                    $pagerank = $hostrank;
                } elseif ($this->site_exists($url)) {
                    $pagerank = self::rank_from_response($this->get_pr($url));
                }
            }

            $log_line = "$url [$hostrank] [$pagerank] [$outbound]".PHP_EOL;
            if ($log_file) {
                $log_content .= $log_line;
                # logging is best effort: make sure the directory and file
                # are writable, a failed write must not abort the run
                @file_put_contents($log_file, $log_content);
            }

            # console output, useful for long url lists
            if ($debug) echo $log_line;

            $prs[$r] = array(
                'url'      => $url,
                'host_pr'  => $hostrank,
                'page_pr'  => $pagerank,
                'outbound' => $outbound,
            );
        }

        return $prs;
    }

    /**
     * Create a curl handle for $url with the configured timeout applied.
     *
     * @param string $url
     * @return resource|object curl handle
     */
    public function init_curl($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        if ($this->timeout > 0) {
            curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        }
        return $ch;
    }

    /**
     * Dummy header callback for site_exists().
     *
     * NOTE(review): it deliberately returns nothing. curl expects the number
     * of bytes handled, so this presumably makes curl stop after the first
     * header line, i.e. only the status code is read - confirm before
     * changing the return value.
     */
    public function curl_header_callback($ch, $header) {}

    /**
     * Check whether a site answers with HTTP 200, 302 or 304.
     * TODO: http://dmbf4.th8.us/ redirect and exists after that! CURLOPT_FOLLOWLOCATION?
     *
     * @param string $url
     * @return bool
     */
    public function site_exists($url)
    {
        $ch = $this->init_curl($url);
        # never echo a response body into the page, whatever the header
        # callback ends up doing
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'curl_header_callback'));
        curl_setopt($ch, CURLOPT_FAILONERROR, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        # follow only 5 redirects at max
        curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
        curl_exec($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return in_array((int) $code, array(200, 302, 304), TRUE);
    }

    /**
     * Query the PageRank endpoint for $url.
     *
     * @param string $url
     * @return string|false raw response body, FALSE when the request failed
     */
    public function get_pr($url)
    {
        $checksum = $this->check_hash($this->create_hash($url));
        $query = sprintf($this->google_pr_api, $checksum, urlencode($url));

        $ch = $this->init_curl($query);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        # a referer only exists when the script is run from a browser
        if (isset($_SERVER['HTTP_REFERER'])) {
            curl_setopt($ch, CURLOPT_REFERER, $_SERVER['HTTP_REFERER']);
        }
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; GoogleToolbar 2.0.114-big; Windows XP 5.1)');
        $response = curl_exec($ch);
        curl_close($ch);

        return $response;
    }

    /**
     * Download the content of a page, following up to 5 redirects.
     *
     * @param string $url
     * @return string|false page body, FALSE when the request failed
     */
    public function get_content($url)
    {
        $ch = $this->init_curl($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        # follow only 5 redirects at max
        curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
        $response = curl_exec($ch);
        curl_close($ch);

        return $response;
    }

    /**
     * Count the distinct external hosts a page links to.
     * Counts hosts, not individual pages; only http/https links whose host
     * differs from the page's own host are counted.
     *
     * @param string $url
     * @return int|false FALSE when no content could be fetched
     */
    public function get_outbound_links($url)
    {
        # TODO: you may want to check returned content type is correct too ie. html or xhtml or ?
        if (!($content = $this->get_content($url))) return FALSE;

        # find all anchor links from content
        $regexp = '<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>';
        $match = array();
        preg_match_all("/$regexp/siU", $content, $match, PREG_SET_ORDER);

        # the page url may lack a scheme, in which case it has no 'host' part
        $parsed_url = parse_url($url);
        $own_host = (is_array($parsed_url) && isset($parsed_url['host'])) ? $parsed_url['host'] : NULL;

        # allow only http and https schemes, not mailto: feed: or other
        $allowed_schemes = array('http', 'https');
        # hosts already counted, used as a set (host => TRUE)
        $seen_hosts = array();

        foreach ($match as $m) {
            # some links are wrapped in '' because people write bad html
            $matched_url = trim($m[2], "'");
            if (!$matched_url) continue;

            $parts = parse_url($matched_url);
            # relative links have no host; protocol-relative links ("//host/")
            # have a host but no scheme - neither counts as outbound
            if (!is_array($parts) || empty($parts['host']) || !isset($parts['scheme'])) continue;

            $link_host = $parts['host'];
            if ($link_host === $own_host || isset($seen_hosts[$link_host])) continue;
            if (!in_array(strtolower($parts['scheme']), $allowed_schemes, TRUE)) continue;

            $seen_hosts[$link_host] = TRUE;
        }

        return count($seen_hosts);
    }

    /**
     * Fold a string into a number: for every byte the running value is
     * multiplied by $magic, wrapped below 2^32 and the byte value is added.
     *
     * @param string $string
     * @param int    $check start value
     * @param int    $magic multiplier
     * @return int|float
     */
    public function strtonmbr($string, $check, $magic)
    {
        $int32 = 4294967296;
        $length = strlen($string);
        for ($i = 0; $i < $length; $i++) {
            $check *= $magic;
            if ($check >= $int32) {
                $check = ($check - $int32 * (int) ($check / $int32));
                $check = ($check < -($int32 / 2)) ? ($check + $int32) : $check;
            }
            # square brackets: the {} string offset syntax no longer exists in PHP 8
            $check += ord($string[$i]);
        }
        return $check;
    }

    /**
     * Build the numeric url hash the checksum is derived from.
     * The shifts and masks are kept exactly as in the original algorithm.
     *
     * @param string $string
     * @return int
     */
    public function create_hash($string)
    {
        $check1 = $this->strtonmbr($string, 0x1505, 0x21);
        $check2 = $this->strtonmbr($string, 0, 0x1003F);
        $factor = 4;
        $halfFactor = $factor/2;
        $check1 >>= $halfFactor;
        $check1 = (($check1 >> $factor) & 0x3FFFFC0 ) | ($check1 & 0x3F);
        $check1 = (($check1 >> $factor) & 0x3FFC00 ) | ($check1 & 0x3FF);
        $check1 = (($check1 >> $factor) & 0x3C000 ) | ($check1 & 0x3FFF);
        $calc1 = (((($check1 & 0x3C0) << $factor) | ($check1 & 0x3C)) << $halfFactor ) | ($check2 & 0xF0F );
        $calc2 = (((($check1 & 0xFFFFC000) << $factor) | ($check1 & 0x3C00)) << 0xA) | ($check2 & 0xF0F0000 );
        return ($calc1 | $calc2);
    }

    /**
     * Turn a hash number into the checksum string sent as "ch":
     * '7' + check digit + the hash as unsigned decimal.
     *
     * @param int $hashNumber
     * @return string
     */
    public function check_hash($hashNumber)
    {
        $check = $flag = 0;
        $hashString = sprintf('%u', $hashNumber);
        # walk the digits right to left; every second digit is doubled and
        # reduced to the sum of its own digits before being added
        for ($i = strlen($hashString) - 1; $i >= 0; $i--) {
            $r = (int) $hashString[$i];
            if (1 === ($flag % 2)) {
                $r += $r;
                $r = (int) ($r / 10) + ($r % 10);
            }
            $check += $r;
            $flag++;
        }
        $check %= 10;
        if (0 !== $check) {
            $check = 10 - $check;
            if (1 === ($flag % 2)) {
                if (1 === ($check % 2)) {
                    $check += 9;
                }
                $check >>= 1;
            }
        }
        return '7'.$check.$hashString;
    }
}
// Actually perform the lookup: $pagerank was never assigned anywhere,
// which is what produced "Notice: Undefined variable: pagerank".
// Calling get() through an instance works whether or not the method is
// declared static.
$checker = new GooglePR();
$pagerank = $checker->get($url);

echo "checking page rank";
echo "<br>";
echo $url;
echo " PR ";
// An empty value here means the PageRank request returned nothing.
echo $pagerank;
?>[/php]