To please my boss, I wrote a quick script that grabs all the links for postings on CL and other similar sites, so he doesn't have to check the sites himself, just his email. I did my best, and he was happy that it worked in the end. My problem is that I am still an amateur with PHP, so when the script runs it costs the server a huge amount of processing time: longer than 60 seconds! I know there must be a way to fix the code so it runs faster, but I don't want to change anything for fear of breaking it and having my boss knock on my door.
[php]<?php
/**
 * Crawl two listing pages, record every link that is not yet in the `urls`
 * table, and mail the content of each newly seen link.
 *
 * Flow:
 *   1. Connect to MySQL and prepare the lookup/insert statements once.
 *   2. For each listing page: download it, extract every <a href> link,
 *      skip links already stored, store and mail the new ones.
 *   3. Add the number of new links to the per-session counter and print a
 *      summary with the elapsed time.
 *
 * The script dies with a message if the database is unreachable or a listing
 * page cannot be downloaded. A failed download of an individual post is
 * best-effort: the mail is still sent, with an empty body.
 */
session_start();

if (!isset($_SESSION['counts'])) {
    $_SESSION['counts'] = 0;
}

// Wall-clock start, as a float number of seconds.
$starttime = microtime(true);

// Your database details.
$servername = 'localhost';
$dbusername = 'root';
$dbpassword = '';
$dbname     = 'postings';

// Listing pages to crawl, keyed by the label used as the mail subject prefix.
$sites = array(
    'First site:'  => "http://someotherpostsite.com/post/exmaples",
    'Second site:' => "http://somesitewithpost.com/post/examples",
);

// Upper bound (seconds) for any single HTTP download, so that one slow or
// dead host cannot stall the whole run.
$fetch_timeout = 15;
$http_context  = stream_context_create(array(
    'http' => array('timeout' => $fetch_timeout),
));

// Matches <a ... href="URL" ...>TEXT</a>; group 2 = link address,
// group 3 = link text. The U modifier inverts greediness, so (.*) stops at
// the first closing </a>.
$link_pattern = '/<a\s[^>]*href=("??)([^" >]*?)\1[^>]*>(.*)<\/a>/siU';

// Mail settings shared by every message.
$to      = "[email protected]";
$headers = 'MIME-Version: 1.0' . "\r\n"
    . 'Content-type: text/html; charset=iso-8859-1' . "\r\n"
    . "From: [email protected]\r\n" . "X-Mailer: php";

/**
 * Connect to the mysql database.
 * Errors are checked explicitly below, so keep mysqli from throwing.
 */
mysqli_report(MYSQLI_REPORT_OFF);
$db = new mysqli($servername, $dbusername, $dbpassword, $dbname);
if ($db->connect_errno) {
    die($db->connect_error);
}

// Prepared once and re-executed per link. Binding the URL as a parameter
// keeps scraped text out of the SQL string entirely.
$select = $db->prepare('SELECT auto_id FROM urls WHERE url = ?');
if ($select === false) {
    die($db->error);
}
$insert = $db->prepare('INSERT INTO urls (url, time_stamp) VALUES (?, ?)');
if ($insert === false) {
    die($db->error);
}

// bind_param() binds by reference: assigning to these variables inside the
// loop is enough to change what the next execute() sends.
$link_url  = '';
$time      = '';
$select->bind_param('s', $link_url);
$insert->bind_param('ss', $link_url, $time);

$link_num = 0;

// Now let's spider each listing page and look for new posts.
foreach ($sites as $from => $url) {
    // Warning suppressed on purpose: failure is reported by the die() below.
    $input = @file_get_contents($url, false, $http_context);
    if ($input === false) {
        die("Could not access file: $url");
    }

    if (!preg_match_all($link_pattern, $input, $matches, PREG_SET_ORDER)) {
        // No links on this page (or the match failed): nothing to do here.
        continue;
    }

    foreach ($matches as $match) {
        $link_url  = $match[2];
        $link_text = $match[3];

        // Skip links that are already stored.
        $auto_id = 0;
        $found   = false;
        if ($select->execute()) {
            $select->bind_result($auto_id);
            $found = ($select->fetch() === true);
            $select->free_result();
        }
        if ($found && $auto_id > 0) {
            continue;
        }

        $link_num++;
        $time = date('l jS \of F Y h:i:s A');
        if (!$insert->execute()) {
            // Keep going so the post is still mailed, but leave a trace:
            // an unrecorded link will be mailed again on the next run.
            error_log("Could not record url {$link_url}: " . $insert->error);
        }

        // Best-effort download of the post itself; on failure the mail is
        // sent with an empty body rather than aborting the whole crawl.
        $mail_content = @file_get_contents($link_url, false, $http_context);
        if ($mail_content === false) {
            $mail_content = '';
        }

        // Now mail it.
        $subject = "{$from} {$link_text}";
        mail($to, $subject, $mail_content, $headers);
    }
}

$select->close();
$insert->close();
$db->close();

$_SESSION['counts'] = $_SESSION['counts'] + $link_num;

// Show the time it took to crawl, check and mail everything.
$totaltime = microtime(true) - $starttime;
echo "All Links crawled, checked, mailed in {$totaltime} seconds with {$link_num} inserted into database\n{$_SESSION['counts']} added this session.";
//end
?>[/php]