Now, there must be some neat trick …
I’m parsing this table, row by row … no problem.
Until I come to a cell that spans two rows (a <td rowspan="2">).
How do I tell both rows that they should both take 6 in column 0?
    |    A    |
  6 |_________|
    |    B    |
 ___|_________|
Target website: http://www.webbotsspidersscreenscrapers.com/DSP_targets.php
This script gives a MySQL error obviously, but it’s so you can see my thinking.
[php]
/**
 * Expand the physical <td> cells of one table row into logical columns,
 * carrying down any cell that an earlier row declared with rowspan > 1.
 *
 * A row that sits underneath a spanning cell contains fewer <td> elements
 * than the table has columns, so indexing its cells directly shifts every
 * column to the left. This helper re-inserts the spanning cell so that
 * column N always means the same thing in every row.
 *
 * Limitation: colspan is not handled; every cell is treated as one column wide.
 *
 * @param array $physical_cells Cell strings for the current row, in document
 *                              order. Assumed to still contain the opening
 *                              <td ...> tag (the rowspan attribute is read
 *                              from it) -- TODO confirm against parse_array().
 * @param array $rowspan_carry  State shared between consecutive rows of one
 *                              table: column index => array('rows' => rows
 *                              still to fill, 'cell' => cell string).
 *                              Updated in place; pass an empty array for the
 *                              first row of each table.
 *
 * @return array Cell strings indexed by logical column number.
 */
function expand_rowspans($physical_cells, &$rowspan_carry)
{
    $logical_cells     = array();
    $cell_count        = count($physical_cells);
    $next_physical     = 0;
    $last_carry_column = empty($rowspan_carry) ? -1 : max(array_keys($rowspan_carry));

    // Walk the logical columns until both the row's own cells and every
    // carried-down cell have been placed.
    for ($column = 0; $next_physical < $cell_count || $column <= $last_carry_column; $column++) {
        if (isset($rowspan_carry[$column])) {
            // This column is still covered by a cell from a row above.
            $logical_cells[$column] = $rowspan_carry[$column]['cell'];
            $rowspan_carry[$column]['rows']--;
            if ($rowspan_carry[$column]['rows'] < 1) {
                unset($rowspan_carry[$column]);
            }
        } elseif ($next_physical < $cell_count) {
            $cell = $physical_cells[$next_physical];
            $next_physical++;
            $logical_cells[$column] = $cell;

            // Remember a spanning cell so the following rows can reuse it.
            if (preg_match('/<td\b[^>]*\browspan\s*=\s*["\']?\s*(\d+)/i', $cell, $match)
                && (int) $match[1] > 1
            ) {
                $rowspan_carry[$column] = array(
                    'rows' => (int) $match[1] - 1,
                    'cell' => $cell,
                );
            }
        }
        // Otherwise the row is short and nothing covers this column: leave it unset.
    }

    return $logical_cells;
}

// Scrape the chapter/name table from the target page and store each row.
// Relies on http_get() and parse_array() and on the scraperwiki class, all of
// which must be loaded before this script runs.
$product_array = array();
$product_count = 0;

// Download the target (store) web page
$target   = "http://www.webbotsspidersscreenscrapers.com/DSP_targets.php";
$web_page = http_get($target, "");

// Parse all the tables on the web page into an array
// (the closing delimiter must be "</table>"; an empty one yields no table body)
$table_array = parse_array($web_page['FILE'], "<table", "</table>");

$table_landmark   = "Name";         // text that identifies the product table
$heading_landmark = "Name";         // text that identifies the heading row
$ending_landmark  = "Nasa Viking";  // row at which parsing stops (that row itself is not stored)

// Look for the table that contains the product information
foreach ($table_array as $table_html) {
    if (stristr($table_html, $table_landmark) === false) {
        continue;
    }
    echo "FOUND: Product table\n";

    // Per-table state, reset so nothing leaks from a previously parsed table.
    $heading_row    = null;
    $chapter_column = null;
    $name_column    = null;
    $rowspan_carry  = array();

    $product_row_array = parse_array($table_html, "<tr", "</tr>");
    foreach ($product_row_array as $table_row => $row_html) {
        // Only look for the heading until one is found, so a data row that
        // happens to contain "name" is not mistaken for a second heading.
        if ($heading_row === null && stristr($row_html, $heading_landmark) !== false) {
            $found_chapter    = null;
            $found_name       = null;
            $table_cell_array = parse_array($row_html, "<td", "</td>");
            foreach ($table_cell_array as $heading_cell => $cell_html) {
                $heading_text = strip_tags(trim($cell_html));
                if (stristr($heading_text, "chapter") !== false) {
                    $found_chapter = $heading_cell;
                }
                if (stristr($heading_text, "name") !== false) {
                    $found_name = $heading_cell;
                }
            }

            // Accept the row as the heading only when both columns were located;
            // otherwise keep searching in the following rows.
            if ($found_chapter !== null && $found_name !== null) {
                echo "FOUND: Table heading row\n";
                $chapter_column = $found_chapter;
                $name_column    = $found_name;
                echo "FOUND: chapter_column=$chapter_column\n";
                echo "FOUND: name_column=$name_column\n";
                $heading_row = $table_row;
            }
        }

        if (stristr($row_html, $ending_landmark) !== false) {
            echo "PARSING COMPLETE!\n";
            break;
        }

        if ($heading_row !== null && $heading_row < $table_row) {
            $table_cell_array = expand_rowspans(
                parse_array($row_html, "<td", "</td>"),
                $rowspan_carry
            );

            $chapter_cell = isset($table_cell_array[$chapter_column]) ? $table_cell_array[$chapter_column] : '';
            $name_cell    = isset($table_cell_array[$name_column]) ? $table_cell_array[$name_column] : '';

            // Lower-case keys: they must match the unique keys given to
            // scraperwiki::save() below. Tags are stripped before trimming so
            // whitespace that sat inside the tags is removed as well.
            $product_array[$product_count] = array(
                'chapter' => trim(strip_tags($chapter_cell)),
                'name'    => trim(strip_tags($name_cell)),
            );
            $product_count++;
            echo "PROCESSED: Item#$product_count\n";
        }
    }
}

// Report every collected row and store it, one record per call.
foreach ($product_array as $xx => $product) {
    echo "$xx. ";
    echo "Chapter: ".$product['chapter'].", ";
    echo "Name: ".$product['name'].", ";
    scraperwiki::save(array('name', 'chapter'), $product);
}
?>[/php]