Have you ever thought about scraping Google results?
It is possible.
Try this:
<?php
/**
 * Contract for a search-engine scraper: configure the search, download a
 * result page, and parse that page for results.
 */
interface Search_Interface
{
    /**
     * Set the maximum number of records the search should collect.
     *
     * @param mixed $recordLimit Requested record limit.
     */
    public function setRecordLimit($recordLimit);

    /**
     * Set the keyword to search for.
     *
     * @param mixed $searchKeyword Keyword (search phrase).
     */
    public function setSearchKeyword($searchKeyword);

    /**
     * Download the content of a web page.
     *
     * @param string $pageUrl Address of the page to fetch.
     */
    public function getWebpageContent($pageUrl);

    /**
     * Parse previously downloaded page content for search results.
     *
     * @param string $content Raw page content.
     */
    public function parseWebpage($content);
}
<?php
/*
 ****************************************************************************
 * @ Written by smart_coder@yahoo.co.in
 * @ Usage: only for learning the concept
 * @ Tested: under normal conditions
 ****************************************************************************
 */
//error_reporting(E_ALL);
/**
 * Scrapes Google web-search result pages with cURL and extracts, for each
 * result, the link URL, the link title and the description snippet.
 *
 * Parsed results accumulate in $googleLinks as a list of arrays with the keys
 * 'pageUrl', 'pageTitle' and 'pageInfo'. A textual trace of the run is built
 * up in $parseInfo and appended to a dated log file by searchGoogle().
 *
 * Any error is reported through raiseError(), which terminates the script.
 */
class Google_Engine implements Search_Interface
{
    /** @var string|null URL-encoded search keyword, or null when none is set. */
    public $searchKeyword = null;
    /** @var string|null URL of the result page most recently requested. */
    public $searchUrl = null;
    /** @var string|null Raw content of the most recently downloaded page. */
    public $pageContent = null;
    /** @var array Not used by this class; kept for backward compatibility. */
    public $linkText = array();
    /** @var array Not used by this class; kept for backward compatibility. */
    public $linkUrl = array();
    /** @var int Maximum number of results to collect (0 = nothing to do). */
    public $maxRecord = 0;
    /** @var string|null URL template; the literal text "keyword" is replaced. */
    public $googleUrl = null;
    /** @var array cURL options applied to every request. */
    public $curlOptions = array();
    /** @var array Collected results: list of pageUrl/pageTitle/pageInfo arrays. */
    public $googleLinks = array();
    /** @var int Number of results stored in $googleLinks so far. */
    public $searchRecord = 0;
    /** @var string|null Human-readable trace of the scrape, written to the log. */
    public $parseInfo = null;

    /**
     * Initialise the URL template and cURL options, then apply the limit and
     * keyword through their setters.
     *
     * @param mixed $maxRecord     Maximum number of results to collect.
     * @param mixed $searchKeyword Keyword to search for (not yet URL-encoded).
     */
    public function __construct($maxRecord, $searchKeyword)
    {
        $this->googleUrl = "http://www.google.co.in/search?q=keyword&hl=en&sa=N";
        $this->curlOptions = array(
            CURLOPT_RETURNTRANSFER => true, // return web page
            CURLOPT_HEADER => false, // don't return headers
            CURLOPT_FOLLOWLOCATION => true, // follow redirects
            CURLOPT_ENCODING => "", // handle all encodings
            CURLOPT_USERAGENT => "spider", // who am i
            CURLOPT_AUTOREFERER => true, // set referer on redirect
            CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
            CURLOPT_TIMEOUT => 120, // timeout on response
            CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
        );
        $this->setRecordLimit($maxRecord);
        $this->setSearchKeyword($searchKeyword);
    }

    /**
     * Set the maximum number of records to collect.
     *
     * Non-numeric, empty, zero and negative values all result in a limit of 0.
     * Numeric input is stored as an integer; fractional values are rounded up
     * so that a request such as 0.5 still yields one record.
     *
     * @param mixed $maxRecord Requested limit.
     */
    public function setRecordLimit($maxRecord)
    {
        if (!is_numeric($maxRecord) || $maxRecord <= 0) {
            $this->maxRecord = 0;
            return;
        }
        $this->maxRecord = (int) ceil((float) $maxRecord);
    }

    /**
     * Set the keyword to search for; it is stored URL-encoded.
     *
     * An empty keyword is stored as null.
     *
     * @param mixed $searchKeyword Keyword (search phrase).
     */
    public function setSearchKeyword($searchKeyword)
    {
        if (empty($searchKeyword)) {
            $this->searchKeyword = null;
        } else {
            $this->searchKeyword = urlencode($searchKeyword);
        }
    }

    /**
     * Error handler: emit an E_USER_WARNING and terminate the script.
     *
     * @param string $errorString Message passed to trigger_error().
     */
    public function raiseError($errorString)
    {
        trigger_error($errorString, E_USER_WARNING);
        die("<br><b>Google Search Halted</b>");
    }

    /**
     * Run the search: fetch and parse result pages (10 results per page)
     * until the record limit is reached, then append the trace to the log
     * file "Log-YYYY-MM-DD.txt" in the current working directory.
     */
    public function searchGoogle()
    {
        if (empty($this->searchKeyword)) {
            $this->raiseError("Search Keyword Empty");
            return;
        }
        if ($this->maxRecord == 0) {
            $this->raiseError("Nothing to do: max record is zero");
            return;
        }

        $baseUrl = str_replace("keyword", $this->searchKeyword, $this->googleUrl);
        $pageLimit = ceil($this->maxRecord / 10);
        $this->parseInfo .= "\n-----Starting Scrap-----\n";
        for ($i = 1; $i <= $pageLimit; $i++) {
            $this->parseInfo .= "\n\n-----Page $i-----\n\n";
            $pageStart = ($i - 1) * 10;
            // Build each page URL from the base URL. Appending to the previous
            // page's URL would pile up "&start=" parameters
            // (…&start=0&start=10&start=20).
            $this->searchUrl = $baseUrl . "&start=" . $pageStart;
            $this->getWebpageContent($this->searchUrl); // read the webpage
            $this->parseWebpage($this->pageContent); // parse its content
            if ($this->searchRecord >= $this->maxRecord) {
                // Limit reached: no need to request further pages.
                break;
            }
        }
        $this->parseInfo .= "\n-----Records" . $this->searchRecord . "-----\n";
        $this->parseInfo .= "\n-----End Scrap----\n";

        // Logging is best-effort: an unwritable log must not abort the search.
        $fp = fopen("Log-" . date("Y-m-d") . ".txt", "a");
        if ($fp !== false) {
            fwrite($fp, $this->parseInfo);
            fclose($fp);
        }
    }

    /**
     * Download a result page and store its body in $pageContent.
     *
     * On a cURL error raiseError() is called with the cURL error message.
     *
     * @param string $pageUrl URL to fetch. When empty, $this->searchUrl is
     *                        used so that older callers keep working.
     */
    public function getWebpageContent($pageUrl)
    {
        $targetUrl = empty($pageUrl) ? $this->searchUrl : $pageUrl;
        $curlCon = curl_init($targetUrl);
        if ($curlCon === false) {
            $this->raiseError("Unable to initialise cURL");
            return;
        }
        curl_setopt_array($curlCon, $this->curlOptions);
        $pageContent = curl_exec($curlCon);
        $errNo = curl_errno($curlCon);
        $errMsg = curl_error($curlCon);
        curl_close($curlCon);

        if ($errNo == 0) {
            $this->pageContent = $pageContent; // content ready for parsing
        } else {
            // connection error or curl not performed well
            $this->raiseError($errMsg);
        }
    }

    /**
     * Parse a result page and append the results found to $googleLinks.
     *
     * Only the main link, its title and the description snippet are
     * extracted. Parsing stops once $maxRecord results have been stored.
     *
     * @param string $content Page content to parse. When empty,
     *                        $this->pageContent is used so that older callers
     *                        keep working.
     */
    public function parseWebpage($content)
    {
        if (empty($content)) {
            $content = $this->pageContent;
        }
        if (empty($content)) {
            $this->raiseError("Webpage Content Empty");
            return;
        }

        // The patterns do not depend on the chunk, so they are defined once.
        $patternResult = "/<div[\sa-zA-Z0-9_.='\"]*class[\s]*=g[a-zA-Z0-9='\".]*/";
        $urlPattern = "/a[\sa-zA-Z='\"._0-9]*href[\s]*=['\"]+([a-zA-Z0-9:\/._=\+\?\&#;\-\s]+)['\"]+[\sa-zA-Z0-9=+'\".]*class[\s]*=[\s\'\"0-9]*[l\s]+>([a-zA-Z0-9_.,><:;\/\s\-\|#\&]+[\s]*)[<\/]*a>/";
        $infoPattern = "/font[\s]*size[\s]*=[\s]*-1[\s]*>([a-zA-Z0-9:_.'\",;\>\<\/\-\s\&\?#'\"()\@\!\*\^\%]+)[\s\<]*span[\s]class/";

        // Split the page at every result container (<div ... class=g...>).
        $matchItems = preg_split($patternResult, $content);
        if ($matchItems === false) {
            // PCRE failure: there is nothing that can be parsed.
            return;
        }

        foreach ($matchItems as $partSplit) {
            // Check the limit before storing, so that at most $maxRecord
            // results are kept across all pages.
            if ($this->searchRecord >= $this->maxRecord) {
                break;
            }

            $matchUrl = array();
            $matchInfo = array();
            preg_match_all($urlPattern, $partSplit, $matchUrl);
            preg_match_all($infoPattern, $partSplit, $matchInfo);

            // A chunk without a match (e.g. the markup before the first
            // result) leaves the capture groups empty.
            $mainUrl = isset($matchUrl[1][0]) ? $matchUrl[1][0] : null;
            $mainText = isset($matchUrl[2][0]) ? strip_tags($matchUrl[2][0]) : null;
            $mainInfo = isset($matchInfo[1][0]) ? strip_tags($matchInfo[1][0]) : '';

            if (!empty($mainUrl) && !empty($mainText)) {
                $this->googleLinks[$this->searchRecord] = array(
                    'pageUrl' => $mainUrl,
                    'pageTitle' => $mainText,
                    'pageInfo' => $mainInfo,
                );
                $this->searchRecord = $this->searchRecord + 1;
            }
        }
    }
    //end class//
}
?>
<?php
// Controller part of the search page: reads the request, runs the scraper and
// leaves $keyword, $limit and $links defined for the HTML template that follows.
require_once("SearchInterface.php");
require_once("GoogleEngine.class.php");

// Request values may be missing (first page load) or malformed (arrays,
// non-numeric limit), so normalise them instead of reading them blindly.
$keyword = (isset($_REQUEST['keyword']) && is_string($_REQUEST['keyword'])) ? $_REQUEST['keyword'] : '';
$limit = (isset($_REQUEST['limit']) && is_numeric($_REQUEST['limit'])) ? (int) $_REQUEST['limit'] : 0;
if ($limit <= 0) {
    $limit = 10; // default number of results
}

// Always defined, so the template can count() and iterate it safely even when
// no search was performed.
$links = array();
if (!empty($keyword)) {
    $googleObject = new Google_Engine($limit, $keyword);
    $googleObject->searchGoogle();
    $links = $googleObject->googleLinks;
}
?>
<style type="text/css">
<!--
/* Page heading ("G-Search"): large dark-blue text. */
.style1 {
font-size: 24px;
color: #000099;
}
/* "Search" label next to the keyword field: bold grey text. */
.style2 {
font-size: 16px;
font-weight: bold;
color: #666666;
}
/* Result title link: bold blue text. */
.style3 {
font-size: 16px;
font-weight: bold;
color:#0000CC;
}
/* Result table cell holding the title link and description: black serif text. */
.style4 {
font-size: 16px;
font-weight:500;
color:#000000;
font-family:"Times New Roman", Times, serif;
}
-->
</style>
<?php
// Search form and result list.
//
// Reads $keyword (the submitted search text) and $links (a list of arrays with
// the keys 'pageUrl', 'pageTitle' and 'pageInfo'). Both are normalised here so
// the template does not fail when no search has been run yet.
$links = (isset($links) && is_array($links)) ? $links : array();
$keyword = (isset($keyword) && is_string($keyword)) ? $keyword : '';
// ENT_SUBSTITUTE keeps output from collapsing to an empty string when scraped
// text is not valid UTF-8.
$escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;
?>
<table width="100%" border="0" cellspacing="0" cellpadding="0">
<tr>
<td align="center"> </td>
</tr>
<tr>
<td height="24" align="center"><span class="style1">G-Search</span></td>
</tr>
<tr>
<td align="center"><form name="form1" method="post" action="">
<span class="style2">Search</span>
<input name="keyword" type="text" id="keyword" size="50" value="<?php echo htmlspecialchars($keyword, $escapeFlags, 'UTF-8'); // user input: always escape ?>">
<select name="limit" id="limit">
<option value="10">10</option>
<option value="20">20</option>
<option value="30">30</option>
<option value="40">40</option>
</select>
<input type="submit" name="Submit" value="Search">
</form>
</td>
</tr>
<tr>
<td align="center"> </td>
</tr>
<tr>
<td align="center"><table width="90%" border="0" cellspacing="0" cellpadding="0">
<?php
foreach ($links as $link) {
    // Scraped text is untrusted and must be escaped. It may already contain
    // HTML entities (e.g. &amp;), so double-encoding is switched off.
    $pageUrl = htmlspecialchars((string) $link['pageUrl'], $escapeFlags, 'UTF-8', false);
    $pageTitle = htmlspecialchars((string) $link['pageTitle'], $escapeFlags, 'UTF-8', false);
    $pageInfo = htmlspecialchars((string) $link['pageInfo'], $escapeFlags, 'UTF-8', false);
?>
<tr>
<td align="left" valign="top" class="style4"><a href="<?php echo $pageUrl; ?>" class="style3" target="_blank" rel="noopener noreferrer"><?php
echo $pageTitle;
?>
</a><br>
<?php
echo $pageInfo;
?><br>
</td>
</tr>
<?php
}
// Show the "No results" row only after the form was actually submitted.
if (count($links) == 0 && !empty($_POST['Submit']) && !empty($keyword)) {
?>
<tr>
<td align="left" valign="top">No results </td>
</tr>
<?php
}
?>
</table></td>
</tr>
<tr>
<td align="center"> </td>
</tr>
</table>
For more details
SmartCoderIn => Scripts and tutorials : We give online assistance
SmartCoderIn => Scripts and tutorials : We give online assistance
?>