Location: PHPKode > scripts > Search results retriever > class.httpRetriever.php
<?php
/*
    php httpRetriever for PubMed, jStor, Amazon and Google
    Copyright (C) 2010 Pierre-Luc Germain (plger)
    hide@address.com

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License (version 3) as 
    published by the Free Software Foundation.

	Excluded from the restrictions of this license is non-original code 
	that is already published under another license (BSD, for instance).

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    See http://www.gnu.org/licenses/ for the complete license.

*/


/**
 * Factory: returns the retriever object matching $type.
 *
 * Recognised types are 'amazon', 'jstor', 'pubmed', 'google' and 'geocode';
 * each one loads its class file (plus JSON.phps for google/geocode) on demand.
 * Any other value, including the default false, yields a plain httpRetriever.
 *
 * @param string|false $type Retriever name (case-sensitive).
 * @return object The retriever instance.
 */
function createHttpRequest($type=false) {
	// switch compares loosely, so true (and 0 before PHP 8) would match the
	// first case and load the Amazon retriever. Only strings select a platform.
	if(!is_string($type))	return new httpRetriever();

	switch($type){
		case 'amazon':
			require_once('class.amazonRetriever.php');
			return new amazonRetriever();
		case 'jstor':
			require_once('class.jstorRetriever.php');
			return new jstorRetriever();
		case 'pubmed':
			require_once('class.pubmedRetriever.php');
			return new pubmedRetriever();
		case 'google':
			require_once('JSON.phps');
			require_once('class.googleRetriever.php');
			return new googleRetriever();
		case 'geocode':
			require_once('JSON.phps');
			require_once('class.geocodeRetriever.php');
			return new geocodeRetriever();
		default:
			return new httpRetriever();
	}
}

/**
 * Generic page retriever.
 *
 * Fetches a URL through cURL or file_get_contents(), stores request
 * parameters (optionally restricted to a whitelist) and keeps an in-memory
 * debug/error log. Presumably also the base class of the platform-specific
 * retrievers built by createHttpRequest() - their sources are not visible here.
 */
class httpRetriever {
	/** Log entries, each one array($error, $title, $details, $file, $line). */
	private $debug_log = array();
	/** Becomes true as soon as one entry flagged as an error is logged. */
	private $has_errors = false;
	/** 'get' tries file_get_contents() first; any other value goes straight to cURL. */
	public $preferential_get_method = 'curl';
	/** Referer sent with cURL requests, or false to send none. */
	public $referer = false;
	/** Value returned by get_result_count(); never set by this class itself. */
	public $resultCount = false;
	/** Parameters stored through setParam()/setParams(). */
	public $params = array();
	/** Array of accepted parameter names, or false to accept any name. */
	public $allowed_params = false;
	
	/**
	 * Prints a short HTML usage summary.
	 */
	public function help(){
		echo '<pre>Sample usage:
$retriever = createHttpRequest();
$pageContent = $retriever->fetch_results($url);

For platform-specific retrievers, you can also use the standardized functions:
$retriever->setQueryParam($value);
$results = $retriever->fetch_results();
$results = $retriever->standardize($results);
</pre>';
	}
	
	/**
	 * @return mixed The current value of $resultCount (false by default).
	 */
	public function get_result_count(){
		return $this->resultCount;
	}
	
	/**
	 * Fetches the raw content found at $url.
	 *
	 * @param string $url Address to fetch.
	 * @return string|false Page content, or a falsy value on failure.
	 */
	public function fetch_results($url){
		return $this->retrieve_page($url);
	}
	
	/**
	 * Returns the parameter whitelist, optionally echoing it first.
	 *
	 * @param bool $print Whether to echo the list.
	 * @return array|false The whitelist, or false when every name is accepted.
	 */
	public function getAllowedParams($print = true){
		if($print){
			// allowed_params is false when unrestricted; implode() would fail on it.
			$list = is_array($this->allowed_params) ? implode(', ', $this->allowed_params) : '(any)';
			echo 'Allowed params: '.$list;
		}
		return $this->allowed_params;
	}
	
	/**
	 * Stores several parameters at once through setParam().
	 *
	 * @param array $params Map of parameter name => value.
	 */
	public function setParams($params){
		foreach($params as $key=>$value)	$this->setParam($key, $value);
	}

	/**
	 * Stores one parameter if its name is accepted.
	 *
	 * @param string $param Parameter name.
	 * @param mixed $value Parameter value.
	 * @return bool True when stored; false (plus a non-error log entry) when
	 *              the name is not in the whitelist.
	 */
	public function setParam($param, $value){
		if(!$this->allowed_params || in_array($param, $this->allowed_params)){
			$this->params[$param] = $value;
			return true;
		}else{
			$this->doDebug(false,'Invalid parameter.', 'Parameter "'.$param.'" is not registered', __FILE__,__LINE__);
			return false;
		}
	}

	/**
	 * Returns the stored parameters, optionally dumping them first.
	 *
	 * @param bool $print Whether to echo a print_r() dump inside <pre> tags.
	 * @return array The stored parameters.
	 */
	public function getParams($print = true){
		if($print){
			echo '<pre>Params: ';
			print_r($this->params);
			echo '</pre>';
		}
		return $this->params;		
	}
	
	/**
	 * Appends an entry to the debug log.
	 *
	 * @param bool $error Whether the entry is an error (sets the error flag).
	 * @param string $title Short description.
	 * @param string|false $details Optional longer description.
	 * @param string|false $file Optional source file; only its basename is kept.
	 * @param int|false $line Optional source line.
	 */
	public function doDebug($error, $title, $details=false, $file=false, $line=false){
		array_push($this->debug_log, array($error, $title, $details, $file?basename($file):false, $line));
		if($error)	$this->has_errors = true;
	}
	
	/**
	 * @return bool True if at least one error entry has been logged.
	 */
	public function hasErrors(){
		return $this->has_errors;
	}
	
	/**
	 * @return array The raw log entries, in insertion order.
	 */
	public function getLogArray(){
		return $this->debug_log;
	}
	
	/**
	 * Renders the debug log as HTML, one <div> per entry (errors in red).
	 *
	 * Titles and details are emitted as-is, without HTML escaping.
	 *
	 * @return string The HTML fragment; empty when nothing was logged.
	 */
	public function getLog(){
		$output = '';
		foreach($this->debug_log as $entry){
			$output .= '<div '.($entry[0]?'style="color: red;"':'').'>';
			if($entry[3])	$output .= '<span style="float: right;">'.$entry[3].($entry[4]?', line '.$entry[4]:'').'</span>';
			$output .= '<b>'.$entry[1].'</b>';
			if($entry[2])	$output .= '<br/>'.$entry[2];
			$output .= "</div>\n";
		}
		return $output;
	}
	
	/**
	 * Appends a query string built from $params to $base.
	 *
	 * Values are url-decoded and then re-encoded, so both raw and already
	 * encoded values come out encoded once. Keys are used unchanged, and no
	 * '?' or '&' is inserted between $base and the first pair.
	 *
	 * @param string $base URL prefix, expected to end where the query begins.
	 * @param array $params Map of parameter name => value.
	 * @return string $base followed by "key=value" pairs joined with '&'.
	 */
	public function get_url_from_params($base, $params){
		$url = '';
		foreach($params as $key=>$value){
			$url .= ($url==''?'':'&').$key.'='.urlencode(urldecode($value));
		}
		// Was "$base.url": a bare constant, not the query string built above.
		return $base.$url;
	}
	
	/**
	 * Fetches $url with cURL (15s total / 10s connect timeout, follows redirects).
	 *
	 * @param string $url Address to fetch.
	 * @return string|false Response body; a falsy result is logged as an error.
	 */
	private function retrieve_page_curl($url){
		$curl = curl_init();
		// Was applied to an undefined $ch, so the option never reached the handle.
		curl_setopt ($curl, CURLOPT_HEADER, 0);
		if($this->referer)	curl_setopt ($curl, CURLOPT_REFERER, $this->referer);
		curl_setopt ($curl, CURLOPT_URL, $url);
		curl_setopt ($curl, CURLOPT_TIMEOUT, 15);
		curl_setopt ($curl, CURLOPT_CONNECTTIMEOUT, 10);
		curl_setopt ($curl, CURLOPT_FOLLOWLOCATION, 1);
		curl_setopt ($curl, CURLOPT_FORBID_REUSE, 1);
		curl_setopt ($curl, CURLOPT_RETURNTRANSFER, 1);
		$html = curl_exec ($curl);
		curl_close ($curl);
		if(!$html)	$this->doDebug(true,'Could not fetch page.', 'Could not fetch page ('.$url.') using cURL', __FILE__,__LINE__);
		return $html;
	}
	
	/**
	 * Fetches $url with file_get_contents().
	 *
	 * @param string $url Address to fetch.
	 * @return string|false Content; a falsy result is logged as an error.
	 */
	public function retrieve_page_get($url){
		$res = file_get_contents($url);
		if(!$res)	$this->doDebug(true,'Could not fetch page.', 'Could not fetch page ('.$url.') using file_get_contents', __FILE__,__LINE__);
		return $res;
	}
	
	/**
	 * Fetches $url using the preferred method, falling back to the other one.
	 *
	 * With $preferential_get_method == 'get', file_get_contents() is tried
	 * first and cURL second. Otherwise cURL is used, and file_get_contents()
	 * only serves as a fallback when the cURL extension is missing.
	 *
	 * @param string $url Address to fetch.
	 * @return string|false Page content, or a falsy value when every attempt failed.
	 */
	public function retrieve_page($url){
		// Remembers a failed file_get_contents() attempt so it is not repeated below.
		$triedGet = false;
		if($this->preferential_get_method == 'get'){
			$triedGet = true;
			$res = $this->retrieve_page_get($url);
			if($res)	return $res;
		}
		if(function_exists('curl_init')){
			$res = $this->retrieve_page_curl($url);
			if($res)	return $res;
		}else{
			$this->doDebug(true,'Could not fetch page.', 'Could not fetch page ('.$url.'). cURL functions not found', __FILE__,__LINE__);
			if(!$triedGet)	return $this->retrieve_page_get($url);
		}
		return false;
	}
	
	/**
	 * Delivers a result list in the requested form.
	 *
	 * 'xml' echoes the items as an XML document, 'display' echoes a print_r()
	 * dump inside <pre> tags, and any other value returns the list untouched.
	 *
	 * @param array $itemlist List of items, each a map of field name => value.
	 * @param string $return_type 'xml', 'display', or anything else for a plain return.
	 * @return array|null $itemlist for the plain mode, nothing for the echoing modes.
	 */
	public function manage_return($itemlist, $return_type){
		if($return_type == 'xml'){
			// header() only warns once output has started, so skip it in that case.
			if(!headers_sent())	header('Content-Type: application/xml');
			echo '<root>';
			foreach($itemlist as $item){
				echo "\n\t\t\t\t\t<item>";
				foreach($item as $field=>$value){
					// A literal "]]>" would end the CDATA section early; split it across two sections.
					$cdata = str_replace(']]>', ']]]]><![CDATA[>', (string)$value);
					echo '<field name="'.htmlspecialchars((string)$field, ENT_QUOTES).'"><![CDATA['.$cdata.']]></field>'."\n\t\t";
				}
				echo "\n\t\t\t\t\t</item>";
			}
			echo "\n\t\t</root>";
		}elseif($return_type == 'display'){
			// Used to be guarded by an undefined $debug, so nothing was ever shown.
			echo '<pre>';
			print_r($itemlist);
			echo '</pre>';
		}else{
			return $itemlist;
		}		
	}
	
	/**
	 * Sets the main query value. No-op here; the help() text points to
	 * platform-specific retrievers for the real implementation.
	 *
	 * @param mixed $value Query value.
	 * @return bool Always false in this class.
	 */
	public function setQueryParam($value){
		return false;
	}
	
	/**
	 * Converts raw results to the standard item format. This generic class
	 * applies no conversion and returns its input unchanged.
	 *
	 * @param array $array Raw results.
	 * @return array The same array.
	 */
	public function standardize($array){
		// Was "$array()", which tried to call the array as a function.
		return $array;
	}
	
}
// Return current item: Search results retriever