<?php // Location: PHPKode > scripts > Search results retriever > class.pubmedRetriever.php ?>
<?php
/*
* Copyright (C) 2010 Pierre-Luc Germain (plger)
* See class.httpRetriever.php for more information.
*/

/**
 * Retriever for PubMed search results.
 *
 * Builds requests against the PubMed web site and the NCBI E-utilities
 * (esearch / efetch / egquery), parses the returned XML and turns every
 * <PubmedArticle> element into a flat associative array.
 *
 * Relies on members inherited from httpRetriever that are not defined in this
 * file: $params, $resultCount, retrieve_page(), doDebug() and manage_return().
 */
class pubmedRetriever extends httpRetriever {
	
	/** Request parameter names this retriever declares as allowed. */
	public $allowed_params = array(	'pubmedfilters', 'report', 'term', 'dispmax', 'retmax', 'retstart');
	/** Item key => name of the XML element whose first occurrence fills it. */
	public $wanted_fields = array(	'affiliation' => 'Affiliation',
								'title' => 'ArticleTitle',
								'year' => 'Year',
								'coverage' => 'MedlinePgn',
								'pmid' => 'PMID'
							);

	/**
	 * Echoes a short HTML usage sample for this class.
	 */
	public function help(){
		echo '<pre>Sample usage:
$retriever = createHttpRequest("pubmed");
$retriever->setParam("term", "stem cells");
$retriever->setParam("dispmax", 50);
$result_array = $retriever->fetch_results("array");
$result_count = $retriever->get_result_count();

Mass usage:
$retriever = createHttpRequest("pubmed");
$query_info = $retriever->prepare_mass($searchTerm);
$result_array = $retriever->fetch_mass($query_info["query_key"], $query_info["WebEnv"], 0, 20);
</pre>';
	}
	
	/**
	 * Sets the search expression.
	 *
	 * The value is stored under 'term', which is the key fetch_results() sends
	 * and get_result_count() reads. Previously it was only stored under 'query',
	 * so 'term' stayed empty and get_result_count() returned false. The 'query'
	 * key is still written so existing readers of it keep working.
	 *
	 * @param string $value Search expression.
	 */
	public function setQueryParam($value){
		$this->params['term'] = $value;
		$this->params['query'] = $value;
	}
	
	/**
	 * Reduces cleaned records to a fixed set of keys.
	 *
	 * @param array $array Records as produced by cleanRecord().
	 * @return array List of arrays with the keys title, author, date, year,
	 *               number and link; a key is null when the record lacks it.
	 */
	public function standardize($array){
		$itemlist = array();
		// A failed fetch hands back false; there is nothing to convert then.
		if(!is_array($array))	return $itemlist;

		// Standardized key => key in the cleaned record.
		$mapping = array(	'title'=>'title',
							'author'=>'author',
							'date'=>'pubdate',
							'year'=>'year',
							'number'=>'pmid',
							'link'=>'link'
						);
		foreach($array as $record){
			$newitem = array();
			foreach($mapping as $target=>$source){
				// cleanRecord() only sets a field when the element exists.
				$newitem[$target] = isset($record[$source]) ? $record[$source] : null;
			}
			$itemlist[] = $newitem;
		}
		return $itemlist;
	}		

	/**
	 * Runs an esearch with usehistory=y and returns the handles needed by
	 * fetch_mass().
	 *
	 * @param string $term Search expression (raw or already url-encoded).
	 * @return array|false array('query_key'=>..., 'WebEnv'=>...), or false when
	 *                     the page could not be retrieved or parsed, or when no
	 *                     WebEnv value was found.
	 */
	public function prepare_mass($term){
		$url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&usehistory=y&term='.urlencode(urldecode($term));
		$content = $this->retrieve_page($url);	
		if(!$content)	return false;
		
		$xml = $this->pubmed_load_xml($content, false, __LINE__);
		if(!$xml)	return false;

		$return = array();
		$return['query_key'] = $this->nodeValue($xml->getElementsByTagName('QueryKey'));
		$return['WebEnv'] = $this->nodeValue($xml->getElementsByTagName('WebEnv'));
		if(!$return['WebEnv'])	return false;
		return $return;
	}

	/**
	 * Fetches a slice of the records of a search prepared with prepare_mass().
	 *
	 * @param string $query_key 'query_key' value returned by prepare_mass().
	 * @param string $web_env   'WebEnv' value returned by prepare_mass().
	 * @param int    $retstart  Value sent as retstart.
	 * @param int    $retmax    Value sent as retmax.
	 * @return array|false List of cleaned records, or false when the page could
	 *                     not be retrieved or parsed.
	 */
	public function fetch_mass($query_key, $web_env, $retstart=0, $retmax=50){
		
		$parameters = array( 'db'=>'pubmed', 'retmax' => $retmax, 'retstart' => $retstart, 'query_key'=>$query_key, 'WebEnv'=>$web_env, 'retmode'=>'xml' );
		$url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?".$this->pubmed_build_query($parameters);
		$content = $this->retrieve_page($url);	
		if(!$content)	return false;
		
		$xml = $this->pubmed_load_xml($content, false, __LINE__);
		if(!$xml)	return false;

		return $this->pubmed_collect_records($xml);
		
	}

	/**
	 * Queries the PubMed web site with the current parameters.
	 *
	 * Defaults (pubmedfilters, retmax, report, term) are overridden by whatever
	 * is stored in $this->params.
	 *
	 * @param string $return Format selector handed to manage_return().
	 * @return mixed Result of manage_return(), or false when the page could not
	 *               be retrieved or parsed.
	 */
	public function fetch_results($return='array'){

		$parameters = array( 'pubmedfilters'=>'true', 'retmax' => '20', 'report'=>'xml', 'term'=>'' );
		$parameters = array_merge($parameters, $this->params);
		$url = "http://www.ncbi.nlm.nih.gov/pubmed?".$this->pubmed_build_query($parameters);

		$content = $this->retrieve_page($url);	
		if(!$content)	return false;
		// Turn entity-escaped angle brackets back into markup before parsing.
		$content = str_replace(array('&lt;','&gt;'),array('<','>'),$content);
		
		$xml = $this->pubmed_load_xml($content, false, __LINE__);
		if(!$xml)	return false;

		return $this->manage_return($this->pubmed_collect_records($xml), $return);
	
	}
	
	/**
	 * Builds a query string from key/value pairs.
	 *
	 * Values are url-decoded before being encoded, so a value that is already
	 * encoded is not encoded twice. Keys are used as given.
	 *
	 * @param array $parameters Parameter name => value.
	 * @return string Pairs joined with '&', without a leading '?'.
	 */
	private function pubmed_build_query($parameters){
		$pairs = array();
		foreach($parameters as $key=>$value){
			$pairs[] = $key."=".urlencode(urldecode($value));
		}
		return implode('&', $pairs);
	}

	/**
	 * Parses an XML string, reporting a failure through doDebug().
	 *
	 * @param string $content    XML source.
	 * @param mixed  $debug_info Third argument handed to doDebug() on failure.
	 * @param int    $line       Line number reported to doDebug() (the caller's).
	 * @return DOMDocument|false The parsed document, or false on failure.
	 */
	private function pubmed_load_xml($content, $debug_info, $line){
		$xml = new DOMDocument();
		if(!($xml->loadXML($content))){
			$this->doDebug(true,'Could not parse xml.', $debug_info, __FILE__, $line);
			return false;
		}
		return $xml;
	}

	/**
	 * Cleans every <PubmedArticle> element of a document.
	 *
	 * @param DOMDocument $xml Parsed response.
	 * @return array List of records as produced by cleanRecord().
	 */
	private function pubmed_collect_records($xml){
		$itemlist = array();
		foreach($xml->getElementsByTagName('PubmedArticle') as $record){
			$itemlist[] = $this->cleanRecord($record);
		}
		return $itemlist;
	}

	/**
	 * Flattens one <PubmedArticle> element into an associative array.
	 *
	 * Always present: 'publisher' (empty string), 'authors' (list of names),
	 * 'relation' (journal title with volume/issue, or '') and 'author' (names
	 * joined with ', ', shortened to the first two plus ' et al.' when there are
	 * more than four). Present only when found: the keys of $wanted_fields,
	 * 'link' (when a pmid was found) and 'pubdate' (Y-m-d).
	 *
	 * @param DOMElement $record The <PubmedArticle> element.
	 * @return array
	 */
	private function cleanRecord($record){
		$item = array('publisher'=>'', 'authors'=>array());

		foreach($record->getElementsByTagName('Author') as $author){
			$forename = $this->nodeValue($author->getElementsByTagName('ForeName'));
			$lastname = $this->nodeValue($author->getElementsByTagName('LastName'));
			// trim() drops the separator when either part is missing.
			$autname = trim($forename.' '.$lastname);
			if($autname === ''){
				// Group authors carry a CollectiveName instead of personal names.
				$autname = trim((string) $this->nodeValue($author->getElementsByTagName('CollectiveName')));
			}
			if($autname !== '')	$item['authors'][] = $autname;
		}

		foreach($this->wanted_fields as $key=>$var){
			$try = $this->nodeValue($record->getElementsByTagName($var));
			if($try)	$item[$key] = $try;
		}
		if(isset($item['pmid']))	$item['link'] = 'http://www.ncbi.nlm.nih.gov/pubmed/'.$item['pmid'];

		// Use the first of these date elements that yields a usable date.
		foreach(array('PubDate','PubMedPubDate','ArticleDate') as $tag){
			$try = $this->pubmed_try_date($record->getElementsByTagName($tag));
			if($try){
				$item['pubdate'] = $try;
				break;
			}
		}

		$j = array();
		$journal = $record->getElementsByTagName('Journal')->item(0);
		// Without this check a record lacking <Journal> caused a fatal error.
		if($journal){
			foreach($journal->getElementsByTagName('*') as $node){
				if(in_array($node->nodeName, array('Title','Volume','Issue','ISOAbbreviation'), true)){
					$j[$node->nodeName] = $node->nodeValue;
				}
			}
		}
		if(isset($j['Title'])){
			$item['relation'] = $j['Title'];
			if(isset($j['Volume']))	$item['relation'] .= ', Vol '.$j['Volume'];
			if(isset($j['Issue']))	$item['relation'] .= ', '.$j['Issue'];
		}else{
			$item['relation'] = '';
		}

		if(count($item['authors']) > 4){
			$item['author'] = $item['authors'][0].', '.$item['authors'][1].' et al.';
		}else{
			$item['author'] = implode(', ',$item['authors']);
		}
		return $item;
	}

	/**
	 * Returns the text of the first node of a node list.
	 *
	 * @param DOMNodeList $node Node list, e.g. from getElementsByTagName().
	 * @return string|false The node value, or false when the list is empty or
	 *                      the value is falsy ('' or '0').
	 */
	private function nodeValue($node){
		// ->length is used because count() only works on DOMNodeList from PHP 7.2.
		if(!$node || $node->length < 1)	return false;
		$first = $node->item(0);
		if(!$first)	return false;
		$value = $first->nodeValue;
		return ($value ? $value : false);
	}

	/**
	 * Builds a Y-m-d date from the Year/Month/Day children of the first node of
	 * a node list.
	 *
	 * Year and Month are required; a missing Day counts as the 1st. Month may be
	 * numeric ("05") or textual ("May").
	 *
	 * @param DOMNodeList $pubdate Candidate date elements.
	 * @return string|false The date as Y-m-d, or false when none can be built.
	 */
	private function pubmed_try_date($pubdate){
		if(!$pubdate)	return false;
		$thepubdate = $pubdate->item(0);
		if(!$thepubdate)	return false;

		$day = trim((string) $this->nodeValue($thepubdate->getElementsByTagName('Day')));
		$month = trim((string) $this->nodeValue($thepubdate->getElementsByTagName('Month')));
		$year = trim((string) $this->nodeValue($thepubdate->getElementsByTagName('Year')));
		if($year === '' || $month === '')	return false;
		if($day === '')	$day = '1';

		// An explicit digit test replaces the former (int)$month != $month
		// comparison, whose result for textual months differs between PHP
		// versions.
		if(preg_match('/^[0-9]+$/', $month)){
			if(!preg_match('/^[0-9]+$/', $year) || !preg_match('/^[0-9]+$/', $day))	return false;
			if(!checkdate((int) $month, (int) $day, (int) $year))	return false;
			return sprintf('%04d-%02d-%02d', $year, $month, $day);
		}

		$try = strtotime($day.' '.$month.' '.$year);
		return ($try?date('Y-m-d',$try):false);
	}

	/**
	 * Returns the number of PubMed hits for the current 'term' parameter.
	 *
	 * The value is read from an egquery response and kept in
	 * $this->resultCount, which is returned directly on later calls when it is
	 * truthy.
	 *
	 * @return string|false The count as found in the XML, or false when no term
	 *                      is set, the page could not be retrieved or parsed, or
	 *                      no pubmed entry was found.
	 */
	public function get_result_count(){
		if($this->resultCount)	return $this->resultCount;
		$term = isset($this->params['term'])?urlencode(urldecode($this->params['term'])):false;
		if(!$term)	return false;
		$url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi?term=".$term;
		
		$content = $this->retrieve_page($url);	
		if(!$content)	return false;

		$xml = $this->pubmed_load_xml($content, $url, __LINE__);
		if(!$xml)	return false;
	
		foreach($xml->getElementsByTagName('ResultItem') as $record){
			$type = $record->getElementsByTagName('DbName');
			// Only the entry for the pubmed database is of interest.
			if($type->length < 1 || $type->item(0)->nodeValue !== 'pubmed')	continue;
			$count = $record->getElementsByTagName('Count');
			if($count->length > 0){
				$this->resultCount = $count->item(0)->nodeValue;
				return $this->resultCount;
			}
		}
		return false;
	}

}
// Return current item: Search results retriever