<?php
/*
* Copyright (C) 2010 Pierre-Luc Germain (plger)
* See class.httpRetriever.php for more information.
*/
/**
 * Retriever for PubMed records.
 *
 * Builds request URLs for the PubMed web search and for the NCBI E-utilities
 * (esearch / efetch / egquery), downloads them through the inherited
 * retrieve_page() method and turns the returned XML into plain PHP arrays.
 *
 * NOTE(review): retrieve_page(), doDebug(), manage_return(), $params and
 * $resultCount are not defined in this class; they are presumably provided by
 * httpRetriever -- confirm there.
 * NOTE(review): all endpoints below use plain http://. NCBI may require https
 * nowadays; confirm that retrieve_page() supports https before switching.
 */
class pubmedRetriever extends httpRetriever {
    /**
     * Names of the request parameters this retriever works with.
     * NOTE(review): presumably consumed by the parent's setParam() -- confirm.
     */
    public $allowed_params = array('pubmedfilters', 'report', 'term', 'dispmax', 'retmax', 'retstart');

    /**
     * Map of result-item key => XML tag name. cleanRecord() copies the text
     * of the first matching tag of each record into the item under that key.
     */
    public $wanted_fields = array(
        'affiliation' => 'Affiliation',
        'title' => 'ArticleTitle',
        'year' => 'Year',
        'coverage' => 'MedlinePgn',
        'pmid' => 'PMID'
    );

    /**
     * Prints a short usage example wrapped in a <pre> element.
     */
    public function help(){
        echo '<pre>Sample usage:
$retriever = createHttpRequest("pubmed");
$retriever->setParam("term", "stem cells");
$retriever->setParam("dispmax", 50);
$result_array = $retriever->fetch_results("array");
$result_count = $retriever->get_result_count();
Mass usage:
$retriever = createHttpRequest("pubmed");
$query_info = $retriever->prepare_mass($searchTerm);
$result_array = $retriever->fetch_mass($query_info["query_key"], $query_info["WebEnv"], 0, 20);
</pre>';
    }

    /**
     * Stores $value under the 'query' request parameter.
     *
     * @param mixed $value
     */
    public function setQueryParam($value){
        $this->params['query'] = $value;
    }

    /**
     * Reduces records (as produced by cleanRecord()) to a fixed set of keys:
     * title, author, date, year, number and link.
     *
     * cleanRecord() only sets most fields when they are present in the XML, so
     * a missing source field yields null here instead of an undefined-index
     * notice.
     *
     * @param array $array list of record arrays
     * @return array list of standardized items, in the same order
     */
    public function standardize($array){
        // standardized key => key in the source record
        $fieldMap = array(
            'title' => 'title',
            'author' => 'author',
            'date' => 'pubdate',
            'year' => 'year',
            'number' => 'pmid',
            'link' => 'link'
        );
        $itemlist = array();
        foreach($array as $record){
            $newitem = array();
            foreach($fieldMap as $target => $source){
                $newitem[$target] = isset($record[$source]) ? $record[$source] : null;
            }
            $itemlist[] = $newitem;
        }
        return $itemlist;
    }

    /**
     * Runs an esearch request with usehistory=y for $term and extracts the
     * history handle needed by fetch_mass().
     *
     * @param string $term search term (url-decoded, then url-encoded before use)
     * @return array|false array with keys 'query_key' and 'WebEnv', or false
     *                     when the page is empty, cannot be parsed or holds no WebEnv
     */
    public function prepare_mass($term){
        $url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&usehistory=y&term='.urlencode(urldecode($term));
        $content = $this->retrieve_page($url);
        if(!$content) return false;
        $xml = new DOMDocument();
        if(!($xml->loadXML($content))){
            $this->doDebug(true,'Could not parse xml.', false, __FILE__,__LINE__);
            return false;
        }
        $return = array();
        $return['query_key'] = $this->nodeValue($xml->getElementsByTagName('QueryKey'));
        $return['WebEnv'] = $this->nodeValue($xml->getElementsByTagName('WebEnv'));
        if(!$return['WebEnv']) return false;
        return $return;
    }

    /**
     * Fetches one page of records through efetch, using the handle returned
     * by prepare_mass().
     *
     * @param string $query_key 'query_key' value from prepare_mass()
     * @param string $web_env   'WebEnv' value from prepare_mass()
     * @param int    $retstart  offset of the first record to fetch
     * @param int    $retmax    maximum number of records to fetch
     * @return array|false list of record arrays (see cleanRecord()), or false
     *                     when the page is empty or cannot be parsed
     */
    public function fetch_mass($query_key, $web_env, $retstart=0, $retmax=50){
        $parameters = array(
            'db' => 'pubmed',
            'retmax' => $retmax,
            'retstart' => $retstart,
            'query_key' => $query_key,
            'WebEnv' => $web_env,
            'retmode' => 'xml'
        );
        $url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?".$this->pubmed_query_string($parameters);
        $content = $this->retrieve_page($url);
        if(!$content) return false;
        $xml = new DOMDocument();
        if(!($xml->loadXML($content))){
            $this->doDebug(true,'Could not parse xml.', false, __FILE__,__LINE__);
            return false;
        }
        return $this->pubmed_collect_records($xml);
    }

    /**
     * Queries the PubMed web search with the stored parameters (merged over
     * the defaults below) and parses the XML report.
     *
     * @param string $return format selector handed to manage_return()
     * @return mixed whatever manage_return() produces for the record list, or
     *               false when the page is empty or cannot be parsed
     */
    public function fetch_results($return='array'){
        $parameters = array( 'pubmedfilters'=>'true', 'retmax' => '20', 'report'=>'xml', 'term'=>'' );
        $parameters = array_merge($parameters, $this->params);
        $url = "http://www.ncbi.nlm.nih.gov/pubmed?".$this->pubmed_query_string($parameters);
        $content = $this->retrieve_page($url);
        if(!$content) return false;
        // The search/replace pairs here were identical ('<' => '<'), which
        // made this call a no-op. Un-escaping entity-encoded angle brackets
        // is the only reading under which the call does anything.
        // NOTE(review): assumes the report page delivers its XML
        // entity-escaped -- confirm against a live response.
        $content = str_replace(array('&lt;','&gt;'),array('<','>'),$content);
        $xml = new DOMDocument();
        if(!($xml->loadXML($content))){
            $this->doDebug(true,'Could not parse xml.', false, __FILE__,__LINE__);
            return false;
        }
        return $this->manage_return($this->pubmed_collect_records($xml), $return);
    }

    /**
     * Joins parameters into a query string. Every value is url-decoded and
     * then url-encoded, so already-encoded input is not encoded twice.
     *
     * @param array $parameters name => value
     * @return string "a=1&b=2" style string without a leading '?'
     */
    private function pubmed_query_string($parameters){
        $pairs = array();
        foreach($parameters as $key => $value){
            $pairs[] = $key."=".urlencode(urldecode($value));
        }
        return implode('&', $pairs);
    }

    /**
     * Converts every PubmedArticle element of a parsed document into a
     * record array.
     *
     * @param DOMDocument $xml
     * @return array list of record arrays (see cleanRecord())
     */
    private function pubmed_collect_records($xml){
        $itemlist = array();
        foreach($xml->getElementsByTagName('PubmedArticle') as $record){
            $itemlist[] = $this->cleanRecord($record);
        }
        return $itemlist;
    }

    /**
     * Extracts one record array from a PubmedArticle element.
     *
     * Always set: 'publisher' (empty string), 'authors' (list of names),
     * 'author' (display string) and 'relation' (journal title, volume, issue;
     * empty string when no journal title is found). Set only when found:
     * the keys of $wanted_fields, 'link' (needs 'pmid') and 'pubdate'.
     *
     * @param DOMElement $record
     * @return array
     */
    private function cleanRecord($record){
        $item = array('publisher'=>'', 'authors'=>array());

        foreach($record->getElementsByTagName('Author') as $author){
            $fore = $this->nodeValue($author->getElementsByTagName('ForeName'));
            $last = $this->nodeValue($author->getElementsByTagName('LastName'));
            // trim() avoids the stray leading/trailing space when one part is missing.
            $autname = trim($fore.' '.$last);
            if($autname === ''){
                // Group authors carry a CollectiveName instead of personal names.
                $autname = trim((string) $this->nodeValue($author->getElementsByTagName('CollectiveName')));
            }
            if($autname !== '') $item['authors'][] = $autname;
        }

        foreach($this->wanted_fields as $key=>$var){
            $try = $this->nodeValue($record->getElementsByTagName($var));
            if($try) $item[$key] = $try;
        }
        if(isset($item['pmid'])) $item['link'] = 'http://www.ncbi.nlm.nih.gov/pubmed/'.$item['pmid'];

        // Use the first of these date elements that yields a parsable date.
        foreach(array('PubDate','PubMedPubDate','ArticleDate') as $tag){
            $try = $this->pubmed_try_date($record->getElementsByTagName($tag));
            if($try){
                $item['pubdate'] = $try;
                break;
            }
        }

        // Guard against records without a Journal element; calling a method
        // on the null returned by item(0) would be a fatal error.
        $journal = $record->getElementsByTagName('Journal')->item(0);
        $j = array();
        if($journal){
            foreach($journal->getElementsByTagName('*') as $node){
                if(in_array($node->nodeName, array('Title','Volume','Issue','ISOAbbreviation'), true)){
                    $j[$node->nodeName] = $node->nodeValue;
                }
            }
        }
        if(isset($j['Title'])){
            $item['relation'] = $j['Title'];
            if(isset($j['Volume'])) $item['relation'] .= ', Vol '.$j['Volume'];
            if(isset($j['Issue'])) $item['relation'] .= ', '.$j['Issue'];
        }else{
            $item['relation'] = '';
        }

        // More than four authors are abbreviated to the first two plus "et al.".
        if(count($item['authors']) > 4){
            $item['author'] = $item['authors'][0].', '.$item['authors'][1].' et al.';
        }else{
            $item['author'] = implode(', ',$item['authors']);
        }
        return $item;
    }

    /**
     * Returns the text of the first node of a node list.
     *
     * Uses the list's length property rather than count(), which only works
     * on DOMNodeList from PHP 7.2 on.
     *
     * @param DOMNodeList|null|false $node
     * @return string|false the text, or false when the list is empty or the
     *                      text is falsy (an empty string or "0")
     */
    private function nodeValue($node){
        if(!$node || $node->length < 1) return false;
        $first = $node->item(0);
        if(!$first) return false;
        $value = $first->nodeValue;
        return $value ? $value : false;
    }

    /**
     * Builds a 'Y-m-d' date from the Year/Month/Day children of the first
     * node in $pubdate.
     *
     * Month may be numeric ("05") or a name ("May"). A missing Day is left
     * out of the string given to strtotime(); a missing Year or Month gives
     * false so the caller can try another date element.
     *
     * @param DOMNodeList|null|false $pubdate
     * @return string|false formatted date, or false when no date can be built
     */
    private function pubmed_try_date($pubdate){
        if(!$pubdate) return false;
        $thepubdate = $pubdate->item(0);
        if(!$thepubdate) return false;
        $day = $this->nodeValue($thepubdate->getElementsByTagName('Day'));
        $month = $this->nodeValue($thepubdate->getElementsByTagName('Month'));
        $year = $this->nodeValue($thepubdate->getElementsByTagName('Year'));
        if($year === false || $month === false) return false;
        // is_numeric() replaces a loose (int)$month != $month test whose
        // result for month names differs between PHP 7 and PHP 8.
        if(is_numeric($month)){
            $try = strtotime($year.'-'.$month.($day === false ? '' : '-'.$day));
        }else{
            $try = strtotime(($day === false ? '' : $day.' ').$month.' '.$year);
        }
        return ($try?date('Y-m-d',$try):false);
    }

    /**
     * Returns the number of PubMed hits for the stored 'term' parameter, as
     * reported by egquery. The value is kept in $this->resultCount and reused
     * on later calls.
     *
     * @return string|false the count as found in the XML, or false when no
     *                      term is set, the page is empty or unparsable, or
     *                      no pubmed entry is present
     */
    public function get_result_count(){
        if($this->resultCount) return $this->resultCount;
        $term = isset($this->params['term'])?urlencode(urldecode($this->params['term'])):false;
        if(!$term) return false;
        $url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi?term=".$term;
        $content = $this->retrieve_page($url);
        if(!$content) return false;
        $xml = new DOMDocument();
        if(!($xml->loadXML($content))){
            $this->doDebug(true,'Could not parse xml.', $url, __FILE__,__LINE__);
            return false;
        }
        foreach($xml->getElementsByTagName('ResultItem') as $record){
            $type = $record->getElementsByTagName('DbName');
            // Only the entry whose DbName is 'pubmed' is of interest.
            if($type->length < 1 || $type->item(0)->nodeValue != 'pubmed') continue;
            $count = $record->getElementsByTagName('Count');
            if($count->length > 0){
                $this->resultCount = $count->item(0)->nodeValue;
                return $this->resultCount;
            }
        }
        return false;
    }
}