Location: PHPKode > projects > crVCL PHP Framework > crvcl/dict.lib.php
<?php
/* 
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/MPL-1.1.html or see MPL-1.1.txt in directory "license"

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See the License for
the specific language governing rights and limitations under the License.

The Initial Developers of the Original Code are: 
Copyright (c) 2003-2011, CR-Solutions (http://www.cr-solutions.net), Ricardo Cescon
All Rights Reserved.

Contributor(s): Ricardo Cescon

crVCL PHP Framework Version 2.3
*/
############################################################
if(!defined("DICT_LIB")){
	define ("DICT_LIB", 1);
############################################################
//-------------------------------------------------------------------------------------------------------------------------------------
/**
 * Client for the Dictionary Server Protocol (DICT, RFC 2229); for more see http://www.dict.org<br><br>
 * By obtaining, using and/or copying data of any DICT database, you agree that you have read, understood,
 * and will comply with the terms and conditions of that database. To get the information of a database
 * use the method "show".<br><br>
 * Recommended server: "dict.uni-leipzig.de"
 *
 * Every command (show/define/match) opens a fresh connection via reconnect() before it is sent.
 * Failures are reported by returning false; details are then available through error().
 */
class dictBase{
	private $m_host = null;
	private $m_port = null;
	private $m_max_length = 6144;  // 1024 * 6 to cover UTF-8 chars
	private $m_socket;

	// status codes accepted at the start of a response line; anything else is treated as a protocol error
	private $m_valid_codes = array(
		110, 111, 112, 113, 114, 130, 150, 151, 152,
		210, 220, 221, 230, 250,
		330,
		420, 421,
		500, 501, 502, 503, 530, 531, 532,
		550, 551, 552, 554, 555
	);

	// false while no error occurred, otherwise array("errno"=>..., "errmsg"=>...)
	private $m_error = false;
	private $m_client_info = "";

	/**
	 * timeout in seconds, used for opening the connection and for reading responses
	 *
	 * @var int
	 */
	public $m_socket_timeout_s = 5;
	/**
	 * contains the server info after a successful connection
	 *
	 * @var string
	 */
	public $m_server_info = "";
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * client info should contain information about your company/service, for example the name and website;
	 * it is sent to the server with the CLIENT command on every connect
	 *
	 * @param string $client_info
	 */
	public function __construct($client_info){
		$this->m_client_info = $client_info;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	public function __destruct(){
		$this->close();
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * replace CR/LF in a caller supplied value, so it cannot terminate the current command line
	 * and smuggle a second command to the server
	 *
	 * @param string $s
	 * @return string
	 */
	private function strip_crlf($s){
		return str_replace(array("\r", "\n"), ' ', (string)$s);
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * read the raw response of the last command from the socket
	 *
	 * @param bool $is_star if false, reading stops at the first line consisting of a single dot;
	 *                      if true, reading continues until one of the known final status lines was seen
	 * @return string raw response (may be empty or incomplete on timeout / closed connection)
	 */
	private function read($is_star=false){
		$ret = '';
		stream_set_timeout($this->m_socket, (int)$this->m_socket_timeout_s, 0);

		// compare against false explicitly, a line like "0" must not end the loop
		while(($buf = fgets($this->m_socket, $this->m_max_length)) !== false){

			if(!$is_star && preg_match("/^\.\r\n$/", $buf)){
				break;
			}

			$ret .= $buf;

			// final status lines, nothing more will follow
			if(stripos($ret, ".\r\n250 ok") !== false || stripos($ret, "\r\n552 no match") !== false
					|| stripos($ret, "\r\n501 syntax error, illegal parameters") !== false || stripos($ret, "\r\n550 invalid database") !== false){
				break;
			}
		}
		return $ret;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * split a raw response into a list of array("code"=>..., "msg"=>...)
	 * "msg" is the text after the status code; if text lines follow the status line, "msg" is an array
	 * with the status text at index 0 followed by the text lines
	 *
	 * @param string $buf raw response as returned by read()
	 * @return array|false false on an unknown status code or a status code >= 300 (see error())
	 */
	private function parse_msg($buf){
		$msg_list = array();
		$buf = explode("\r\n", $buf);
		$lines = count($buf);

		for($l = 0; $l < $lines; $l++){
			$code = left($buf[$l], 3);

			if(empty($buf[$l])){continue;}

			if(!in_array($code, $this->m_valid_codes)){
				$this->m_error = array("errno"=>-99, "errmsg"=>"not valid return code/msg from server (".$code.")");
				return false;
			}

			if($code >= 300){
				$this->m_error = array("errno"=>$code, "errmsg"=>substr($buf[$l],4));
				return false;
			}

			$msg = substr($buf[$l],4);

			// collect the text lines which belong to this status line
			while(isset($buf[$l+1])){
				$prefix = left($buf[$l+1], 3);

				// directly after the very first line every numeric prefix starts a new status line,
				// later only a known status code does (text lines may start with other numbers)
				if(is_numeric($prefix) && ($l <= 0 || in_array($prefix, $this->m_valid_codes))){
					break;
				}

				$l++;

				if(!empty($buf[$l])){
					if(!is_array($msg)){
						$msg = array($msg);
					}
					$msg[] = $buf[$l];
				}
			}

			$msg_list[] = array("code"=>$code, "msg"=>$msg);
		}
		return $msg_list;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * reconnect, send one command and return the parsed response
	 *
	 * @param string $command command line without the trailing CRLF
	 * @param bool $is_star passed to read()
	 * @param string $context appended in brackets to the error message if the server reports an error
	 * @return array|false
	 */
	private function request($command, $is_star, $context){
		// connect() has already stored the reason in m_error
		if(!$this->reconnect()){
			return false;
		}

		fwrite($this->m_socket, $command."\r\n");

		$msg = $this->parse_msg($this->read($is_star));

		if($msg === false){
			$this->m_error['errmsg'] .= ' ('.$context.')';
			return false;
		}

		return $msg;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * return the last error as array with errno and errmsg, or false if no error occurred
	 *
	 * @return array|false
	 */
	public function error(){
		return $this->m_error;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * connect to a dictionary server and send the CLIENT identification
	 *
	 * @param string $host
	 * @param int $port
	 * @return bool
	 */
	public function connect($host='dict.org', $port=2628){
		$this->m_host = $host;
		$this->m_port = $port;

		$errno = 0;
		$errstr = '';

		$fp = fsockopen($this->m_host, $this->m_port, $errno, $errstr, $this->m_socket_timeout_s);
		if(!$fp){
			$this->m_error = array("errno"=>$errno, "errmsg"=>$errstr);
			return false;
		}

		$this->m_socket = $fp;

		###

		fwrite($this->m_socket, "CLIENT ".$this->strip_crlf($this->m_client_info)." - crVCL DICT Client " . (defined('CRVCL_VERSION')?CRVCL_VERSION:'') . " (www.cr-solutions.net)\r\n");
		$tmp = fgets($this->m_socket, $this->m_max_length);

		// only the first line sent by the server is evaluated here
		$code = ($tmp === false) ? '' : left($tmp, 3);

		if(!in_array($code, $this->m_valid_codes)){
			$this->m_error = array("errno"=>-99, "errmsg"=>"not valid return code/msg from server");
			$this->close();
			return false;
		}

		if($code >= 300){
			$this->m_error = array("errno"=>$code, "errmsg"=>rtrim(substr($tmp,4)));
			$this->close();
			return false;
		}

		$this->m_server_info = substr($tmp,4);

		return true;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * close the connection (sends QUIT first)
	 *
	 * @return bool result of fclose, false if there was no open connection
	 */
	public function close(){
		if($this->m_socket === null || !is_resource($this->m_socket)){
			return false;
		}

		fwrite($this->m_socket, "QUIT\r\n");
		fgets($this->m_socket, $this->m_max_length); // consume one response line before closing
		$ret = fclose($this->m_socket);
		$this->m_socket = null;
		return $ret;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * reconnect to the last dictionary server
	 *
	 * @return bool
	 */
	public function reconnect(){
		$this->close();
		return $this->connect($this->m_host, $this->m_port);
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * show available databases, search strategies, server information or database information
	 *
	 * @param string $s DB, STRAT, SERVER or INFO [db]
	 * @return array|false message list, false on error (see error())
	 */
	public function show($s){
		$s = $this->strip_crlf($s);
		return $this->request("SHOW ".$s, false, $s);
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * convert the message list returned by show() into a simple array:
	 * for DB and STRAT a map name => description (the description keeps its leading space),
	 * for code 112 and 114 the message itself
	 *
	 * @param array $msg
	 * @return array
	 */
	public function parseShow($msg){
		$ret = array();

		if(!is_array($msg)){
			return $ret;
		}

		foreach($msg as $entry){
			$code = $entry['code'];

			if($code == 110 || $code == 111){ // 110 => DB, 111 => STRAT
				$list = $entry['msg'];
				if(!is_array($list)){
					continue;
				}

				// index 0 is the text of the status line, the entries start at 1
				for($i = 1; $i < count($list); $i++){
					$line = $list[$i];
					$pos = strpos($line, ' ');

					if($pos === false){ // name without description
						$ret[$line] = '';
					}else{
						$ret[substr($line, 0, $pos)] = substr($line, $pos);
					}
				}
			}else if($code == 112 || $code == 114){
				$ret = $entry['msg'];
			}
		}

		return $ret;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * return the definitions of a word<br>the result depends on the dictionary
	 *
	 * @param string $word
	 * @param string $db
	 * @return array|false message list, false on error (see error())
	 */
	public function define($word, $db='*'){
		$word = $this->strip_crlf(utf8_fix($word));
		$db = $this->strip_crlf($db);

		return $this->request("DEFINE ".$db." ".$word, $db === '*', 'w:'.$word.' d:'.$db);
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * convert the message list returned by define() into a list with one element per definition (code 151),
	 * each with the keys word, db, dbdesc and definition (array of text lines)
	 *
	 * @param array $msg
	 * @return array
	 */
	public function parseDefine($msg){
		$ret = array();

		if(!is_array($msg)){
			return $ret;
		}

		foreach($msg as $entry){
			if($entry['code'] != 151){
				continue;
			}

			$item = array();
			$def = $entry['msg'];

			if(is_array($def)){
				foreach(array_values($def) as $i => $line){
					if($i == 0){
						// status text: word, database name, then the database description
						$tmp = explode(' ', $line);
						$item['word'] = $tmp[0];
						$item['db'] = isset($tmp[1]) ? $tmp[1] : '';
						$item['dbdesc'] = trim(implode(' ', array_slice($tmp, 2)));
					}else{
						$item['definition'][] = $line;
					}
				}
			}

			$ret[] = $item;
		}

		return $ret;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * searches an index for the dictionary, and reports words which were found using a particular strategy<br>the result depends on the dictionary and strategy
	 *
	 * @param string $word
	 * @param string $db
	 * @param string $strat
	 * @return array|false message list, false on error (see error())
	 */
	public function match($word, $db='*', $strat='exact'){
		$word = $this->strip_crlf(utf8_fix($word));
		$db = $this->strip_crlf($db);
		$strat = $this->strip_crlf($strat);

		return $this->request("MATCH ".$db." ".$strat." ".$word, $db === '*', 'w:'.$word.' d:'.$db.' s:'.$strat);
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * convert the message list returned by match() into a map database => match;
	 * if a database has more than one match, the value is an array with all matches of this database
	 *
	 * @param array $msg
	 * @return array
	 */
	public function parseMatch($msg){
		$ret = array();

		if(!is_array($msg)){
			return $ret;
		}

		foreach($msg as $entry){
			if($entry['code'] != 152){
				continue;
			}

			$match = $entry['msg'];
			if(!is_array($match)){
				continue;
			}

			// index 0 is the text of the status line, the matches start at 1
			for($i = 1; $i < count($match); $i++){
				$tmp = explode(' ', $match[$i], 2);
				$db = $tmp[0];
				$found = isset($tmp[1]) ? $tmp[1] : '';

				if($db === '.'){ // end of list marker
					continue;
				}

				if(!isset($ret[$db])){
					$ret[$db] = $found;
					continue;
				}

				// second match for this database: keep the first one as well
				if(!is_array($ret[$db])){
					$ret[$db] = array($ret[$db]);
				}
				$ret[$db][] = $found;
			}
		}

		return $ret;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
	/**
	 * translate a list of words separated by comma or semicolon (requires a translation dictionary like "eng-deu")<br>
	 * if a word has several possible translations, the one with the smallest levenshteinEx distance to
	 * the combined translations of all words is taken; words which cannot be translated are returned unchanged
	 *
	 * @param string $words
	 * @param string $db
	 * @return array map word (blanks replaced by "_") => translation
	 */
	public function translate($words, $db){
		$words = str_replace(';', ',' , $words);

		$levenshtein = '';
		$definitions = array();

		foreach(explode(',', $words) as $word){
			$word = trim($word);
			if($word === ''){ // e.g. trailing separator, nothing to look up
				continue;
			}

			$key = str_replace(' ', '_', $word);
			$defs = $this->define($word, $db);

			if($defs === false){ // not possible to translate, so leave the word in the original language
				$definitions[$key] = $word;
				continue;
			}

			foreach($defs as $def){
				// NOTE(review): index 2 is presumably the first translation line below the headword - confirm
				if(isset($def['code']) && $def['code'] == 151 && isset($def['msg']) && is_array($def['msg']) && isset($def['msg'][2])){
					$definitions[$key] = $def['msg'][2];
					$levenshtein .= $def['msg'][2].'; ';
				}
			}
		}

		$c_levenshtein = substr_count($levenshtein, ';');

		$translation = array();

		foreach($definitions as $word => $defs){
			$key = str_replace(' ', '_', (string)$word);
			$candidates = explode(';', $defs);
			$c_candidates = count($candidates);

			if($c_candidates == 1){
				$translation[$key] = trim($candidates[0]);
				continue;
			}

			// with only a few candidates the first one is taken; the limit depends on the amount of context
			// (the original nested if/else had no braces, so the else was bound to the inner if
			// and the "< 3" case could never be reached)
			if($c_levenshtein > 5){
				$take_first = ($c_candidates < 4);
			}else{
				$take_first = ($c_candidates < 3);
			}

			$shortest = 9999999;
			$closest = '';

			foreach($candidates as $candidate){
				$lev = levenshteinEx($levenshtein, $candidate);

				if($lev == 0){ // exact hit
					$closest = $candidate;
					$shortest = 0;
					break;
				}

				if($lev <= $shortest || $shortest < 0){
					$closest = $candidate;
					$shortest = $lev;
				}

				if($take_first){
					break;
				}
			}

			$translation[$key] = trim($closest);
		}

		return $translation;
	}
//-------------------------------------------------------------------------------------------------------------------------------------
}

############################################################
}
############################################################
?>
Return current item: crVCL PHP Framework