Location: PHPKode > projects > crVCL PHP Framework > xdxf.lib.php
<?php
/* 
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/MPL-1.1.html or see MPL-1.1.txt in directory "license"

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See the License for
the specific language governing rights and limitations under the License.

The Initial Developers of the Original Code are: 
Copyright (c) 2003-2012, CR-Solutions (http://www.cr-solutions.net), Ricardo Cescon
All Rights Reserved.

Contributor(s): Ricardo Cescon

crVCL PHP Framework Version 2.4
*/
############################################################
if(!defined("XDXF_LIB")){
	define ("XDXF_LIB", 1);
############################################################
//-------------------------------------------------------------------------------------------------------------------------------------

	
/**
 * Parser for the XML Dictionary Exchange Format (XDXF); for more information see http://xdxf.revdanica.com<br><br>To download dictionaries, see the project website or visit http://xdxf.revdanica.com/down/index.php or http://www.dicts.info/uddl.php for the latest files.<br>By obtaining, using and/or copying the data of any XDXF file, you agree that you have read, understood, and will comply with the terms and conditions of that XDXF file.
 */	

class XDXF_Parser{	
	private $m_xdxf_path = null;	
	private $m_buf = null;
	private $m_cache = false;
	private $m_cache_buf = array();
//-------------------------------------------------------------------------------------------------------------------------------------
   /**
    * Create a parser bound to the directory that holds the XDXF dictionary files.
    *
    * @param string $path_to_the_xdxf_files directory of the XDXF files; stored after being passed through fixpath()
    * @param bool $use_cache true to allow file contents to be kept in memory between lookups
    */
   function __construct($path_to_the_xdxf_files, $use_cache=false){
   	// remember the caching preference, then the normalised dictionary directory
   	$this->m_cache = $use_cache;
   	$this->m_xdxf_path = fixpath($path_to_the_xdxf_files);
   }
//-------------------------------------------------------------------------------------------------------------------------------------
   /**
    * Drop the cached file contents and the current buffer, then hand over to the
    * framework's garbage-collection helper.
    */
   function __destruct(){
   	// clear the values first, then remove the properties themselves
   	$this->m_cache_buf = null;
   	$this->m_buf = null;
   	unset($this->m_cache_buf, $this->m_buf);

   	// threshold is taken from the global crVCL configuration
   	gc_collect_cycles_overX($GLOBALS['CRVCL']['GC_COLLECT_CYCLES_PERCENT']);
   }
//-------------------------------------------------------------------------------------------------------------------------------------
   /**
    * Load the content of a XDXF file into $this->m_buf.
    *
    * With caching enabled the content is served from $this->m_cache_buf if it is already
    * there; otherwise the file is read once and stored in the cache when less than half
    * of the remaining memory budget is needed for it (or when no memory limit is set).
    *
    * @param string $file file name relative to the XDXF path given to the constructor
    * @return void
    * @throws Exception if the file does not exist or cannot be read
    */
   private function open($file){
   	$xfile = str_replace(' ', '_', $file);
   	if($this->m_cache && isset($this->m_cache_buf[$xfile])){
   		$this->m_buf = &$this->m_cache_buf[$xfile];
   		return;
   	}

   	$fullpath = $this->m_xdxf_path.'/'.$file;
   	if(!is_file($fullpath)){
   	   throw new Exception('Error open XDXF file '.$file.' in '.$this->m_xdxf_path);
   	}

   	// decide up front whether the content may be kept in the cache
   	$store_in_cache = false;
   	if($this->m_cache){
   		$raw_limit = ini_get('memory_limit');
   		if(trim((string)$raw_limit) === '-1'){
   			// -1 means "no memory limit": there is no budget to compare against
   			$store_in_cache = true;
   		}else{
   			$max_available_mem = str2byteInt($raw_limit);
   			$current_used_mem = memory_get_usage(true);
   			$fsize = filesize($fullpath);
   			$store_in_cache = ($fsize !== false && ($max_available_mem - $current_used_mem)/2 > $fsize);
   		}
   	}

   	// read the file exactly once
   	$content = file_get_contents($fullpath);

   	// m_buf may still be reference-bound to the cache entry of a previously opened file;
   	// detach it first, otherwise the assignments below would overwrite that cache entry
   	unset($this->m_buf);
   	$this->m_buf = null;

   	if($content === false){
   		throw new Exception('Error read XDXF file '.$file.' in '.$this->m_xdxf_path);
   	}

   	if($store_in_cache){
   		$this->m_cache_buf[$xfile] = $content;
   		$this->m_buf = &$this->m_cache_buf[$xfile];
   	}else{
   		$this->m_buf = $content;
   	}
   }
//-------------------------------------------------------------------------------------------------------------------------------------
   /**
    * Return the definitions of a word, or of similar words, from a XDXF dictionary.
    *
    * The file content is pre-filtered with a regular expression for <ar> elements that
    * contain the word; each hit is then parsed and its key (<k>) is compared with the word.
    *
    * Result layout: array keyed by the matched key (blanks replaced by "_", lower-cased
    * unless $case_sesitive is true). Every entry is
    * array('word'=>..., 'pos'=>..., 'ex'=>..., 'def'=>array of array('def'=>...[, 'pos'=>...][, 'ex'=>...])),
    * where 'ex' / 'pos' hold what strrcut() returns for a "(" / "{" annotation.
    *
    * @param string $xdxf_file dictionary file name, relative to the path given to the constructor
    * @param string $word word to look up; must be at least 3 bytes long after trimming
    * @param bool $exact true: the key must equal the word, false: the key only has to contain the word
    * @param bool $reverse true: swap key and definition of every article before matching
    * @param bool $case_sesitive true: compare case sensitive
    * @return array|false false if the word is empty/too short or the pre-filter found nothing, otherwise the (possibly empty) result array
    * @throws Exception if the dictionary file cannot be opened or read
    */
   function search($xdxf_file, $word, $exact=true, $reverse=false, $case_sesitive=false){
   	if(empty($word)) return false;
   	// trim BEFORE the length check, otherwise whitespace padding bypasses the minimum
   	// length and a blank word would match every article of the dictionary
   	$word = trim($word);
   	if(strlen($word) < 3) return false; // NOTE(review): byte length, not character count
   	$word = utf8_fix($word);

   	$ret = array();

   	$this->open($xdxf_file);
   	$buf = &$this->m_buf;

   	// pre-filter: every <ar>...</ar> element that contains the word anywhere
   	$matches = array();
   	$found = @preg_match_all("#(<ar[^>]*>(?:(?!</ar>).)*".preg_quote($word, '#')."(?:(?!</ar>).)*</ar>)#siu", $buf, $matches, PREG_SET_ORDER, 0);

   	// 0 (no hit) and false (PCRE error) are both reported as "not found"
   	if(!$found) return false;

   	$subxml = '';
   	foreach($matches as $match){
   		if(isset($match[0])){
   			$subxml .= $match[0].CRLF;
   		}
   	}

   	if($reverse){
   		// rewrite every article so that its definition(s) become the key and the key becomes the body
   		$xml_ar = new xmlHelper();
   		$xml_ar->setXML($subxml);

   		for($ar_pos = 1; ($ar = $xml_ar->parseTag('ar', $ar_pos)) !== false; $ar_pos++){
   			$xml = new xmlHelper();
   			$xml->setXML($ar);

   			// join all <def> elements of the article with ", "
   			$joined = '';
   			for($def_pos = 1; ($one_def = $xml->parseTag('def', $def_pos)) !== false; $def_pos++){
   				$joined .= ', '.$one_def;
   			}

   			if($joined !== ''){
   				$def = trim(substr($joined, 1));
   			}else{
   				// no <def> element: fall back to the text strrcut() yields for the last ">"
   				$def = str_ireplace('</ar>', '', $ar);
   				$def = strrcut($def, '>', true);
   				$def = trim($def);
   				$def = str_replace("\r", '', $def);
   				$def = str_replace("\n", ', ', $def);
   			}

   			$k = $xml->parseTag('k', 1);

   			$xml_ar->changeTags('ar', '<k>'.$def.'</k>'.LF.$k, $ar_pos);
   		}
   		$subxml = $xml_ar->getXML(true);
   	}

   	$xml_ar = new xmlHelper();
   	$xml_ar->setXML($subxml);

   	for($ar_pos = 1; ($ar = $xml_ar->parseTag('ar', $ar_pos)) !== false; $ar_pos++){
   		$xml = new xmlHelper();
   		$xml->setXML($ar);
   		$k = $xml->parseTag('k', 1);

   		if($k === false) continue;

   		// a key may list several alternatives separated by "," or ";" - the first one that matches wins
   		// NOTE(review): $k_pos / $k_ex are not reset per alternative, so an annotation of an
   		// earlier, non-matching alternative is kept (unchanged from the previous behaviour)
   		$k_pos = '';
   		$k_ex = '';
   		$matched = false;
   		foreach(preg_split("/[,;]+/", $k) as $candidate){
   			$k = $candidate;

   			if(strpos($k, '(') !== false){
   				$k_ex = strrcut($k, '(');
   				$k = strcut($k, '(');
   			}

   			if(strpos($k, '{') !== false){
   				$k_pos = strrcut($k, '{');
   				$k = strcut($k, '{');
   			}

   			if(strpos($k, '[') !== false){
   				$k = strcut($k, '[');
   			}

   			// strip left over brackets, dots and quotes
   			$k = preg_replace('/[\[\]\{\}\[\]\.\(\)\"\']+/', '', $k);

   			$k = trim($k);
   			$k_pos = trim($k_pos);
   			$k_ex = trim($k_ex);

   			// strict string comparison: a loose "==" treats numeric looking strings
   			// such as "10" and "+10" as equal
   			if($exact){
   				if($case_sesitive){
   					$matched = ((string)$word === (string)$k);
   				}else{
   					$matched = (strtolower($word) === strtolower($k));
   				}
   			}else{
   				if($case_sesitive){
   					$matched = (strpos($k, $word) !== false);
   				}else{
   					$matched = (stripos($k, $word) !== false);
   				}
   			}

   			if($matched) break;
   		}

   		// none of the alternatives matched: skip this article
   		if(!$matched) continue;

   		$def = $xml->parseTag('def', 1);
   		if($def === false){
   			// no <def> element: fall back to the text strrcut() yields for the last ">"
   			$def = str_ireplace('</ar>', '', $ar);
   			$def = strrcut($def, '>', true);
   		}
   		$def = trim($def);

   		$def = str_replace(LF.LF, LF, $def);
   		$pieces = preg_split("/[\n\r,;]+/", $def);

   		// every line / comma / semicolon separated part is one definition
   		$def = array();
   		foreach($pieces as $piece){
   			$pos = '';
   			$ex = '';

   			if(strpos($piece, '(') !== false){
   				$ex = strrcut($piece, '(');
   				$piece = strcut($piece, '(');
   			}

   			if(strpos($piece, '{') !== false){
   				$pos = strrcut($piece, '{');
   				$piece = strcut($piece, '{');
   			}

   			if(strpos($piece, '[') !== false){
   				$piece = strcut($piece, '[');
   			}

   			$piece = preg_replace('/[\[\]\{\}\[\]\.\(\)\"\']+/', '', $piece);

   			$a_def = array('def'=>trim($piece));
   			if($pos != '')$a_def['pos'] = trim($pos);
   			if($ex != '')$a_def['ex'] = trim($ex);
   			$def[] = $a_def;
   		}

   		$kx = str_replace(' ','_',$k);
   		if(!$case_sesitive){
   			$kx = strtolower($kx);
   		}

   		// the same key can occur in several articles: collect all definitions under one entry
   		if(isset($ret[$kx]['def'])){
   			$def = array_merge($ret[$kx]['def'], $def);
   		}
   		$ret[$kx] = array('word'=>$k, 'pos'=>$k_pos, 'ex'=>$k_ex, 'def'=>$def);
   	}

   	return $ret;
   }
//-------------------------------------------------------------------------------------------------------------------------------------   
   /**
    * Translate a list of words separated by comma or semicolon.
    *
    * Every word is looked up with an exact search in $xdxf_file; if nothing is found and
    * $xdxf_file_reverse is given, that dictionary is searched in reverse mode. Words which
    * cannot be translated are kept or returned as empty string, depending on $keepOriginal.
    *
    * @param string $xdxf_file dictionary file name
    * @param string $words words separated by "," or ";"
    * @param string $xdxf_file_reverse optional dictionary of the opposite direction, used as fallback
    * @param bool $case_sesitive true: compare case sensitive
    * @param bool $keepOriginal true: keep an untranslated word as it is, false: return it as empty string
    * @param int $max_levenshtein_words maximum number of definitions per word added to the comparison context (strictness vs. performance)
    * @return array|false false if $words is empty, otherwise array word => translation; the key is the
    *                     word with blanks replaced by "_" (lower-cased unless $case_sesitive is true)
    * @throws Exception if a dictionary file cannot be opened or read
    */
   function translate($xdxf_file, $words, $xdxf_file_reverse='', $case_sesitive=false, $keepOriginal=true, $max_levenshtein_words=10){
   	if(empty($words)) return false;
   	$words = str_replace(';', ',' , $words);
   	$wl = explode(',', $words);

   	$levenshtein = ''; // context string: definitions (and "ex" annotations) of all words
   	$definitions = array();
   	for($w = 0; $w < acount($wl); $w++){
   		$wl[$w] = trim($wl[$w]);

   		$defs = $this->search($xdxf_file, $wl[$w], true, false, $case_sesitive);

   		// reverse translating if not found, as sample for en => de try de => en
   		if(!empty($xdxf_file_reverse) && empty($defs)){
   			$defs = $this->search($xdxf_file_reverse, $wl[$w], true, true, $case_sesitive);
   		}

   		// same key normalisation as used for the result of search()
   		$wlx = str_replace(' ','_',$wl[$w]);
   		if(!$case_sesitive){
   			$wlx = strtolower($wlx);
   		}

   		if($defs === false || !isset($defs[$wlx]['def'])){
   			// not possible to translate: leave the word in the original language or empty
   			$definitions[$wlx] = $keepOriginal ? $wl[$w] : '';
   			continue;
   		}

   		$defs = $defs[$wlx]['def'];
   		for($d = 0; $d < acount($defs); $d++){
   			// NOTE(review): this overwrites instead of appending, so only the last definition
   			// survives and the multi-candidate selection below looks unreachable - confirm intent
   			$definitions[$wlx] = $defs[$d]['def'];

   			if($d < $max_levenshtein_words){
   				$levenshtein .= $definitions[$wlx].'; ';

   				if(isset($defs[$d]['ex'])){
   					$levenshtein .= $defs[$d]['ex'].'; ';
   				}
   			}
   		}
   	}

   	$c_levenshtein = substr_count($levenshtein, ';');

   	// Minimum number of candidates worth comparing in full. The former brace-less if/else
   	// bound its "else" to the inner "if", which made the "< 3" case unreachable.
   	$min_candidates = ($c_levenshtein > 5) ? 4 : 3;

   	$translation = array();
   	// foreach instead of each(): each() was removed in PHP 8.0
   	foreach($definitions as $word => $defs){
   		$key = str_replace(' ', '_', $word);
   		$defs = explode(';', $defs);

   		if(acount($defs) == 1){
   			$translation[$key] = trim($defs[0]);
   			continue;
   		}

   		// several candidates: take the one closest to the context string
   		$shortest = 9999999;
   		$closest = '';
   		for($d = 0; $d < acount($defs); $d++){
   			$lev = levenshteinEx($levenshtein, $defs[$d]);

   			if($lev == 0){
   				$closest = $defs[$d];
   				$shortest = 0;
   				break;
   			}

   			if($lev <= $shortest || $shortest < 0){
   				$closest = $defs[$d];
   				$shortest = $lev;
   			}

   			// with only a few candidates the first one is taken
   			if(acount($defs) < $min_candidates) break;
   		}
   		$translation[$key] = trim($closest);
   	}
   	return $translation;
   }
//-------------------------------------------------------------------------------------------------------------------------------------
}

############################################################
}
############################################################
?>
Return current item: crVCL PHP Framework