<?php
/**
* This class uses a Soundex-like algorithm to build a regular expression
* from a given word, so that a phonetic comparison can be performed on large texts.
*/
class SoundRegex
{
    /** Index in $_patterns of the vowel-like group, which is optional in a match. */
    const VOWEL_GROUP = 6;

    /** @var string Text that is searched. */
    private $_string = "";

    /** @var string Word to look for, with surrounding whitespace removed. */
    private $_word = "";

    /** @var array Phonetic groups: letters in the same group are interchangeable. */
    private $_patterns = array();

    /** @var string Regular expression built by the last search ("" before any search). */
    private $_pattern = "";

    /** @var array|null Cached search result; null until the search has run. */
    private $_matches = null;

    /** @var float Minimum similar_text() percentage for a match to be reported. */
    private $_tolerance = 50;

    /**
     * @param string $word   Word to search for.
     * @param string $string Text to search in.
     */
    public function __construct($word, $string)
    {
        $this->_word = trim((string) $word);
        $this->_string = (string) $string;
        $this->prepare();
    }

    /**
     * Set the tolerance for word matches.
     *
     * @param float $tolerance Minimum similarity percentage (0-100) used by
     *                         getMatchesSimilarity() and getString().
     */
    public function setTolerance($tolerance)
    {
        $this->_tolerance = $tolerance;
    }

    /**
     * Load the phonetic groups used by the algorithm.
     *
     * Groups 0-5 hold consonants; group VOWEL_GROUP holds vowels and the
     * vowel-like letters Y, W and H. Accented letters are stored as UTF-8.
     */
    private function prepare()
    {
        $this->_patterns = array(
            0 => array("B", "P", "F", "V", "b", "p", "f", "v"),
            1 => array(
                "C", "G", "J", "K", "Q", "S", "X", "Z", "Ç",
                "c", "g", "j", "k", "q", "s", "x", "z", "ç",
            ),
            2 => array("D", "T", "d", "t"),
            3 => array("L", "l"),
            4 => array("M", "N", "m", "n"),
            5 => array("R", "r"),
            self::VOWEL_GROUP => array(
                "A", "À", "Á", "Â", "Ã", "Ä", "a", "à", "á", "â", "ã", "ä",
                "E", "È", "É", "Ê", "Ë", "e", "è", "é", "ê", "ë",
                "I", "Ì", "Í", "Î", "Ï", "i", "ì", "í", "î", "ï",
                "O", "Ò", "Ó", "Ô", "Õ", "Ö", "o", "ò", "ó", "ô", "õ", "ö",
                "U", "Ù", "Ú", "Û", "Ü", "u", "ù", "ú", "û", "ü",
                "Y", "W", "H", "y", "w", "h",
            ),
        );
    }

    /**
     * Returns the regular expression created by the algorithm.
     *
     * @return string The pattern of the last search, or "" if no search has run yet.
     */
    public function getPattern()
    {
        return $this->_pattern;
    }

    /**
     * Build the regular expression for the word and run it against the text.
     *
     * The first letter is matched literally; every following letter is replaced
     * by the character class of its phonetic group. Immediately repeated letters
     * are collapsed into one.
     *
     * @return array preg_match_all() result (pattern order), or an empty array
     *               when there is nothing to search or nothing was found.
     */
    private function execute()
    {
        $this->_pattern = "";
        // Compare against "" explicitly: a plain truthiness test rejects "0".
        if ($this->_word === "" || $this->_string === "") {
            return array();
        }

        // Unicode mode needs valid UTF-8 on both sides; otherwise fall back to
        // byte-wise matching so that non-UTF-8 input still works for ASCII letters.
        $unicode = preg_match('//u', $this->_word) === 1
            && preg_match('//u', $this->_string) === 1;
        $letters = $unicode
            ? preg_split('//u', $this->_word, -1, PREG_SPLIT_NO_EMPTY)
            : str_split($this->_word);

        // The first letter is literal, so it must be escaped for the regex.
        $pattern = "/" . preg_quote($letters[0], "/");
        $total = count($letters);
        for ($i = 1; $i < $total; $i++) {
            if ($letters[$i] === $letters[$i - 1]) {
                continue;
            }
            $pattern .= $this->letterPattern($letters[$i], $unicode);
        }
        $pattern .= $unicode ? "/iu" : "/i";
        $this->_pattern = $pattern;

        $matches = array();
        if (!preg_match_all($pattern, $this->_string, $matches)) {
            return array();
        }
        return $matches;
    }

    /**
     * Translate one letter of the word into its regex fragment.
     *
     * @param string $letter  A single character of the word.
     * @param bool   $unicode Whether the pattern will run with the "u" modifier.
     * @return string Character class of the letter's group: one-or-more for
     *                consonants, zero-or-more for the vowel group, or ".?" for
     *                a character that belongs to no group.
     */
    private function letterPattern($letter, $unicode)
    {
        foreach ($this->_patterns as $index => $group) {
            if (!in_array($letter, $group, true)) {
                continue;
            }
            if (!$unicode) {
                // Without "u" a multi-byte member would be matched byte by byte.
                $group = array_filter($group, function ($member) {
                    return strlen($member) === 1;
                });
            }
            $quantifier = ($index === self::VOWEL_GROUP) ? "*" : "+";
            return "[" . implode("", $group) . "]" . $quantifier;
        }
        return ".?";
    }

    /**
     * Get the matches found by the algorithm.
     *
     * The search runs once; later calls return the cached result.
     *
     * @return array|false preg_match_all() style array whose element 0 holds the
     *                     distinct matched strings (original keys kept), or false
     *                     when nothing matched.
     */
    public function getMatches()
    {
        if ($this->_matches === null) {
            $matches = $this->execute();
            foreach ($matches as $group => $found) {
                $matches[$group] = array_unique($found);
            }
            $this->_matches = $matches;
        }
        return empty($this->_matches) ? false : $this->_matches;
    }

    /**
     * Get the matches that reach the tolerance, keyed by similarity.
     *
     * The key is the similar_text() percentage truncated to an integer, so two
     * matches with the same integer percentage share a key and only the last
     * one is kept.
     *
     * @return array Matches ordered by descending similarity percentage; empty
     *               when nothing matched.
     */
    public function getMatchesSimilarity()
    {
        $orderby = array();
        $matches = $this->getMatches();
        if ($matches === false) {
            return $orderby;
        }
        foreach ($matches as $group) {
            foreach ($group as $match) {
                $percent = 0;
                similar_text($this->_word, $match, $percent);
                if ($percent >= $this->_tolerance) {
                    // Explicit cast: float array keys are truncated anyway, and
                    // the implicit conversion is deprecated since PHP 8.1.
                    $orderby[(int) $percent] = $match;
                }
            }
        }
        krsort($orderby);
        return $orderby;
    }

    /**
     * Get the analysed text with the accepted matches wrapped in bold tags.
     *
     * Runs the search if it has not run yet. The text itself is not
     * HTML-escaped.
     *
     * @return string The text with every match that reaches the tolerance
     *                wrapped in <b></b>.
     */
    public function getString()
    {
        if ($this->getMatches() === false) {
            return $this->_string;
        }
        $word = $this->_word;
        $tolerance = $this->_tolerance;
        // Re-run the pattern instead of str_replace() on the matched strings:
        // this wraps exactly the matched occurrences, even when one match is a
        // substring of another.
        $highlighted = preg_replace_callback(
            $this->_pattern,
            function ($hit) use ($word, $tolerance) {
                $percent = 0;
                similar_text($word, $hit[0], $percent);
                return $percent >= $tolerance ? "<b>" . $hit[0] . "</b>" : $hit[0];
            },
            $this->_string
        );
        return $highlighted === null ? $this->_string : $highlighted;
    }
}
?>