Location: PHPKode > scripts > Code Graph > parse_gen.php
<?php

// dot -o parse.jpg -Tjpg parsed.dot
/**
  * parse_gen.php
  *
  * code to create a node graph of a PHP source file
  * C. McKinnon 23rd June 2006
  * 
  */

/**
  * init
  */
// Turn off PHP's built-in error output. E_NONE is not a PHP constant; the
// documented way to report nothing is the integer 0.
error_reporting(0);
// Allow up to ten minutes for the script to run.
set_time_limit(600);

// Default rendering options; check_params() overwrites them from the request.
$options=array(
		'output'=>'vertical',	/* horizontal or vertical */
		'exclude_undefined'=>false,
		'exclude_builtins'=>true
		);

/**
  * dotwriter
  *
  * Renders a list of recorded calls either as plain text (write_readable)
  * or as a Graphviz "dot" digraph (write_dot_file).
  *
  * $calls is a list of array($calling, $called, $cond, $loop) entries and
  * $fns is a map keyed by the names of the functions defined in the source.
  */
class dotwriter
{
	var $calls;
	var $fns;
	/**
	  * Constructor. $calls is held by reference, $fns is copied.
	  *
	  * PHP 8 no longer treats a method named after the class as the
	  * constructor, so the real initialisation lives here.
	  */
	function __construct(&$calls,&$fns)
	{
		$this->calls=&$calls;
		$this->fns=$fns;
	}
	/**
	  * Old-style (PHP 4) constructor name, kept so explicit calls and very
	  * old interpreters keep working. Simply forwards to __construct().
	  */
	function dotwriter(&$calls,&$fns)
	{
		$this->__construct($calls,$fns);
	}
	/**
	  * Print one human readable line per uncalled function and per call.
	  */
	function write_readable()
	{
		$uncalled=$this->get_uncalled();
		foreach ($uncalled as $fname=>$dummy) {
			print $fname . " is not explicitly called\n";
		}
		foreach ($this->calls as $call) {
			list($calling, $called, $cond, $loop)=$call;
			print "$calling -> $called ";
			if ($cond) {
				print "conditionally ";
			}
			if ($loop) {
				print "in loop ";
			}
			if (!isset($this->fns[$called])) {
				print "external";
			}
			print "\n";
		}
	}
	/**
	  * Print the call graph in dot syntax.
	  *
	  * Reads the globals $src_file (graph title), $options, $exclude_fns
	  * (names to leave out) and $builtins (names drawn in green). When
	  * $options['exclude_undefined'] is set, undefined callees are appended
	  * to the global $exclude_fns as they are met.
	  */
	function write_dot_file()
	{
		global $src_file,$options,$exclude_fns,$builtins;
		if (!is_array($exclude_fns)) {
			$exclude_fns=array();
		}
		// Keyed copies give constant-time membership tests; the lists can
		// hold every internal PHP function, so in_array() per call is costly.
		$excluded=array_flip($exclude_fns);
		$builtin_set=is_array($builtins) ? array_flip($builtins) : array();

		print "digraph \"" . basename($src_file) . "\" {\n";
		// NOTE(review): 'V' (vertical) selects rankdir=LR, which dot lays out
		// left-to-right - confirm this is the intended orientation.
		if (strtoupper(substr($options['output'],0,1))=='V') {
			print "rankdir=LR;\n";
		}
		print "fontsize=8;\n";
		$uncalled=$this->get_uncalled();
		print "node [ shape = polygon, sides = 4 ];\n";
		foreach ($uncalled as $fname=>$dummy) {
			print "\"$fname\" [color=lightblue2,style=filled];\n";
		}
		$done_already=array();
		foreach ($this->calls as $call) {
			list($calling, $called, $cond, $loop)=$call;
			$do="$calling -> $called";
			if (isset($done_already[$do])) {
				print "/* not repeating $do */\n";
				continue;
			}
			$done_already[$do]=true;
			// the array $exclude_fns contains a list of fns we don't
			// want to see in the output.
			if ($options['exclude_undefined']
					&& !isset($this->fns[$called])
					&& !isset($excluded[$called])) {
				$exclude_fns[]=$called;
				$excluded[$called]=true;
			}
			if (isset($excluded[$called])) {
				continue;
			}
			$edge="  edge [color=black];\n";
			$nodes="    \"$calling\" -> \"$called\"";
			$style=array();
			if ($cond) {
				// conditional calls are drawn dashed
				$style[]="style=dashed";
			}
			if (isset($builtin_set[$called])) {
				$nodes="\"$called\" [color=green,style=filled];\n$nodes";
			} elseif (!isset($this->fns[$called])) {
				$nodes="\"$called\" [color=salmon2,style=filled];\n$nodes";
			}
			if (count($style)) {
				$nodes.=' [' . implode(',',$style) . ']';
			}
			if ($loop) {
				// calls made inside a loop get a red edge
				$edge="  edge [color=red];\n";
			}
			print $edge . $nodes . ";\n";
		}
		print "}\n";

	}
	/**
	  * Return the subset of $this->fns that never appears as a callee.
	  */
	function get_uncalled()
	{
		$uncalled=$this->fns;
		foreach ($this->calls as $call) {
			unset($uncalled[$call[1]]);
		}
		return $uncalled;
	}
	/**
	  * this method never called - as it calls an undefined fn
	  */
	function dummy()
	{
		no_such_function();
	}
}
/**
  * Find the first T_STRING token within the 19 tokens following $offset.
  *
  * @param int   $offset   index in $src of the token being examined
  * @param array $src      token list (token_get_all() layout)
  * @param array $context  current context stack, list of array(token, value)
  * @param int   $curr_tok token id that triggered the lookup
  * @return array|false array(name, distance from $offset), or false when no
  *                     identifier was found (an error is triggered first).
  *                     When $curr_tok is T_FUNCTION and a T_CLASS entry is
  *                     on the context stack, the name is prefixed with '::'.
  */
function get_next_string($offset, &$src,&$context,$curr_tok)
{
	$found=false;
	$total=count($src);
	// stop at the end of the token list as well as after 19 tokens
	for ($x=1; ($x<20) && ($offset+$x<$total) && ($found===false); $x++) {
		$candidate=$src[$offset+$x];
		if (is_array($candidate) && ($candidate[0]==T_STRING)) {
			$found=array($candidate[1],$x);
		}
	}

	if ($found===false) {
		$additional=collapse_context($context);
		list($line,$some_code)=get_lines(false,$offset, $src,$additional);
		trigger_error("Unable to find T_STRING identifier at $line in parsed file <pre>$some_code</pre>");
		// nothing to decorate - do not go on to index into false
		return(false);
	}
	// are we defining a function inside a class?
	if ($curr_tok==T_FUNCTION) {
		// index 0 is deliberately not inspected
		for ($x=count($context)-1; $x>0; $x--) {
			if ($context[$x][0]==T_CLASS) {
				$found[0]='::' . $found[0];
				// one prefix is enough, even if several class frames remain
				break;
			}
		}
	}
	return($found);
}
/**
  * Handle a closing '}': unwind the context stack past the matching '{'
  * and then past the nearest block-opening keyword beneath it.
  *
  * @param array $context context stack (modified in place on success)
  * @param int   $offset  index of the '}' in $src, used for error reporting
  * @param array $src     token list
  * @return bool true when a block-opening entry was found and removed
  */
function end_block(&$context,$offset,&$src)
{
	$openers=array(T_FUNCTION, T_CLASS, T_IF, T_SWITCH, T_FOR, T_FOREACH,
			T_ELSE, T_ELSEIF, T_WHILE, T_DO);

	strip_context_to('{',$context,$offset,$src);

	// Work on a copy so the stack is only replaced when an opener is found.
	$remaining=$context;
	$found=false;
	// The bottom entry (index 0) is never popped.
	for ($steps=count($context)-1; $steps>0; $steps--) {
		$entry=array_pop($remaining);
		if (in_array($entry[0],$openers)) {
			$context=$remaining;
			$found=true;
			break;
		}
	}
	if ($found) {
		return($found);
	}
	$additional=collapse_context($context);
	list($line,$some_code)=get_lines(false,$offset, $src,$additional);
	trigger_error("Unmatched '}' at $line in parsed file<pre>$some_code</pre>");
	return($found);
}
/**
  * Pop entries off the context stack up to and including the most recent
  * entry whose value equals $char.
  *
  * @param string $char    value to search for, e.g. '(' or '{'
  * @param array  $context context stack; replaced only when $char is found
  * @param int    $offset  current index in $src, used for error reporting
  * @param array  $src     token list
  * @return bool true when $char was found, false (after triggering an
  *              error) otherwise
  */
function strip_context_to($char, &$context,$offset,&$src)
{
	$stack=$context;
	// The bottom entry (index 0) is never popped.
	$budget=count($context)-1;
	while ($budget-- > 0) {
		$entry=array_pop($stack);
		if ($entry[1]==$char) {
			$context=$stack;
			return(true);
		}
	}
	$additional=collapse_context($context);
	list($line,$some_code)=get_lines(false,$offset, $src,$additional);
	trigger_error("Could not find '$char' in context stack at $line in parsed file<pre>$some_code</pre>");
	return(false);
}

// $additional=collapse_context($context);
/**
  * Flatten a context stack into a single line for error messages.
  *
  * @param array $stack list of array(token, value) entries
  * @return string "context: " followed by "token(value)|" per entry and a
  *                trailing newline
  */
function collapse_context($stack)
{
	$parts=array();
	foreach ($stack as $entry) {
		$parts[]=$entry[0] . '(' . $entry[1] . ')|';
	}
	return('context: ' . implode('',$parts) . "\n");
}
/**
  * Walk the context stack from the top down to the nearest T_FUNCTION entry
  * and report which function we are in and how the current point is nested.
  *
  * @param array $context context stack, list of array(token, value)
  * @return array array($calling, $cond, $loop): the function name (null if
  *               the stack holds no T_FUNCTION entry), 1/0 for "inside a
  *               conditional" and 1/0 for "inside a loop"
  */
function find_calling_fn(&$context)
{
	// defined up front so a stack without a function frame yields null
	// instead of reading an undefined variable
	$calling=null;
	$loop=0;
	$cond=0;
	for ($x=count($context)-1; $x>=0; $x--) {
		list($tok,$val)=$context[$x];
		if ($tok==T_FUNCTION) {
			$calling=$val;
			break;
		}
		switch ($tok) {
			case T_IF:
			case T_ELSE:
			case T_ELSEIF:
			case T_SWITCH:
				$cond=1;
				break;
			case T_FOR:
			case T_FOREACH:
			case T_DO:
			case T_WHILE:
				$loop=1;
				break;
			default:
				break;
		}
	}
	return(array($calling, $cond, $loop));
}
/**
  * Debug helper: print one line per recorded call.
  *
  * @param array $calls list of array($calling, $called, $cond, $loop)
  */
function dump_calls($calls)
{
	foreach ($calls as $entry) {
		list($caller, $callee, $is_cond, $in_loop)=$entry;
		$line="$caller -> $callee ";
		$line.=$is_cond ? "conditionally " : '';
		$line.=$in_loop ? "in loop" : '';
		echo $line . "\n";
	}
}
/**
  * Is the next non-whitespace token after $offset the literal $char?
  *
  * @param string $char    single-character token to look for, e.g. '('
  * @param array  $tokens  token list (token_get_all() layout)
  * @param int    $offset  index of the token whose successor is tested
  * @return bool true when the first non-whitespace token after $offset is
  *              $char; false otherwise, including at the end of the list
  */
function followed_by($char,&$tokens,$offset)
{
	$total=count($tokens);
	// bounded by the real end of the list, not by its length from $offset
	for ($x=$offset+1; $x<$total; $x++) {
		$tok=$tokens[$x];
		if (is_array($tok) && ($tok[0]==T_WHITESPACE)) {
			continue;
		}
		// compare against the requested character rather than a fixed '('
		return($tok===$char);
	}
	return(false);
}
/**
  * Is the nearest non-whitespace token before $offset equal to $tok_const?
  *
  * $tok_const may be a token id (matched against array tokens) or a literal
  * character (matched against string tokens). Index 0 is never inspected;
  * if the scan runs out of tokens nothing is returned (null).
  *
  * @param mixed $tok_const token id or literal character to look for
  * @param array $tokens    token list (token_get_all() layout)
  * @param int   $offset    index of the token whose predecessor is tested
  * @return bool|null
  */
function prefixed_by($tok_const, &$tokens, $offset)
{
	$pos=$offset;
	while (--$pos>0) {
		$candidate=$tokens[$pos];
		$is_pair=is_array($candidate);
		if ($is_pair && ($candidate[0]==$tok_const)) {
			return(true);
		}
		if ($is_pair && ($candidate[0]==T_WHITESPACE)) {
			continue;
		}
		return($candidate===$tok_const);
	}
}

/**
  * Walk the token list once, maintaining a context stack and recording
  * every function/class definition and every call.
  *
  * @param array $tokens  token list from token_get_all()
  * @param array $context context stack; entries are array(token, value)
  * @param array $calls   receives array($calling, $called, $cond, $loop)
  *                       for every identifier followed by '('
  * @param array $fns     receives name => 1 for every function and class
  *                       definition ('::' prefixed for classes and methods)
  *
  * All four arguments are taken by reference and used directly; the function
  * no longer rebinds $tokens and $context to the globals of the same name.
  */
function parse_tokens(&$tokens,&$context,&$calls,&$fns)
{
	// get_lines() takes its third argument by reference but does not use it
	// when counting lines; this is only a placeholder.
	$unused=null;
	$last=count($tokens);
	for ($x=0; $x<$last; $x++) {
		if (is_array($tokens[$x])) {
			list($tok, $val)=$tokens[$x];
			switch($tok) {
				case T_IF:
				case T_ELSE:
				case T_ELSEIF:
				case T_SWITCH:
					$context[]=array($tok,'COND');
					break;
				case T_FOR:
				case T_FOREACH:
				case T_WHILE:
				case T_DO:
					$context[]=array($tok,'LOOP');
					break;
				case T_FUNCTION:
				case T_CLASS:
					list($named,$offset)=get_next_string($x, $tokens,$context,$tok);
					if ($tok==T_CLASS) $named='::' . $named;
					// possibly need to handle MAGIC methods here
					// skip ahead to the identifier so it is not seen as a call
					$x+=$offset;
					$context[]=array($tok,$named);
					$fns[$named]=1;
					break;
				case T_STRING:
					if (followed_by('(',$tokens,$x)) {
						$called=$val;
						// method calls and instantiations get the '::' prefix
						if (prefixed_by(T_OBJECT_OPERATOR, $tokens,$x)
								|| prefixed_by(T_NEW, $tokens, $x)) {
							$called='::' . $called;
						}
						list($calling,$cond,$loop)=find_calling_fn($context);
						$calls[]=array($calling, $called, $cond, $loop);
					}
					get_lines($val,false,$unused);
					break;
				case T_WHITESPACE:
				case T_INLINE_HTML:
				case T_COMMENT:
				case T_DOC_COMMENT: // PHP5+ tokenises /** */ comments separately
					// these tokens are counted for newlines
					get_lines($val,false,$unused);
					break;
				default:
					break;
			}
		} else {
			switch($tokens[$x]) {
				case '{':
				case '(':
				case '[':
					$context[]=array(0,$tokens[$x]);
					break;
				case ']':
					strip_context_to('[',$context,$x,$tokens);
					break;
				case ')':
					strip_context_to('(',$context,$x,$tokens);
					break;
				case '}':
					end_block($context,$x,$tokens);
					break;
				default:
					break;
			}
		}
	}

}
/**
  * Dual purpose line counter.
  *
  * Called with a string $token: counts its newlines, adds them to a running
  * total kept in a static variable, and returns array(count, 'n/a').
  *
  * Called with $token === false: returns array(running total, snippet) where
  * snippet is $additional followed by rebuild_code($offset, $src).
  *
  * @param string|false $token      token text, or false to query
  * @param int          $offset     index into $src (query mode only)
  * @param array        $src        token list (query mode only)
  * @param string       $additional text prepended to the snippet
  * @return array
  */
function get_lines($token=false,$offset=-1, &$src,$additional='')
{
	static $linecount;
	if (!$linecount) {
		$linecount=0;
	}
	if ($token!==false) {
		$newlines=substr_count($token, "\n");
		$linecount+=$newlines;
		return(array($newlines,'n/a'));
	}
	$snippet=$additional . rebuild_code($offset, $src);
	return(array($linecount,$snippet));
}
/**
  * Reassemble the source text around a token for use in error messages.
  *
  * Concatenates the text of the tokens from $offset-5 up to (but not
  * including) $offset+5, clipped to the bounds of $src, and HTML-encodes it.
  *
  * @param int   $offset index of the token of interest
  * @param array $src    token list (token_get_all() layout); anything that
  *                      is not an array yields an empty string
  * @return string HTML-encoded snippet
  */
function rebuild_code($offset, &$src)
{
	// Callers that only want the line count pass no token list at all;
	// count() on a non-array is a TypeError on PHP 8.
	if (!is_array($src)) {
		return('');
	}
	$total=count($src);
	$start=max(0, (int)$offset-5);
	$end=min($total, (int)$offset+5);
	$out='';
	for (; $start<$end; $start++) {
		if (is_array($src[$start])) {
			// array tokens carry their text at index 1
			$out.=$src[$start][1];
		} else {
			$out.=$src[$start];
		}
	}
	return(htmlentities($out));
}
/**
  * Validate the request and load it into the globals $src_file and $options.
  *
  * All four of src_file, opt_output, opt_undef and opt_builtin must be
  * present and non-empty, and src_file must name an existing regular file.
  *
  * @return bool true when the request is complete and the file exists;
  *              false otherwise (a "File not found" error is triggered when
  *              only the file check fails)
  */
function check_params()
{
	global $src_file,$options;
	foreach (array('src_file','opt_output','opt_undef','opt_builtin') as $key) {
		if (empty($_REQUEST[$key]) || !is_string($_REQUEST[$key])) {
			return(false);
		}
	}
	// NOTE(review): any path readable by the web server is accepted here;
	// restrict to a base directory if this page is exposed to untrusted users.
	if (!is_file($_REQUEST['src_file'])) {
		trigger_error("File not found");
		return(false);
	}
	// Set explicitly: the rest of the script reads the global $src_file,
	// which nothing else populates once register_globals is unavailable.
	$src_file=$_REQUEST['src_file'];

	if (strtoupper(substr($_REQUEST['opt_output'],0,1))=='V') {
		$options['output']='Vertical';
	} else {
		$options['output']='Horizontal';
	}
	$options['exclude_undefined']=($_REQUEST['opt_undef']=='YES');
	$options['exclude_builtins']=($_REQUEST['opt_builtin']=='YES');
	return(true);
}

/**
  * Write the dot source to a freshly created temporary file under /tmp.
  *
  * @param string $dotfile dot source text
  * @return string|null path of the file written, or nothing (null) when the
  *                     file could not be opened for writing
  */
function write_tmp_file(&$dotfile)
{
	$path=tempnam('/tmp','calls');
	$handle=fopen($path,'w');
	if (!$handle) {
		return;
	}
	fwrite($handle, $dotfile);
	fclose($handle);
	return($path);
}
/**
  * Build the cache path (without extension) for the current request.
  *
  * The name is derived from the whole of $_GET rather than from $src_file,
  * so that different option combinations produce different graphs.
  *
  * @param string $src_file not used in the result
  * @return string "graphs/" followed by an md5 hash
  */
function get_dest_file($src_file)
{
	$request_key=serialize($_GET); // different graphs for difft params
	return("graphs/" . md5($request_key));
}
/**
  * Garbage-collect the graph cache: queue (via "at now") a find command that
  * removes *.jpg files older than 30 days from the graphs directory next to
  * this script. Does nothing when that directory does not exist.
  *
  * %%% needs to be safer
  */
function clean_cache()
{
	$graph_dir=dirname(__FILE__) . '/graphs';
	if (!is_dir($graph_dir)) {
		return;
	}
	// Target the directory that was just tested (previously misspelt, so
	// nothing was ever purged). Both the path and the command handed to
	// echo are quoted for the shell.
	$cmd='find ' . escapeshellarg($graph_dir . '/') . ' -name \*.jpg -mtime +30 -exec rm -f {} \;';
	shell_exec('echo ' . escapeshellarg($cmd) . ' | at now');
}
/**
  * Decide whether the graph has to be (re)generated.
  *
  * Runs the cache clean-up first, then compares modification times; a
  * missing file counts as time 0.
  *
  * @param string $src_file path of the source file being graphed
  * @return bool true when the cached image is older than the source file
  */
function not_cached($src_file)
{
	clean_cache();
	$image=get_dest_file($src_file) . '.jpg';
	$image_mtime=(int)@filemtime($image);
	$source_mtime=(int)@filemtime($src_file);
	return($source_mtime>$image_mtime);
}
/**
  * Emit the page body: the input form followed by the result section.
  *
  * @param string $tmpfile path of the temporary dot file
  * @param string $cmd     command line that was run
  * @param string $output  output captured from that command
  */
function do_page($tmpfile,$cmd, $output)
{
	echo "<body>\n";
	do_form();
	do_result($tmpfile,$cmd, $output);
	echo "</body>";
}
/**
  * Print the input form. The file name field is pre-filled with the
  * src_file request value, HTML-escaped so it cannot break out of the
  * attribute.
  */
function do_form()
{
	$current='';
	if (isset($_REQUEST['src_file']) && is_string($_REQUEST['src_file'])) {
		// ENT_QUOTES because the attribute is delimited by single quotes
		$current=htmlspecialchars($_REQUEST['src_file'], ENT_QUOTES);
	}

	print "<form method='GET'>\n";
	print "file to parse: <input type='text' name='src_file' value='" . $current . "' size='80'><br />\n";
	print "output vertical or horizontal? <select name='opt_output'>
		<option selected>Vertical</option>
		<option>Horizontal</option>
		</select><br />\n";
	print "exclude undefined functions? <select name='opt_undef'>
			<option selected>YES</option>
			<option>NO</option></select><br />\n";
	print "exclude builtin functions? <select name='opt_builtin'>
		     <option selected>YES</option>
			 <option>NO</option></select><br />\n";
	print "<input type='submit' name='submit' value='submit'></form>\n";
}

/**
  * Print the result section of the page.
  *
  * With a src_file in the request: shows collected errors, then either the
  * generated image or, when the image is missing, the dot source that was
  * fed to dot, followed by debug information. The temporary dot file is
  * removed afterwards. Without a src_file the key image is shown instead.
  *
  * Everything that originates from the request, the parsed file or the
  * external command is HTML-escaped before being printed.
  *
  * @param string|null $tmpfile path of the temporary dot file, if one exists
  * @param string|null $cmd     command line that was run
  * @param string|null $output  output captured from that command
  */
function do_result($tmpfile,$cmd,$output)
{
	global $src_file,$options,$exclude_fns,$builtins;
	if (empty($_REQUEST['src_file'])) {
		print "<h3>Key</h3><br /><img src='key.jpg' alt='key'>\n";
		return;
	}
	show_errors();
	$destfile=get_dest_file($_REQUEST['src_file']) . '.jpg';
	$have_tmp=($tmpfile && file_exists($tmpfile));
	if (file_exists($destfile)) {
		print "<h3>" . htmlspecialchars((string)$src_file, ENT_QUOTES) . "</h3>\n";
		// query mode of get_lines(): an empty token list yields no snippet,
		// only the running line count is of interest here
		$no_tokens=array();
		list($lines_processed)=get_lines(false,0,$no_tokens);
		if ($lines_processed) {
			print "parsed $lines_processed lines from input file<br />\n";
		}
		print "<br /><img src='$destfile' alt='callgraph'>\n";
	} else {
		print "expected output in $destfile<br />\n";
		print "failed to generate for some reason dot file is shown below";
		$dot=$have_tmp ? (string)file_get_contents($tmpfile) : '';
		print "<hr><pre>" . htmlspecialchars($dot, ENT_QUOTES) . "</pre>\n";
	}
	print "<hr><h3>debug</h3>run=" . htmlspecialchars((string)$cmd, ENT_QUOTES)
		. "<br />output=<pre>" . htmlspecialchars((string)$output, ENT_QUOTES) . "</pre>\n";
	print "options=<pre>" . htmlspecialchars(var_export($options,true), ENT_QUOTES) . "</pre>\n";
	print "params=<pre>" . htmlspecialchars(var_export($_GET,true), ENT_QUOTES) . "</pre>\n";
	// print "exclude_fns=<pre>" . var_export($exclude_fns, true) . "</pre>\n";
	// print "builtins=<pre>" . var_export($builtins, true) . "</pre>\n";
	// there is no temporary file when the cached image was reused
	if ($have_tmp) {
		unlink($tmpfile);
	}
}
/**
  * Print the errors collected in the global $process_errors as a coloured
  * HTML table. Prints nothing when the list is empty or was never set up.
  *
  * Messages are printed as-is: they are built by this script and may contain
  * markup of their own.
  */
function show_errors()
{
	global $process_errors;
	// $process_errors only exists once a request passed validation
	if (empty($process_errors) || !is_array($process_errors)) {
		return;
	}
	$colors=array(
			E_ERROR          => "FF1010", // red
			E_WARNING        => "FF3333", // yellow
			E_PARSE          => "CC00FF", // unlikely
			E_NOTICE          => "00FFFF",
			E_CORE_ERROR      => "CC00FF",
			E_CORE_WARNING    => "FF0000",
			E_COMPILE_ERROR  => "FF0000",
			E_COMPILE_WARNING => "FF9933",
			E_USER_ERROR      => "FF6666",
			E_USER_WARNING    => "FFFF33",
			E_USER_NOTICE    => "CCCCFF",
			2048              => "FF99FF"  // E_STRICT; literal because the constant is deprecated in recent PHP
			);
	print "<h3>Errors occurred during processing</h3>\n";
	print "<table bgcolor='#000000'>";
	foreach($process_errors as $item) {
		list($errno, $msg)=$item;
		// levels without a colour of their own are shown on white
		$bg=isset($colors[$errno]) ? $colors[$errno] : 'FFFFFF';
		print "<tr><td bgcolor='#$bg'>$errno</td><td bgcolor='#$bg'>$msg</td></tr>\n";
	}
	print "</table>\n";
}
/**
  * Error handler: append the error to the global $process_errors list
  * instead of printing it.
  *
  * Each entry is array($errno, "<type> <message> at <line> in <file>").
  *
  * @param int    $errno    error level
  * @param string $errmsg   error message
  * @param string $filename file in which the error was raised
  * @param int    $line     line on which the error was raised
  * @param mixed  $vars     unused; optional because PHP 8 calls error
  *                         handlers with four arguments only
  */
function error_logging($errno, $errmsg, $filename, $line, $vars=null)
{
	global $process_errors;
	$errortype = array (
			E_ERROR          => "Error",
			E_WARNING        => "Warning",
			E_PARSE          => "Parsing Error",
			E_NOTICE          => "Notice",
			E_CORE_ERROR      => "Core Error",
			E_CORE_WARNING    => "Core Warning",
			E_COMPILE_ERROR  => "Compile Error",
			E_COMPILE_WARNING => "Compile Warning",
			E_USER_ERROR      => "User Error",
			E_USER_WARNING    => "User Warning",
			E_USER_NOTICE    => "User Notice",
			2048              => "Runtime Notice"  // E_STRICT; literal because the constant is deprecated in recent PHP
			);
	// levels missing from the table (e.g. deprecations) still get a label
	$label=isset($errortype[$errno]) ? $errortype[$errno] : "Unknown error ($errno)";
	$process_errors[]=array($errno, $label . " $errmsg at $line in $filename");
}
/**
 * main 
 *
 * -----------------------------------------------------------
 */

// Defaults so do_page() always receives defined values, including when the
// request is incomplete or a cached image is reused.
$tmpfile=null;
$run='';
$output='';

if (check_params()) {
	$process_errors=array();
	// check_params() has validated this path; make sure the global the rest
	// of the script reads is populated even without register_globals.
	if (!isset($src_file)) {
		$src_file=$_REQUEST['src_file'];
	}
	if (not_cached($src_file)) {
		// the handler name must be a string - a bare word is an undefined
		// constant
		set_error_handler('error_logging');
		// everything printed while the buffer is open becomes the dot source
		ob_start();
		$source=file_get_contents($src_file);
		$tokens=token_get_all($source);
		unset($source);
		// '_main' stands for code outside any function
		$context=array(array(T_FUNCTION,'_main'));
		$calls=array();
		$fns=array("_main"=>1);

		parse_tokens($tokens,$context,$calls,$fns);

		$exclude_fns=array();
		$builtins=get_defined_functions();
		$builtins=$builtins['internal'];

		if ($options['exclude_builtins']) {
			// NB copy - since this is the list of things we are not showing
			// which may be appended to later
			$exclude_fns=$builtins;
		}

		$dotwriter=new dotwriter($calls,$fns);
		$dotwriter->write_dot_file();
		$dotfile=ob_get_contents();
		ob_end_clean();
		$dest=get_dest_file($src_file) . '.jpg';
		$tmpfile=write_tmp_file($dotfile);
		if ($tmpfile) {
			$run="/usr/bin/dot -Tjpg -o " . escapeshellarg($dest) . " " . escapeshellarg($tmpfile);
			$output=shell_exec($run);
		}
		restore_error_handler();
	}
}
do_page($tmpfile,$run,$output);
?>
Return current item: Code Graph