Location: PHPKode > scripts > CosmicLinkChecker > cosmiclinkchecker/config.php
<!--
##-----------------------------------------------------------------##
##                                                                 ##
##                                                                 ##
## © Copyright Stephen Heylings 2003. All rights reserved. No part ##
## of this or any of the attached documents shall be               ##
## reproduced/stored in any way whatsoever without written         ##
## permission from the Copyright holder.                           ##
##   The Copyright holder holds no responsibility for errors or    ##
## omissions. No liability is assumed in any way for damages       ##
## resulting from the use of this document/program.                ##
##                                                                 ##
## Have a nice day.                                                ##
##                                                                 ##
##                                                                 ##
##-----------------------------------------------------------------##
-->
<?
# Display options.
# $Show_Checked_Links: when true, cLink::SetUrl() prints a row for every
#   link it checks; when false only links whose HTTP code is not 200 are printed.
# $Show_Checked_Pages: when true, a row is printed for every page that is crawled.
$Show_Checked_Links = 0;
$Show_Checked_Pages = 1;

# Starting URL(s)
$start_url = array(
//"http://steve/cosmicphp/index.php",
"http://www.cosmicphp.com/index.php",
);

# Servers to index from (a page is only crawled for further links when its
# URL matches one of these entries; see check_url()).
$allow_url = array(
#"http://steve/cosmicphp/",
"http://www.cosmicphp.com/",
);

# File extensions not to index.
# NOTE: the check that used this value in check_url() is commented out,
# so it is currently not enforced.
$file_ext = "php cgi asp";


# Directories not to index
#$no_index_dir = "inc.php img image temp tmp cgi-bin";
$no_index_dir = "CosmicPhpForum";

# Files not to index.
# NOTE: the check that used this value in check_url() is commented out,
# so it is currently not enforced.
$no_get_words = 'robots.txt';

# Known HTTP codes, keyed by status code, mapped to a human-readable label.
# The non-numeric keys are quoted strings: a bare word such as OK is an
# undefined constant, which throws an Error on PHP 8.
$error['N/A']  = "Unknown HTTP";
$error['OK']   = "Valid hostname";
$error['FEJL'] = "Invalid hostname";
$error['Død']  = "No response";
$error[100]   = "Continue";
$error[101]   = "Switching Protocols";
$error[200]   = "OK";
$error[201]   = "Created";
$error[202]   = "Accepted";
$error[203]   = "Non-Authoritative Information";
$error[204]   = "No Content";
$error[205]   = "Reset Content";
$error[206]   = "Partial Content";
$error[300]   = "Multiple Choices";
$error[301]   = "Moved Permanently";
$error[302]   = "Redirect";
$error[303]   = "See Other";
$error[304]   = "Not Modified";
$error[305]   = "Use Proxy";
$error[307]   = "Temporary Redirect";
$error[400]   = "Bad Request";
$error[401]   = "Unauthorized";
$error[402]   = "Payment Required";
$error[403]   = "Forbidden";
$error[404]   = "Not Found";
$error[405]   = "Method Not Allowed";
$error[406]   = "Not Acceptable";
$error[407]   = "Proxy Authentication Required";
$error[408]   = "Request Timeout";
$error[409]   = "Conflict";
$error[410]   = "Gone";
$error[411]   = "Length Required";
$error[412]   = "Precondition Failed";
$error[413]   = "Request Entity Too Large";
$error[414]   = "Request-URI Too Long";
$error[415]   = "Unsupported Media Type";
$error[416]   = "Requested Range Not Satisfiable";
$error[417]   = "Expectation Failed";
$error[500]   = "Internal Server Error";
$error[501]   = "Not Implemented";
$error[502]   = "Bad Gateway";
$error[503]   = "Service Unavailable";
$error[504]   = "Gateway Timeout";
$error[505]   = "HTTP Version Not Supported";

# URLs that have already been handed to cLink::SetUrl().
$visited = array();

?>
<?

/**
 * One link found while crawling: fetches the URL, records its HTTP status
 * and, when the URL may be indexed, recursively checks the links on it.
 */
class cLink {

	// URL this link points to (set by SetUrl).
	var $mUrl;
	// URL of the page on which this link was found (set by SetParentUrl).
	var $mParentUrl;
	// HTTP status code from the first response line, 'N/A' when no response
	// was received, or "non http" when the URL does not start with http://.
	var $mHttpCode;
	// True when the URL was fetched and the status code was not 200.
	var $mBroken = false;

	/**
	 * Remember the page this link was found on.
	 *
	 * @param string $cVal URL of the parent page
	 */
	function SetParentUrl($cVal) {
		$this->mParentUrl = $cVal;
	}

	/**
	 * Check the given URL and, if check_url() allows it, crawl the links on it.
	 *
	 * Side effects on globals: appends to $visited, stores child links in
	 * $aUrls, and updates the $TotalLinks / $BrokenLinks / $WarningLinks counters.
	 * Output for this link is printed after its child links have been processed.
	 *
	 * @param string $cVal URL to check
	 */
	function SetUrl($cVal) {
		global $Show_Checked_Links, $visited, $aUrls, $Show_Checked_Pages, $TotalLinks, $BrokenLinks, $WarningLinks;

		$visited[] = $cVal;
		$this->mUrl = $cVal;
		$this->mBroken = false;

		if (preg_match("'^http://'", $cVal)) {

			// A failing fetch is an expected outcome for a link checker, so the
			// warning is suppressed and the failure is read from the status code.
			$sContent = @file_get_contents($cVal);
			if ($sContent === false) {
				$sContent = '';
			}

			// The HTTP wrapper fills $http_response_header in the local scope.
			// It stays unset when no response was received at all (e.g. the
			// host name could not be resolved).
			$http_code = 'N/A';
			if (isset($http_response_header) && is_array($http_response_header) && isset($http_response_header[0])) {
				// Status line looks like "HTTP/1.1 200 OK"; the code is the 2nd token.
				$status = explode(" ", $http_response_header[0], 3);
				if (isset($status[1]) && $status[1] !== '') {
					$http_code = $status[1];
				}
			}
			$this->mHttpCode = $http_code;
			$this->mBroken = ((int) $http_code !== 200);

			// A <base href> in the document overrides the page URL when
			// resolving relative links.
			$base = $cVal;
			if (preg_match("/<base\\s+href=([\"']?)([^\\s\"'>]+)\\1/is", $sContent, $matches)) {
				$base = $matches[2];
			}

			if (check_url($cVal)) {
				$links = get_links($sContent);
				if ($Show_Checked_Pages) {
					print_page($cVal, count($links));
				}
				$TotalLinks = $TotalLinks + count($links);
				foreach (array_keys($links) as $href) {
					// Array keys that look like integers come back as ints.
					$new_link = get_abs_url($base, (string) $href);
					// Fragments address a position inside the same document.
					$new_link = preg_replace("/#.*/", "", $new_link);

					if (!in_array($new_link, $visited, true)) {
						$oUrl = new cLink;
						$oUrl->SetParentUrl($cVal);
						$oUrl->SetUrl($new_link);
						$aUrls[$new_link] = $oUrl;
					}
				}
			}
		} else {
			$this->mHttpCode = "non http";
		}

		if ($Show_Checked_Links) {
			print_link($this->mUrl, $this->mHttpCode);
		} elseif ($this->mBroken) {
			// Client/server errors (4xx, 5xx) and missing responses are broken
			// links; any other non-200 status (e.g. a redirect) is a warning.
			if (!is_numeric($this->mHttpCode) || (int) $this->mHttpCode >= 400) {
				print_error($this);
				$BrokenLinks++;
			} else {
				print_warning($this);
				$WarningLinks++;
			}
		}
	}
}


/**
 * Decide whether a page may be crawled for further links.
 *
 * A URL is rejected when it contains any of the whitespace-separated words
 * in $no_index_dir, and accepted only when it contains one of the entries
 * of $allow_url. Both comparisons are case-insensitive and literal: the
 * configured values are plain text, not regular expressions.
 *
 * NOTE: $file_ext and $no_get_words are configured but not enforced here.
 *
 * @param string $url absolute URL of the page
 * @return bool TRUE when the page may be crawled, FALSE otherwise
 */
function check_url($url) {
	global $no_index_dir, $allow_url;

	if ($no_index_dir) {
		// The setting may list several directories separated by whitespace.
		foreach (preg_split('/\s+/', trim($no_index_dir)) as $dir) {
			if ($dir !== '' && stripos($url, $dir) !== false) { return FALSE; }
		}
	}

	if (is_array($allow_url)) {
		foreach ($allow_url as $v) {
			// An empty entry would match every URL, so it is ignored.
			if ($v !== '' && stripos($url, $v) !== false) {
				return TRUE;
			}
		}
	}

	return FALSE;
}
/**
 * Collect link targets from a chunk of HTML.
 *
 * Looks at href attributes of tags starting with "<a" (which, as the
 * pattern is written, also covers e.g. <area>), src attributes of <frame>
 * tags and href attributes of <area> tags.
 *
 * @param string $sContent HTML text to scan
 * @return array map of link target => 1, in order of first discovery
 */
function get_links($sContent) {
	$patterns = array(
		"/<a[^>]+href=([\"']?)([^\\s\"'>]+)\\1/is",
		"/<frame[^>]+src=([\"']?)([^\\s\"'>]+)\\1/is",
		"/<area[^>]+href=([\"']?)([^\\s\"'>]+)\\1/is",
	);

	$found = array();
	foreach ($patterns as $pattern) {
		preg_match_all($pattern, $sContent, $hits, PREG_SET_ORDER);
		foreach ($hits as $hit) {
			// Group 2 is the attribute value without surrounding quotes.
			$found[$hit[2]] = 1;
		}
	}
	return $found;
}
/**
 * Return the absolute URL for a link, given the base URL it was found on.
 *
 * Handles, in order: URLs that already carry a scheme (returned unchanged),
 * protocol-relative "//host/path" references, empty / "?query" / "#fragment"
 * references (resolved against the base document itself), root-relative
 * "/path" references, and relative paths with leading "./" and "../" segments.
 *
 * @param string $base absolute URL of the containing page (or its <base href>)
 * @param string $url  link target as written in the page
 * @return string absolute URL; $url unchanged when $base has no scheme or host
 */
function get_abs_url($base,$url) {
    $url = (string) $url;
    $url_arr = parse_url($url);
    if (!is_array($url_arr)) {
        // parse_url() returns false for seriously malformed URLs.
        $url_arr = array();
    }
    if (isset($url_arr["scheme"])) {
        return($url);
    }

    $base_arr = parse_url($base);
    if (!is_array($base_arr) || !isset($base_arr["scheme"]) || !isset($base_arr["host"])) {
        // Nothing sensible to resolve against.
        return $url;
    }
    $scheme = strtolower($base_arr["scheme"]);

    // Protocol-relative reference ("//host/path"): only the scheme is inherited.
    if (isset($url_arr["host"])) {
        return $scheme.":".$url;
    }

    $base_base = $scheme."://";
    if (isset($base_arr["user"])) {
        $base_base .= $base_arr["user"];
        if (isset($base_arr["pass"])) {
            $base_base .= ":".$base_arr["pass"];
        }
        $base_base .= "@";
    }
    $base_base .= strtolower($base_arr["host"]);
    if (isset($base_arr["port"])) {
        $base_base .= ":".$base_arr["port"];
    }
    $base_file = isset($base_arr["path"]) ? $base_arr["path"] : "";
    if ($base_file == "") { $base_file = "/"; }

    // Empty, query-only and fragment-only references point at the base
    // document itself, not at its directory.
    $first = ($url === "") ? "" : $url[0];
    if ($first === "" || $first === "?" || $first === "#") {
        $doc = $base_base.$base_file;
        if ($first !== "?" && isset($base_arr["query"])) {
            // Only a reference with its own query replaces the base query.
            $doc .= "?".$base_arr["query"];
        }
        return $doc.$url;
    }

    if ($first === "/") {
        return $base_base.$url;
    }

    // Directory of the base document (everything up to the last slash).
    $base_path = preg_replace("/(.*\/).*/","\\1",$base_file);

    // Consume leading "./" (stay) and "../" (go up one directory) segments.
    while (preg_match('#^(\.\.?)/#', $url, $seg)) {
        $url = substr($url, strlen($seg[0]));
        if ($seg[1] === "..") {
            $base_path = preg_replace("/(.*\/).*\//","\\1",$base_path);
        }
    }
    return $base_base.$base_path.$url;
}
/**
 * Print one table row describing a checked link and flush the output.
 *
 * The URL comes from crawled pages, so every value is HTML-escaped.
 * Codes without an entry in the global $error table (e.g. "non http")
 * fall back to the 'N/A' label.
 *
 * @param string $url  URL that was checked
 * @param mixed  $http HTTP status code (or marker string) recorded for it
 */
function print_link($url, $http){
	global $error;

	$code = (string) $http;
	if (isset($error[$code])) {
		$label = $error[$code];
	} elseif (isset($error['N/A'])) {
		$label = $error['N/A'];
	} else {
		$label = '';
	}

	$url   = htmlspecialchars((string) $url, ENT_QUOTES);
	$code  = htmlspecialchars($code, ENT_QUOTES);
	$label = htmlspecialchars((string) $label, ENT_QUOTES);

	print"	
	<table align=center><tr>
		<td class=h width=600>Link: $url :~: $code :~: $label</td>
	</tr></table>\n";
	flush();
}
/**
 * Print one table row announcing that a page is being crawled, then flush.
 *
 * The URL comes from crawled pages, so it is HTML-escaped before output.
 *
 * @param string $url   URL of the page being checked
 * @param int    $links number of links found on that page
 */
function print_page($url, $links){
	$url   = htmlspecialchars((string) $url, ENT_QUOTES);
	$links = htmlspecialchars((string) $links, ENT_QUOTES);

	print"	
	<table align=center><tr>
		<td class=h width=600>Checking page $url. $links links found.</td>
	</tr></table>\n";
	flush();
}
/**
 * Print an "Error!" table for a broken link and flush the output.
 *
 * Reads mParentUrl, mUrl and mHttpCode from the given object. The URLs come
 * from crawled pages, so all values are HTML-escaped (including quotes,
 * because the href attributes are single-quoted). Codes without an entry in
 * the global $error table fall back to the 'N/A' label.
 *
 * @param object $oError link object exposing mParentUrl, mUrl and mHttpCode
 */
function print_error($oError){

	global $error;

	$code = (string) $oError->mHttpCode;
	if (isset($error[$code])) {
		$label = $error[$code];
	} elseif (isset($error['N/A'])) {
		$label = $error['N/A'];
	} else {
		$label = '';
	}

	$parent = htmlspecialchars((string) $oError->mParentUrl, ENT_QUOTES);
	$url    = htmlspecialchars((string) $oError->mUrl, ENT_QUOTES);
	$err    = htmlspecialchars($code, ENT_QUOTES);
	$label  = htmlspecialchars((string) $label, ENT_QUOTES);

	print"	
	<table align=center cellpadding=4><tr>
		<td class=error width=600>Error! On page <a href='$parent'>$parent</a></td></tr>
	<tr>
		<td class=error width=600>Link: <a href='$url'>$url</a> HTTP$err $label</td>
	</tr></table>\n";
	flush();
}
/**
 * Print a "Warning!" table for a suspicious link and flush the output.
 *
 * Reads mParentUrl, mUrl and mHttpCode from the given object. The URLs come
 * from crawled pages, so all values are HTML-escaped (including quotes,
 * because the href attributes are single-quoted). Codes without an entry in
 * the global $error table fall back to the 'N/A' label.
 *
 * @param object $oError link object exposing mParentUrl, mUrl and mHttpCode
 */
function print_warning($oError){

	global $error;

	$code = (string) $oError->mHttpCode;
	if (isset($error[$code])) {
		$label = $error[$code];
	} elseif (isset($error['N/A'])) {
		$label = $error['N/A'];
	} else {
		$label = '';
	}

	$parent = htmlspecialchars((string) $oError->mParentUrl, ENT_QUOTES);
	$url    = htmlspecialchars((string) $oError->mUrl, ENT_QUOTES);
	$err    = htmlspecialchars($code, ENT_QUOTES);
	$label  = htmlspecialchars((string) $label, ENT_QUOTES);

	print"	
	<table align=center cellpadding=4><tr>
		<td class=error width=600>Warning! On page <a href='$parent'>$parent</a></td></tr>
	<tr>
		<td class=error width=600>Link: <a href='$url'>$url</a> HTTP$err $label</td>
	</tr></table>\n";
	flush();
}

?>
Return current item: CosmicLinkChecker