Location: PHPKode > scripts > Mini-Fetch > minifetch.php
<?php

//   MiniFetch - Remote Content Retrieval System
//   Retrieve HTML and content from a page on one site, strip or replace HTML tags,
//   reformat with your preferences, then output it onto your own site page.
//   Copyright by WebMedic.Co/
// All rights reserved.
//   Please see disclaimer info in readme.txt before use.

// To use Mini-Fetch:
// 1. Edit 3 variables below in script: $theLocation, $start, and $finish
//    Variables are clearly marked below with ***** around them.
// 2. Call it from any page like this:
/*  <?php include("http://www.yoursite.com/minifetch.php"); ?>    */      // assuming the file is at root of website
// 3. Or, simply paste the entire text of this file directly into any .php page,
//    exactly where you want the grabbed content to appear.
// Your server must support PHP 4.0 or higher.
// Your PHP configuration must have allow_url_fopen enabled, and the remote URL must be reachable from your server.


// This file is minifetch.php.  It fetches:
// Chron.com News box results, strips HTML junk, then displays the links.




//replace with the URL from which you want to fetch content
// ***************************************************************
$theLocation="http://www.chron.com";
// ***************************************************************





// Split the configured location into scheme, host and path.
// A location without a scheme defaults to http://; an explicit https:// is
// kept (it used to be silently downgraded to http://).
$matches = array();
preg_match("/^(https?:\/\/)?([^\/]*)(.*)/i", "$theLocation", $matches);
$scheme = ($matches[1] !== "") ? strtolower($matches[1]) : "http://";
$theDomain = $scheme . $matches[2];   // also used further down to make relative links absolute
$page = $matches[3];

// Download the whole page into $value.
// If no host is configured or the download fails, $value stays an empty
// string so the rest of the script simply outputs nothing from the remote
// page, instead of looping or crashing on an invalid file handle.
// Fetching a URL requires allow_url_fopen to be enabled in php.ini.
$value = "";
if ($matches[2] !== "") {
	$fetched = file_get_contents($theDomain . $page);
	if ($fetched !== false) {
		$value = $fetched;
	}
}

//  Below, at $start and $finish, you'll enter the start and finish points in the remote HTML.
// THESE START AND FINISH FRAGMENTS OF HTML MUST BE UNIQUE.
// Look for tags in their source page.
// Don't forget to escape any " marks with a \ mark.
// Example: If the HTML is:  <img src="images/something.jpg">
// You would tell MiniFetch: <img src=\"images/something.jpg\">
// Tip: Frequently there are unique tags around boxes and tables
// ********************************************************************************
// replace with your UNIQUE start point in the source of the HTML page
$start= strpos($value, "<span class=\"first-word\">Latest</span>");   // <-- replace with your starting html
// replace with the UNIQUE finish point in the source of the HTML page
// (the finish point is only looked for AFTER the start point, so it can never
// be found in front of it)
$finish= ($start === false) ? false : strpos($value, "<!-- e hearst/collections/headline_list2_body.tpl -->", $start);  // <-- replace with ending html
//***********************************************************************************


// That's it! No need to go below here unless you want to strip out even more content, words or HTML


// Don't touch.
// Cut out everything from the start point up to (not including) the finish point.
// strpos() returns false when a marker is missing; in that case nothing is kept,
// rather than treating false as position 0 and cutting a meaningless piece of the page.
if ($start === false || $finish === false) {
	$length = 0;
	$value = "";
} else {
	$length = $finish - $start;
	$value = substr($value, $start, $length);
}


//  Below, we strip a lot of HTML from what we grabbed.
//  You can add more replacements, or disable any of them by putting these two marks in front of the line: //
//***********************************************************************************
// Each line removes one kind of tag or text from the fetched fragment.
// The pattern sits between the two # marks; the trailing "i" makes it
// case-insensitive. (preg_replace is used because eregi_replace no longer
// exists in PHP 7 and later.)
$value = preg_replace( "#<img src=[^>]*>#i", "", $value );   // Remove image tags that start with <img src=
$value = preg_replace( "#<IMG alt=[^>]*>#i", "", $value );   // Remove image tags that start with <img alt=
$value = preg_replace( "#<class[^>]*>#i", "", $value ); // Remove all variations of <class> tags.
$value = preg_replace( "#</font>#i", "", $value ); // Remove closing </font> tags.
$value = preg_replace( "#<tr[^>]*>#i", "", $value ); // Remove <tr> tags.
$value = preg_replace( "#<td[^>]*>#i", "", $value ); // Remove <td> tags.
$value = preg_replace( "#<table[^>]*>#i", "", $value ); // Remove ALL variations of <table> tags.
$value = preg_replace( "#</table>#i", "", $value ); // Remove closing </table> tags.
$value = preg_replace( "#</tr>#i", "", $value );  // Remove closing </tr> tags.
$value = preg_replace( "#</td>#i", "", $value ); // Remove closing </td> tags.
$value = preg_replace( "#<center>#i", "", $value ); // Remove <center> tag...
$value = preg_replace( "#</center>#i", "", $value ); // ...alignment calls.
$value = preg_replace( "#<b>#i", "", $value ); // Remove <b> tags.
$value = preg_replace( "#</b>#i", "", $value ); // Remove closing </b> tags.
$value = preg_replace( "#</aside>#i", "", $value ); // Remove closing </aside> tags.
$value = preg_replace( "#</div>#i", "", $value ); // Remove closing </div> tags.
$value = preg_replace( "#<h4>#i", "", $value ); // Remove <h4> tags.
$value = preg_replace( "#</h4>#i", "", $value ); // Remove closing </h4> tags.
$value = preg_replace( "# class=\"item\"#i", "", $value ); // Remove the class="item" attribute text.

//$value = preg_replace( "#<javascript[^>]*>#i", "", $value ); //remove javascripts
//$value = preg_replace( "#<script[^>]*>#i", "", $value ); //remove scripts


// Turn links that start with a single / (relative to the remote site's root)
// into absolute links by putting the remote domain in front of them.
$FinalOutput = preg_replace("/(href=\"?)(\/[^\"\/]+)/", "\\1" . $theDomain . "\\2", $value);
// NOTE: whatever HTML is left is printed as-is, so only fetch from sites you trust.
echo $FinalOutput; //prints it to your page
// The next line is not mandatory.
echo "<a href=\"http://www.webmedic.co\"><img src=\"http://www.webmedic.co/images/php.gif\" alt=\"WebMedic.co Minifetch\" width=\"2\" height=\"2\" border=\"0\"></a>";
//not mandatory
//flush (); //force output faster

?>
Return current item: Mini-Fetch