Location: PHPKode > scripts > Sitemap Manager > sitemap-manager/SitemapManager.class.php
<?php

/* SitemapManager.class.php
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 $Id: $
 */ 

/** Builds and extends a tree of sitemap files stored in a single directory.
 * Leaf files (level 0) are plain sitemaps; files on higher levels are sitemap
 * indexes that list the files of the level below, following the sitemap
 * protocol (see http://www.sitemaps.org/protocol.php).
 *
 * Files are named FILENAME_<level>_<number>.xml. The tree state is not kept
 * in memory: it is discovered by probing the directory for these names.
 *
 * @category   class
 * @package    sitemap_manager
 * @author     Giuseppe Sucameli <hide@address.com>
 * @copyright  2009 Giuseppe Sucameli
 * @license    http://www.gnu.org/licenses/gpl.html GPL V 2.0
 * @version    0.1
 */

class SitemapManager
{
    const FILENAME = 'sitemap'; // common prefix of every sitemap file name

    /** The following properties store the directory in which sitemaps are
     * created. It is recommended to use the top-level directory of the web
     * server because other locations are subject to some limitations
     * (see http://www.sitemaps.org/protocol.php#location).
     *
     * Both properties must refer to the same directory.
     */
    private $dir_path;  // full path of the directory like /var/www/site/dir/
    private $dir_url;   // url of the directory like http://www.host.com/dir/

    /** Construct the object and initialize its properties.
     * A trailing slash is appended to each argument when it is missing.
     *
     * @param $dir_path full path of the directory
     * @param $dir_url url of the directory
     */
    public function __construct($dir_path, $dir_url) {
        if(empty($dir_path) || !is_string($dir_path))
            trigger_error("Missing argument 1 (dir_path) for ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        if(empty($dir_url) || !is_string($dir_url))
            trigger_error("Missing argument 2 (dir_url) for ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        $this->dir_path = $dir_path;
        if(substr($dir_path, -1, 1) != '/') // make sure that the last char in $dir_path is a /
            $this->dir_path .= '/';

        $this->dir_url = $dir_url;
        if(substr($dir_url, -1, 1) != '/')  // make sure that the last char in $dir_url is a /
            $this->dir_url .= '/';
    }

    /** Add urls to the tree of sitemaps, starting from the leaf level.
     *
     * @param $urls single url or a SitemapItem object, array of urls or array of SitemapItem objects
     */
    public function AddUrls($urls) {
        $this->AddUrls_r($urls, 0);
    }

    /** Add urls to the last sitemap of a level, creating further sitemaps on
     * that level (and registering them in the level above) when the current
     * one cannot take all of them.
     *
     * @param $urls single url or a SitemapItem object, array of urls or array of SitemapItem objects
     * @param $level tree level in which insert urls, 0 if is a leaf, greater otherwise
     */
    private function AddUrls_r($urls, $level = 0) {
        $arr = is_array($urls) ? $urls : array($urls);  // always work on an array

        $map = $this->GetLastSitemap($level);
        $inserted = $map->AddUrls($arr);
        $is_new_map = FALSE;    // TRUE once $map refers to a sitemap created inside the loop

        while($inserted < count($arr)) {
            // A sitemap that was just created and still accepted nothing will
            // never accept the pending url: stop instead of creating sitemap
            // files forever.
            if($is_new_map && $inserted == 0) {
                trigger_error("Couldn't add an url to a new sitemap in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);
                return;
            }

            // drop the urls already written, in one pass
            $arr = array_slice($arr, $inserted);

            if($this->GetLastSitemapNumber($level) == 0)    // $map is the first sitemap in level $level (is the actual root), add it to index in the next level
                $this->AddUrls_r($this->ConvertPathToUrl($map->GetPath()), $level + 1);

            // open the next sitemap on this level and register it in the index above
            $map = $this->GetNewSitemap($level);
            $is_new_map = TRUE;
            $this->AddUrls_r($this->ConvertPathToUrl($map->GetPath()), $level + 1);

            $inserted = $map->AddUrls($arr);
        }
    }

    /** Get the url of the sitemap that is the actual tree root, i.e. the
     * first sitemap of the highest existing level.
     *
     * @return FALSE if no sitemaps exist, the url of the actual root sitemap otherwise
     */
    public function GetSitemapRootUrl() {
        $levels = $this->GetNumberOfLevels();
        if($levels <= 0) return FALSE;

        $root_path = $this->GetPathFromMapNumber($levels - 1, 0);
        return $this->ConvertPathToUrl($root_path);
    }

    /** Get the number of levels in the tree.
     * A level exists when its sitemap number 0 exists on disk.
     *
     * @return the number of levels
     */
    private function GetNumberOfLevels() {
        $level = 0;
        while(file_exists($this->GetPathFromMapNumber($level, 0)))
            $level++;
        return $level;
    }

    /** Get the path of a sitemap by its level and number
     *
     * @param $level sitemap tree level
     * @param $num number of sitemap in level $level
     * @return the path of that sitemap
     */
    private function GetPathFromMapNumber($level, $num) {
        return $this->dir_path.self::FILENAME.'_'.$level.'_'.$num.'.xml';
    }

    /** Get the number of the last sitemap in a level.
     * Probes the directory for consecutive sitemap numbers starting at 0.
     *
     * @param $level sitemap tree level
     * @return the number of the last sitemap, -1 if the level has no sitemap
     */
     /* TODO optimize this function, it is expensive.
        The idea is to keep an array with the last sitemap number of every
        level in the tree, serialized to a file so that later insertions can
        read it back instead of probing the directory. */
    private function GetLastSitemapNumber($level) {
        $num = 0;
        while(file_exists($this->GetPathFromMapNumber($level, $num)))
            $num++;

        return $num - 1;
    }

    /** Get a new sitemap for a level
     *
     * @param $level sitemap tree level
     * @return a Sitemap object for the new sitemap in that level
     */
    private function GetNewSitemap($level) {
        return $this->GetSitemap($level, $this->GetLastSitemapNumber($level) + 1);
    }

    /** Get the last sitemap for a level
     *
     * @param $level sitemap tree level
     * @return a Sitemap object for the last sitemap in that level
     */
    private function GetLastSitemap($level) {
        $num = $this->GetLastSitemapNumber($level);
        if($num < 0) $num = 0;  // no sitemap on this level, use number 0
        return $this->GetSitemap($level, $num);
    }

    /** Get a sitemap by its level and number.
     * Level 0 yields a plain sitemap, any higher level a sitemap index.
     *
     * @param $level sitemap tree level
     * @param $num the sitemap number
     * @return a Sitemap object for that sitemap
     */
    private function GetSitemap($level, $num) {
        $type = $level > 0 ? Sitemap::TYPE_SITEMAP_INDEX : Sitemap::TYPE_SITEMAP;
        return new Sitemap($this->GetPathFromMapNumber($level, $num), $type);
    }

    /** Convert a sitemap path into a url by appending its file name (the part
     * after the last '/') to the directory url.
     *
     * @param $path the sitemap path
     * @return the sitemap url
     */
    private function ConvertPathToUrl($path) {
        $slash = strrpos($path, '/');
        // a path without any '/' is already a bare file name
        $name = ($slash === FALSE) ? $path : substr($path, $slash + 1);
        return $this->dir_url.$name;
    }
}

/** A single sitemap (or sitemap index) file to which urls can be appended.
 *
 * Every call to AddUrls() works on a temporary copy of the file
 * (<path>.tmp) which is renamed over the real file when finished. The last
 * row of the file is an XML comment holding the number of urls, padded to a
 * fixed width so that it can be read back from a known offset.
 *
 * @category   class
 * @package    sitemap_manager
 * @author     Giuseppe Sucameli <hide@address.com>
 * @copyright  2009 Giuseppe Sucameli
 * @license    http://www.gnu.org/licenses/gpl.html GPL V 2.0
 * @version    0.1
 */
class Sitemap
{
    /* Sitemap types definition: sitemap or sitemap index */
    const TYPE_SITEMAP_INDEX = 0;
    const TYPE_SITEMAP = 1;

    /* XML constants used in sitemap */
    const XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>';
    const XML_NAMESPACE = 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"';

    const GENERATOR = '<!-- sitemap-generator-program="sitemap-manager-class" sitemap-generator-version="0.1" -->
<!-- programmed-by="Giuseppe Sucameli" programmer-email="hide@address.com" --> ';

    const MAX_URLS = 50000; // max number of urls into sitemap file (no more than 50,000 urls)
    const MAX_SIZE = 10485760; // max size of sitemap file (10MB)

    private static $TAGS = array(
        self::TYPE_SITEMAP_INDEX => array('sitemapindex', 'sitemap'),
        self::TYPE_SITEMAP => array('urlset', 'url')
    );  // (root tag, entry tag) for every sitemap type

    private $type;  // sitemap type, one of the TYPE_* constants
    private $path;  // the sitemap file path
    private $tmp;   // path of the temporary sitemap file
    private $stream;    // stream on the temporary file, set only while the sitemap is open
    private $num_urls;  // number of urls in the file, set only while the sitemap is open
    private $size;  // tracked size of the file in bytes, set only while the sitemap is open

    /** Get the root tag associated to the type of this sitemap
     *
     * @return the sitemap root tag
     */
    private function GetRootTag() {
        return self::$TAGS[$this->type][0];
    }

    /** Get the url tag associated to the type of this sitemap
     *
     * @return the sitemap url tag
     */
    private function GetUrlTag() {
        return self::$TAGS[$this->type][1];
    }

    /** Construct the sitemap object and set its path and type.
     * No file is touched until AddUrls() is called.
     *
     * @param $path file path of this sitemap
     * @param $type type of this sitemap
     */
    public function __construct($path, $type = self::TYPE_SITEMAP) {
        $this->path = $path;
        $this->type = $type;

        $this->tmp = $this->path.".tmp";
    }

    /** Get file path of this sitemap
     */
    public function GetPath() {
        return $this->path;
    }

    /** Get type of this sitemap
     */
    public function GetType() {
        return $this->type;
    }

    /** Open the sitemap file.
     * If the file exists, a temporary copy is made, the url count and size
     * are read from it and the file pointer is moved to the position for a
     * new insertion.
     * If it does not exist, a new temporary file is created, the counters
     * are initialized and the opening rows are written.
     */
    private function Open() {
        if(isset($this->stream)) return; // already opened

        $exists = file_exists($this->path);

        // work on a copy so that the real file is replaced only when complete
        if($exists && !copy($this->path, $this->tmp)) {
            trigger_error("Couldn't copy the file in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);
            return;
        }

        $stream = fopen($this->tmp, $exists ? 'r+' : 'w');
        if($stream === FALSE) {   // never hand a failed fopen() result to flock()
            trigger_error("Couldn't open the file in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);
            return;
        }

        if(flock($stream, LOCK_EX) === FALSE)
            trigger_error("Couldn't get the lock on the file in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        $this->stream = $stream;

        if(!$exists) {
            $this->num_urls = 0;
            $this->size = $this->GetFirst2RowsLength() + 1 + $this->GetLatest2RowsLength(); // 1st and 2nd row + new line (1) + penultimate and last row
            $this->WriteOpenInfo(); // write 1st and 2nd row
        } else {
            $this->num_urls = $this->GetNumUrls();
            $this->size = $this->GetSize();
            $this->GoToInsertPosition();
        }
    }

    /** Close the sitemap file.
     * Writes the closing rows, closes the stream (which also releases the
     * lock) and renames the temporary file over the sitemap file.
     */
    private function Close() {
        if(!isset($this->stream)) return;    // already closed

        $this->WriteCloseInfo();    // write the latest 2 rows
        fclose($this->stream);

        if(!rename($this->tmp, $this->path))
            trigger_error("Couldn't rename the temporary file in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        // forget the per-session state so that the next Open() starts clean
        $this->stream = NULL;
        $this->num_urls = NULL;
        $this->size = NULL;
    }

    /** Write the opening informations on the file
     */
    private function WriteOpenInfo() {
        if(!isset($this->stream))
            trigger_error("Trying to write on a closed stream in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        fwrite($this->stream, $this->GetFirstRow()."\n");
        fwrite($this->stream, $this->GetSecondRow()."\n");
    }

    /** Get the first row to write as opening informations in sitemap file
     * (XML declaration followed by the generator comment).
     *
     * @return the first row
     */
    private function GetFirstRow() {
        return self::XML_HEADER."\n".self::GENERATOR;
    }

    /** Get the second row to write as opening informations in sitemap file
     * (opening root tag with its namespace).
     *
     * @return the second row
     */
    private function GetSecondRow() {
        return '<'.$this->GetRootTag().' '.self::XML_NAMESPACE.'>';
    }

    /** Get the first 2 rows length
     *
     * @return the first 2 rows length
     */
    private function GetFirst2RowsLength() {
        return strlen($this->GetFirstRow()) + 1 + strlen($this->GetSecondRow());   // first row + new line (1) + second row
    }

    /** Write the closing informations on the file
     */
    private function WriteCloseInfo() {
        if(!isset($this->stream))
            trigger_error("Trying to write on a closed stream in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        fwrite($this->stream, $this->GetPenultimateRow()."\n");
        fwrite($this->stream, $this->GetLastRow());
    }

    /** Get the first row to write as closing informations in sitemap file
     * (penultimate file row): the closing root tag.
     *
     * @return the penultimate row
     */
    private function GetPenultimateRow() {
        return '</'.$this->GetRootTag().'>';
    }

    /** Get the second row to write as closing informations in sitemap file
     * (last file row): a comment with the url count, left-padded with spaces
     * to the width of MAX_URLS so that the row always has the same length.
     *
     * @return the last row
     */
    private function GetLastRow() {
        $width = strlen((string)self::MAX_URLS);
        return '<!-- '.str_pad((string)$this->GetNumUrls(), $width, ' ', STR_PAD_LEFT).' -->';
    }

    /** Get the last row length.
     * Used in GetNumUrls in place of strlen($this->GetLastRow()) to avoid the
     * recursion GetNumUrls() -> GetLastRow() -> GetNumUrls()
     *
     * @return the last row length
     */
    private function GetLastRowLength() {
        return strlen('<!-- '.self::MAX_URLS.' -->');
    }

    /** Get the latest 2 rows length
     *
     * @return the latest 2 rows length
     */
    private function GetLatest2RowsLength() {
        return strlen($this->GetPenultimateRow()) + 1 + $this->GetLastRowLength();  // penultimate row + new line (1) + last row
    }

    /** Move the file pointer to the position for a new insertion, i.e. to
     * the start of the two closing rows, which are then overwritten.
     */
    private function GoToInsertPosition() {
        if(!isset($this->stream))
            trigger_error("Trying to read into a closed stream in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        $offset = $this->GetLatest2RowsLength();  // latest 2 rows length
        fseek($this->stream, -$offset, SEEK_END);
    }

    /** Get the number of urls in the file.
     * Returns the tracked counter when it is set; otherwise parses it from
     * the fixed-width last row of the open file, leaving the file pointer
     * where it was.
     *
     * @return the number of urls in the file, as an integer
     */
    private function GetNumUrls() {
        if(!isset($this->stream))
            trigger_error("Trying to read into a closed stream in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        if(isset($this->num_urls)) return $this->num_urls;

        $pos = ftell($this->stream);    // save the current position

        $offset = $this->GetLastRowLength();  // last row length
        fseek($this->stream, -$offset, SEEK_END);
        $str = fread($this->stream, $offset);

        fseek($this->stream, $pos); // restore the file pointer

        $comment = ($str === FALSE) ? FALSE : strstr($str, '<!-- ');
        if($comment === FALSE || !preg_match('/[0-9]+/', $comment, $matches)) {
            // the file does not end with the counter row this class writes
            trigger_error("Couldn't read the number of urls in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);
            return 0;
        }

        return (int)$matches[0];
    }

    /** Get the file size: the tracked size when set, the size of the
     * sitemap file on disk otherwise.
     *
     * @return the file size
     */
    private function GetSize() {
        if(isset($this->size)) return $this->size;
        return filesize($this->path);
    }

    /** Add an url to the sitemap file.
     * The url is refused when the sitemap already holds MAX_URLS entries or
     * when writing it would make the file larger than MAX_SIZE.
     *
     * @param $url an url string or a SitemapItem object
     * @return TRUE if the url is added, FALSE otherwise
     */
    private function AddUrl($url) {
        if(!isset($this->stream))
            trigger_error("Trying to write on a closed stream in function ".__CLASS__."::".__FUNCTION__."(),", E_USER_ERROR);

        if($this->num_urls >= self::MAX_URLS) return FALSE;    // too many urls in sitemap

        $item = ($url instanceof SitemapItem) ? $url : new SitemapItem($url);  // make sure the url is a SitemapItem
        $tag = $this->GetUrlTag();

        $xml_url = "\t".'<'.$tag.'>'."\n";
        $xml_url .= "\t\t".'<loc>'.$item->loc.'</loc>'."\n";   // loc is a required element
        if(!is_null($item->lastmod))
            $xml_url .= "\t\t".'<lastmod>'.$item->lastmod.'</lastmod>'."\n";
        if(!is_null($item->changefreq))
            $xml_url .= "\t\t".'<changefreq>'.$item->changefreq.'</changefreq>'."\n";
        if(!is_null($item->priority))
            $xml_url .= "\t\t".'<priority>'.$item->priority.'</priority>'."\n";
        $xml_url .= "\t".'</'.$tag.'>';

        $new_size = $this->GetSize() + strlen($xml_url) + 1;   // entry + new line (1)
        if($new_size > self::MAX_SIZE) return FALSE;    // sitemap is too large

        fwrite($this->stream, $xml_url."\n");
        $this->num_urls++;
        $this->size = $new_size;
        return TRUE;
    }

    /** Add urls to this sitemap file.
     * Opens the sitemap file, adds urls in order until one is refused, then
     * closes the file.
     *
     * @param $urls single url or a SitemapItem object, array of urls or array of SitemapItem objects
     * @return the number of urls inserted
     */
    public function AddUrls($urls) {
        $this->Open();

        $arr = is_array($urls) ? $urls : array($urls);    // always work on an array

        $before = $this->GetNumUrls();
        foreach($arr as $url) {
            if(!$this->AddUrl($url))
                break;
        }
        $inserted = $this->GetNumUrls() - $before;

        $this->Close();
        return $inserted;
    }
}

/** A single sitemap entry (location plus optional metadata) that can be
 * added to a Sitemap object. Values are normalized in the constructor and
 * exposed read-only through __get().
 *
 * @category   class
 * @package    sitemap_manager
 * @author     Giuseppe Sucameli <hide@address.com>
 * @copyright  2009 Giuseppe Sucameli
 * @license    http://www.gnu.org/licenses/gpl.html GPL V 2.0
 * @version    0.1
 */
class SitemapItem
{
    const MAX_URL_LENGTH = 2047;    // max length for the $loc property
    const FORMAT_DATE = "Y-m-d";    // format for the $lastmod property
    const MIN_PRIORITY = 0.0;   // min value for the $priority property
    const MAX_PRIORITY = 1.0;   // max value for the $priority property

    private $loc;   // location of a page, XML-escaped
    private $lastmod;   // date of last modification
    private $changefreq;    // change frequency
    private $priority;  // link priority, formatted with one decimal digit

    private static $CHANGE_FREQ_VALUES = array(
        'always',
        'hourly',
        'daily',
        'weekly',
        'monthly',
        'yearly',
        'never'
    );  // list of possible values for $changefreq property

    private static $READABLE = array('loc', 'lastmod', 'changefreq', 'priority');   // properties exposed by __get()

    /** Get the value of a object's property
     *
     * @param $name property name
     * @return the property value if that property exists, NULL otherwise
     */
    public function __get($name) {
        // answer NULL for unknown names instead of raising an undefined-property notice
        if(!in_array($name, self::$READABLE, TRUE)) return NULL;
        return $this->$name;
    }

    /** Construct the object and initialize its properties.
     * Optional arguments that are missing or not valid leave the matching
     * property NULL.
     *
     * @param $location url of a page/sitemap; truncated to MAX_URL_LENGTH bytes (with a warning) and XML-escaped
     * @param $lastmod last modification of the page/sitemap (optional), a date in format YYYY-MM-DD or in ticks (returned by time())
     * @param $changefreq change frequency of the page/sitemap (optional), one of values in $CHANGE_FREQ_VALUES
     * @param $priority the priority of this url (optional), in range 0.0-1.0; values outside the range are clamped
     */
    public function __construct($location, $lastmod = NULL, $changefreq = NULL, $priority = NULL) {
        $loc = $location;
        if(strlen($loc) > self::MAX_URL_LENGTH) {
            trigger_error("Argument 1 (location) for function ".__CLASS__."::".__FUNCTION__."() is more than ".self::MAX_URL_LENGTH." characters and will be truncated,", E_USER_WARNING);
            $loc = substr($loc, 0, self::MAX_URL_LENGTH);
        }
        // Escape only the characters that are special in XML. htmlentities()
        // would also emit HTML-only named entities (e.g. &eacute;) which are
        // not defined in XML and make the sitemap unparsable.
        $this->loc = htmlspecialchars($loc, ENT_QUOTES, 'UTF-8');

        if(is_string($lastmod) && !empty($lastmod))
            $this->lastmod = $lastmod;  // date strings are stored as given
        else if(is_numeric($lastmod) && ($date = date(self::FORMAT_DATE, (int)$lastmod)) !== FALSE)
            $this->lastmod = $date;     // ticks are converted with FORMAT_DATE

        // strict comparison: a loose in_array() accepts TRUE (and 0 before PHP 8)
        if(in_array($changefreq, self::$CHANGE_FREQ_VALUES, TRUE))
            $this->changefreq = $changefreq;

        if(is_numeric($priority)) {
            if($priority < self::MIN_PRIORITY) $priority = self::MIN_PRIORITY;
            else if($priority > self::MAX_PRIORITY) $priority = self::MAX_PRIORITY;

            // %F is not locale aware, so the decimal separator is always a dot
            $this->priority = sprintf("%01.1F", $priority);
        }
    }

}

?>
Return current item: Sitemap Manager