Location: PHPKode > scripts > PHP XML Sitemap generator > PHP XML Sitemap generator.php
<?php
/**
* @author Alan T. Miller <hide@address.com>
* @copyright Copyright (C) 2010, Alan T Miller, All Rights Reserved.
 *
 * Creates a properly formatted XML sitemap
 *
 * Sample Usage:
 *
 * // Create an array of xml_sitemap_entry objects
 * $entries[] = new xml_sitemap_entry('/', '1.00', 'weekly');
 * $entries[] = new xml_sitemap_entry('/somepage.html', '0.95', 'weekly');
 * $entries[] = new xml_sitemap_entry('/otherpage.html', '0.90', 'monthly');
 *
 * // set up the xml generator config object
 * $conf = new xml_sitemap_generator_config();
 * $conf->setDomain('www.somedomainsomewhere.com');
 * $conf->setPath('/var/tmp/');
 * $conf->setFilename('sitemap.xml');
 * $conf->setEntries($entries);
 *
 * // instantiate and execute
 * $generator = new xml_sitemap_generator($conf);
 * $generator->write(); // or $generator->toString();
 *
 */
/**
 * Configuration holder for xml_sitemap_generator.
 *
 * Stores the domain, the output directory, the output filename and the list
 * of xml_sitemap_entry objects. All setters return $this so calls can be
 * chained.
 *
 * Note: setPath(), setFilename() and sanityCheck() terminate the script with
 * exit() on invalid input, whereas setEntries() throws an Exception.
 */
class xml_sitemap_generator_config
{
    private $_domain;
    private $_path;
    private $_filename;
    private $_entries = array();

    /**
     * Read a configuration value by name.
     *
     * @param string $arg one of: domain, path, filename, filepath, entries
     * @return mixed the requested value, or null when $arg is not recognized
     */
    public function get($arg)
    {
        switch ($arg) {
            case 'domain':
                return $this->_domain;
            case 'path':
                return $this->_path;
            case 'filename':
                return $this->_filename;
            case 'filepath':
                return $this->_getFilepath();
            case 'entries':
                return $this->_entries;
            default:
                return null;
        }
    }

    /**
     * Set the domain used as prefix for every entry location.
     *
     * @param string $domain
     * @return xml_sitemap_generator_config
     */
    public function setDomain($domain)
    {
        $this->_domain = trim($domain);
        return $this;
    }

    /**
     * Set the directory the sitemap file is written to.
     *
     * Terminates the script when the directory does not exist or is not
     * writable.
     *
     * @param string $path
     * @return xml_sitemap_generator_config
     */
    public function setPath($path)
    {
        $path = trim($path);

        // clean trailing slash if exists; a bare "/" is kept so that the
        // filesystem root is not turned into an empty (invalid) path
        if (strlen($path) > 1 && substr($path, -1) == '/') {
            $path = substr($path, 0, -1);
        }

        // check if write directory is valid
        if (!is_dir($path)) {
            exit(sprintf('write directory does not exist: %s'."\n", $path));
        }

        // check if write dir is writable
        if (!is_writable($path)) {
            exit(sprintf('write directory not writable: %s'."\n", $path));
        }

        $this->_path = $path;
        return $this;
    }

    /**
     * Set the sitemap filename (must end in "xml").
     *
     * Terminates the script when the name does not end in "xml". Leading
     * slashes are removed because the filename is later joined to the path.
     *
     * @param string $filename
     * @return xml_sitemap_generator_config
     */
    public function setFilename($filename)
    {
        $filename = trim($filename);
        if (strtolower(substr($filename, -3)) != 'xml') {
            exit(sprintf('filename must end with: xml: %s'."\n", $filename));
        }

        // remove leading slash if exists (the previous substr($f, 1, 0)
        // call always produced an empty string)
        $filename = ltrim($filename, '/');

        $this->_filename = $filename;
        return $this;
    }

    /**
     * Set the sitemap entries.
     *
     * @param array $entries list of xml_sitemap_entry objects
     * @return xml_sitemap_generator_config
     * @throws Exception when $entries is not an array or contains anything
     *                   other than xml_sitemap_entry objects
     */
    public function setEntries($entries)
    {
        if (!is_array($entries)) {
            throw new Exception('setEntries() method expecs an array of objects');
        }

        foreach ($entries AS $entry) {
            // instanceof is false for non-objects as well, so one test covers
            // both cases
            if (!($entry instanceof xml_sitemap_entry)) {
                throw new Exception('setEntries() method expects an aray of xml_sitemap_entry objects');
            }
        }
        $this->_entries = $entries;
        return $this;
    }

    /**
     * make sure we can proceed without issues
     *
     * Terminates the script when the filename or the domain is missing.
     */
    public function sanityCheck()
    {
        // cast to string: the properties are null until their setter runs
        if (strlen((string) $this->_filename) === 0) {
            exit('Error: sitemap filename not set in configuration object');
        }

        if (strlen((string) $this->_domain) === 0) {
            exit('Error: domain not set in configuration object');
        }
    }

    /**
     * Join path and filename with exactly one slash between them.
     *
     * @return string
     */
    private function _getFilepath()
    {
        return rtrim((string) $this->_path, '/') . '/' . $this->_filename;
    }
}

/**
 * A single <url> entry of the sitemap.
 *
 * Holds the location, priority, change frequency and last-modified date.
 * Values are validated on construction; getters fall back to defaults
 * (priority 0.5, changefreq monthly, lastmod today) when a value is unset.
 */
class xml_sitemap_entry
{
    private $_loc;
    private $_priority;
    private $_changefreq;
    private $_lastmod;

    private $_frequencies = array('always','hourly','daily','weekly','monthly','yearly','never');
    private $_priorities = array('0.0','0.1','0.2','0.3','0.4','0.5','0.6','0.7','0.8','0.9','1.0');

    /**
     * @param string $loc        location, appended to the configured domain
     * @param string $priority   numeric value between 0.0 and 1.0
     * @param string $changefreq optional, one of the 'frequencies' values
     * @param string $lastmod    optional, any date string date_parse() understands
     * @throws Exception when priority, changefreq or lastmod is invalid
     */
    public function __construct($loc, $priority, $changefreq="", $lastmod='')
    {
        $this->_setLoc($loc);
        $this->_setPriority($priority);

        if (strlen((string) $changefreq) > 0) {
            $this->_setChangefreq($changefreq);
        }
        if (strlen((string) $lastmod) > 0) {
            $this->_setLastmod($lastmod);
        }
    }

    /**
     * Read a value by name.
     *
     * @param string $arg one of: loc, priority, changefreq, lastmod,
     *                    frequencies, priorities
     * @return mixed
     * @throws Exception when $arg is not recognized
     */
    public function get($arg)
    {
        switch($arg)
        {
            case 'loc':
                return $this->_getLoc();
            case 'priority':
                return $this->_getPriority();
            case 'changefreq':
                return $this->_getChangefreq();
            case 'lastmod':
                return $this->_getLastmod();
            case 'frequencies':
                return $this->_frequencies;
            case 'priorities':
                return $this->_priorities;
            default:
                throw new Exception('get() method in class: '.__CLASS__.' did not recognize the argument');
        }
    }

    private function _setLoc($loc)
    {
        $this->_loc = $loc;
        return $this;
    }

    /**
     * Accept any numeric priority in the range 0.0 - 1.0.
     *
     * The value used to be matched against the fixed list of tenths, which
     * rejected values such as '0.95'. The trimmed input is stored as given.
     */
    private function _setPriority($priority)
    {
        $priority = trim((string) $priority);

        $inRange = is_numeric($priority)
            && (float) $priority >= 0.0
            && (float) $priority <= 1.0;

        if (!$inRange) {
            throw new Exception('setPriority() method is expecting a value between 0.0 and 1.0');
        }

        $this->_priority = $priority;
        return $this;
    }

    private function _setChangefreq($changefreq)
    {
        $changefreq = strtolower(trim($changefreq));

        if (!in_array($changefreq, $this->_frequencies, true)) {
            throw new Exception('setChangefreq() method is expecting a value such as hourly daily, weekly etc');
        }

        $this->_changefreq = $changefreq;
        return $this;
    }

    /**
     * Parse $lastmod and store it as a zero-padded YYYY-MM-DD date.
     */
    private function _setLastmod($lastmod)
    {
        $arr = date_parse($lastmod);

        // date_parse() reports components it could not determine as false;
        // the int casts turn those into 0, which checkdate() rejects
        $year  = (int) $arr['year'];
        $month = (int) $arr['month'];
        $day   = (int) $arr['day'];

        if (!checkdate($month, $day, $year)) {
            throw new Exception('setLastmod() method expects a valid date');
        }

        // zero-pad so that e.g. January 5th becomes 2010-01-05, not 2010-1-5
        $this->_lastmod = sprintf('%04d-%02d-%02d', $year, $month, $day);
        return $this;
    }

    private function _getLoc()
    {
        return $this->_loc;
    }

    private function _getPriority()
    {
        if (strlen((string) $this->_priority) === 0) {
            return '0.5';
        }
        return $this->_priority;
    }

    private function _getLastmod()
    {
        // default to the current date when no lastmod was supplied
        if (strlen((string) $this->_lastmod) === 0) {
            return date('Y-m-d');
        }
        return $this->_lastmod;
    }

    private function _getChangefreq()
    {
        if (strlen((string) $this->_changefreq) === 0) {
            return 'monthly';
        }
        return $this->_changefreq;
    }
}

/**
 * Builds the sitemap XML from an xml_sitemap_generator_config and either
 * returns it as a string or writes it to the configured file.
 */
class xml_sitemap_generator
{
    private $_conf;
    private $_blocks;
    private $_xml;

    /**
     * @param xml_sitemap_generator_config $conf
     */
    public function __construct(xml_sitemap_generator_config $conf)
    {
        $this->_conf = $conf;
    }

    /**
     * exists to maintain legacy interface,
     * other than that, it is not needed.
     *
     */
    public function build()
    {
        $this->_build();
    }

    /**
     * retrieve XML sitemap as a string
     *
     * @return string
     */
    public function getXml()
    {
        $this->_build();
        return $this->_xml;
    }

    /**
     * Alias of getXml().
     *
     * @return string
     */
    public function toString()
    {
        return $this->getXml();
    }

    /**
     * write XML sitemap to disk
     *
     * @throws Exception when the file could not be written
     */
    public function write()
    {
        $this->_build();

        $filepath = $this->_conf->get('filepath');

        // file_put_contents() returns the byte count on success and false on
        // failure, so compare against false explicitly
        if (file_put_contents($filepath, $this->_xml) === false) {
            throw new Exception('cound not write file: '.$filepath."\n");
        }
    }

    private function _append($xml)
    {
        $this->_xml .= $xml;
    }

    /**
     * Assemble header, url blocks and footer into $this->_xml.
     *
     * The buffers are reset first so that calling build(), getXml(),
     * toString() or write() more than once does not duplicate the document.
     */
    private function _build()
    {
        $this->_conf->sanityCheck();

        $this->_xml = '';
        $this->_blocks = '';

        $this->_append($this->_buildHeader());
        $this->_append($this->_buildBlocks());
        $this->_append($this->_buildFooter());
    }

    private function _buildHeader()
    {
        $header  = '<'.'?'.'xml version="1.0" encoding="UTF-8"?'.'>'."\n";
        $header .= "\t".'<urlset ';
        $header .= 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'."\n";
        return $header;
    }

    private function _buildFooter()
    {
        return '</urlset>'."\n";
    }

    /**
     * Render every configured entry as a <url> block.
     *
     * @return string
     */
    private function _buildBlocks()
    {
        $blocks = '';
        foreach ($this->_conf->get('entries') AS $entry) {
            $blocks .= $this->_buildEntry($entry);
        }
        $this->_blocks = $blocks;
        return $this->_blocks;
    }

    private function _buildEntry(xml_sitemap_entry $entry)
    {
        $domain = $this->_conf->get('domain');

        // only prepend the default scheme when the domain does not carry one
        if (preg_match('#^https?://#i', $domain)) {
            $loc = $domain . $entry->get('loc');
        } else {
            $loc = sprintf("http://%s%s", $domain, $entry->get('loc'));
        }

        return sprintf("<url>\n%s%s%s%s</url>\n",
                $this->_buildLine('loc', $loc),
                $this->_buildLine('priority', $entry->get('priority')),
                $this->_buildLine('changefreq', $entry->get('changefreq')),
                $this->_buildLine('lastmod', $entry->get('lastmod')));
    }

    /**
     * Render one "<tag>content</tag>" line.
     *
     * Content that is not valid UTF-8 is treated as ISO-8859-1 and converted.
     * The content is then entity-escaped so characters such as "&" in a URL
     * do not break the XML; entities that are already present are left alone.
     *
     * @param string $tagname
     * @param string $content
     * @return string
     */
    private function _buildLine($tagname, $content)
    {
        $content = (string) $content;

        if (!$this->_is_utf8($content)) {
            // utf8_encode() is deprecated as of PHP 8.2; prefer mbstring
            // when it is available
            if (function_exists('mb_convert_encoding')) {
                $content = trim(mb_convert_encoding($content, 'UTF-8', 'ISO-8859-1'));
            } else {
                $content = trim(utf8_encode($content));
            }
        }

        $flags = defined('ENT_XML1') ? (ENT_QUOTES | ENT_XML1) : ENT_QUOTES;
        $content = htmlspecialchars($content, $flags, 'UTF-8', false);

        return sprintf("\t<%s>%s</%s>\n",
                       $tagname, $content, $tagname);
    }

    /**
     * Check whether $str consists solely of well-formed UTF-8 sequences
     * (plus tab, LF, CR and printable ASCII).
     *
     * @param string $str
     * @return bool
     */
    private function _is_utf8($str)
    {
        // function borrowed from:
        // http://w3.org/International/questions/qa-forms-utf-8.html

        return preg_match('%^(?:
              [\x09\x0A\x0D\x20-\x7E]            # ASCII
            | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
            |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
            | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
            |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
            |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
            | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
            |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
        )*$%xs', $str) === 1;
    }


}
// Return current item: PHP XML Sitemap generator