Location: PHPKode > scripts > PHP DataGrid > pear/SQL/Lexer.php
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +----------------------------------------------------------------------+
// | Copyright (c) 2002-2004 Brent Cook                                        |
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or        |
// | modify it under the terms of the GNU Lesser General Public           |
// | License as published by the Free Software Foundation; either         |
// | version 2.1 of the License, or (at your option) any later version.   |
// |                                                                      |
// | This library is distributed in the hope that it will be useful,      |
// | but WITHOUT ANY WARRANTY; without even the implied warranty of       |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
// | Lesser General Public License for more details.                      |
// |                                                                      |
// | You should have received a copy of the GNU Lesser General Public     |
// | License along with this library; if not, write to the Free Software  |
// | Foundation, Inc., 59 Temple Place, Suite 330,Boston,MA 02111-1307 USA|
// +----------------------------------------------------------------------+
// | Authors: Brent Cook <hide@address.com>                          |
// |          Jason Pell <hide@address.com>                          |
// +----------------------------------------------------------------------+
//
// $Id: Lexer.php,v 1.20 2004/05/07 12:33:35 busterb Exp $
//

include PEAR_DIR . 'SQL/ctype.php';

// {{{ token definitions
// variables: 'ident', 'sys_var'
// values:    'real_val', 'text_val', 'int_val', null
// }}}

/**
 * A lexical analyser inspired by the msql lexer
 *
 * @author  Brent Cook <hide@address.com>
 * @version 0.5
 * @access  public
 * @package SQL_Parser
 */
class Lexer
{
    /**
     * Valid tokens for the lexer to recognize.
     * Format is 'token literal' => TOKEN_VALUE. Keys are compared against
     * the lower-cased text of each keyword/identifier candidate.
     */
    public $symbols = array();

// {{{ instance variables
    /** Index into $string of the next character get()/skip() will read. */
    public $tokPtr = 0;
    /** Index into $string at which the current token starts. */
    public $tokStart = 0;
    /** Number of characters consumed by get() for the current token. */
    public $tokLen = 0;
    /** Text (or numeric value) of the most recently recognized token. */
    public $tokText = '';
    /** Number of line breaks handled so far. */
    public $lineNo = 0;
    /** Value of $tokPtr recorded when the last line break was handled. */
    public $lineBegin = 0;
    /** The input being tokenized. */
    public $string = '';
    /** strlen() of $string, cached by the constructor. */
    public $stringLen = 0;

    /** Value of $tokStart on entry to nextToken(); not altered by skip(). */
    public $tokAbsStart = 0;
    /** Input text lying between $tokAbsStart and the start of the token. */
    public $skipText = '';

    /** Maximum number of tokens kept in $tokenStack (0 disables pushBack). */
    public $lookahead = 0;
    /** Recently returned tokens: arrays with 'token', 'tokText', 'skipText'. */
    public $tokenStack = array();
    /** Position in $tokenStack of the next token to hand out or to store. */
    public $stackPtr = 0;
// }}}

// {{{ incidental functions
    /**
     * Create a lexer for the given input.
     *
     * @param string $string    text to tokenize
     * @param int    $lookahead how many tokens may be pushed back
     */
    public function __construct($string = '', $lookahead = 0)
    {
        $this->string = $string;
        $this->stringLen = strlen($string);
        $this->lookahead = $lookahead;
    }

    /**
     * PHP 4 style constructor, kept so existing code that calls
     * $this->Lexer(...) keeps working. It must be declared after
     * __construct() so that __construct() is the constructor everywhere.
     *
     * @param string $string    text to tokenize
     * @param int    $lookahead how many tokens may be pushed back
     */
    public function Lexer($string = '', $lookahead = 0)
    {
        $this->__construct($string, $lookahead);
    }

    /**
     * Consume one character as part of the current token.
     *
     * @return string|null the character, or null when past the end of input
     */
    public function get()
    {
        ++$this->tokPtr;
        ++$this->tokLen;
        return ($this->tokPtr <= $this->stringLen)
            ? $this->string[$this->tokPtr - 1]
            : null;
    }

    /**
     * Give back the character most recently consumed with get().
     */
    public function unget()
    {
        --$this->tokPtr;
        --$this->tokLen;
    }

    /**
     * Consume one character that is NOT part of the token and move the
     * token start forward past it.
     *
     * @return string the character, or '' at the end of input (in which
     *                case $tokPtr is left unchanged)
     */
    public function skip()
    {
        ++$this->tokStart;
        // "<" rather than "!=" so a pointer already past the end can never
        // index outside the string.
        return ($this->tokPtr < $this->stringLen)
            ? $this->string[$this->tokPtr++]
            : '';
    }

    /**
     * Rewind the read pointer to the start of the current token.
     */
    public function revert()
    {
        $this->tokPtr = $this->tokStart;
        $this->tokLen = 0;
    }

    /**
     * @param  string|null $c a single character
     * @return bool whether $c can be part of a comparison operator
     */
    public function isCompop($c)
    {
        return (($c == '<') || ($c == '>') || ($c == '=') || ($c == '!'));
    }

    /**
     * Shared tail of every accepting state: record the text that was
     * skipped before the token, then move the token start past the token.
     */
    protected function _finishToken()
    {
        $this->skipText = substr($this->string, $this->tokAbsStart,
                                 $this->tokStart - $this->tokAbsStart);
        $this->tokStart = $this->tokPtr;
    }
// }}}

// {{{ pushBack()
    /**
     * Push back a token, so the very next call to lex() will return it.
     *
     * The call is ignored if no lookahead was given to the constructor, or
     * if pushBack() has already been called as many times as there are
     * tokens saved for looking back.
     */
    public function pushBack()
    {
        if ($this->lookahead > 0 && count($this->tokenStack) > 0 && $this->stackPtr > 0) {
            $this->stackPtr--;
        }
    }
// }}}

// {{{ lex()
    /**
     * Return the next token, honouring tokens handed back via pushBack().
     *
     * @return string|null token type or literal; null at end of input
     */
    public function lex()
    {
        if ($this->lookahead <= 0) {
            return $this->nextToken();
        }

        // $stackPtr normally equals count($tokenStack), i.e. it points at
        // the slot for the next new token. After pushBack() it is smaller,
        // meaning saved tokens must be replayed before reading new input.
        if ($this->stackPtr < count($this->tokenStack)) {
            $saved = $this->tokenStack[$this->stackPtr];
            $this->tokText = $saved['tokText'];
            $this->skipText = $saved['skipText'];
            $this->stackPtr++;
            return $saved['token'];
        }

        // Stack full: drop the oldest entry by shifting everything down one
        // slot (done by hand so the indexes stay 0..n-1), then reuse the
        // last slot for the new token.
        if ($this->stackPtr == $this->lookahead) {
            $last = count($this->tokenStack) - 1;
            for ($i = 0; $i < $last; $i++) {
                $this->tokenStack[$i] = $this->tokenStack[$i + 1];
            }
            $this->stackPtr--;
        }

        $token = $this->nextToken();
        $this->tokenStack[$this->stackPtr] = array(
            'token'    => $token,
            'tokText'  => $this->tokText,
            'skipText' => $this->skipText,
        );
        $this->stackPtr++;
        return $token;
    }
// }}}

// {{{ nextToken()
    /**
     * Scan the next token from the input with a small state machine.
     *
     * Sets $tokText to the token's text/value and $skipText to the input
     * that was skipped in front of it.
     *
     * NOTE(review): ctype_*() is called with ord() values. Integer arguments
     * to ctype functions are deprecated as of PHP 8.1; they are kept here
     * because the bundled SQL/ctype.php fallback presumably expects
     * ordinals - confirm before changing.
     *
     * @return string|null 'ident', 'sys_var', 'int_val', 'real_val',
     *                     'text_val', a lower-cased key of $symbols, a
     *                     literal operator/character, or null at end of
     *                     input (and when the input string is empty)
     */
    public function nextToken()
    {
        if ($this->string == '') {
            return null;
        }
        $state = 0;
        $this->tokAbsStart = $this->tokStart;

        while (true) {
            switch ($state) {
                // {{{ State 0 : Start of token
                case 0:
                    $this->tokPtr = $this->tokStart;
                    $this->tokText = '';
                    $this->tokLen = 0;
                    $c = $this->get();

                    if (is_null($c)) { // End Of Input
                        $state = 1000;
                        break;
                    }

                    // Skip whitespace. Invariant: $c is the character at
                    // index $tokStart and $tokPtr == $tokStart + 1.
                    while (($c == ' ') || ($c == "\t")
                        || ($c == "\n") || ($c == "\r")) {
                        if ($c == "\n" || $c == "\r") {
                            // Handle MAC/Unix/Windows line endings.
                            if ($c == "\r") {
                                $c = $this->skip();
                                if ($c != "\n") {
                                    // Lone CR: undo the peek completely.
                                    // skip() always advances $tokStart but
                                    // only advances $tokPtr when it actually
                                    // read a character.
                                    --$this->tokStart;
                                    if ($c !== '') {
                                        --$this->tokPtr;
                                    }
                                }
                            }
                            ++$this->lineNo;
                            $this->lineBegin = $this->tokPtr;
                        }

                        $c = $this->skip();
                        $this->tokLen = 1;
                    }

                    // skip() returns '' only at the end of the input, so the
                    // input ended inside the whitespace run.
                    if ($c === '') {
                        $state = 1000;
                        break;
                    }

                    // Escape quotes and backslashes
                    if ($c == '\\') {
                        $t = $this->get();
                        if ($t == '\'' || $t == '\\' || $t == '"') {
                            $this->tokText = $t;
                            $this->_finishToken();
                            return $this->tokText;
                        }
                        $this->unget();
                        // Unknown token.  Revert to single char
                        $state = 999;
                        break;
                    }

                    if (($c == '\'') || ($c == '"')) { // text string
                        $quote = $c;
                        $state = 12;
                        break;
                    }

                    if ($c == '_') { // system variable
                        $state = 18;
                        break;
                    }

                    if (ctype_alpha(ord($c))) { // keyword or ident
                        $state = 1;
                        break;
                    }

                    if (ctype_digit(ord($c))) { // real or int number
                        $state = 5;
                        break;
                    }

                    if ($c == '.') {
                        $t = $this->get();
                        if ($t == '.') { // ellipsis
                            if ($this->get() == '.') {
                                $this->tokText = '...';
                                $this->_finishToken();
                                return $this->tokText;
                            }
                            $state = 999;
                            break;
                        } else if (ctype_digit(ord($t))) { // real number
                            $this->unget();
                            $state = 7;
                            break;
                        } else { // period
                            $this->unget();
                        }
                    }

                    if ($c == '#') { // Comments
                        $state = 14;
                        break;
                    }

                    if ($c == '-') {
                        $t = $this->get();
                        if ($t == '-') { // "--" comment
                            $state = 14;
                            break;
                        }
                        $this->unget();
                        // Only a '-' followed by a digit or a decimal point
                        // starts a negative number; any other '-' is
                        // returned as a single character.
                        $state = (ctype_digit(ord($t)) || $t == '.') ? 5 : 999;
                        break;
                    }

                    if ($this->isCompop($c)) { // comparison operator
                        $state = 10;
                        break;
                    }

                    // Unknown token.  Revert to single char
                    $state = 999;
                    break;
                // }}}

                // {{{ State 1 : Incomplete keyword or ident
                case 1:
                    $c = $this->get();
                    if (ctype_alnum(ord($c)) || ($c == '_') || ($c == '.')) {
                        $state = 1;
                        break;
                    }
                    $state = 2;
                    break;
                // }}}

                // {{{ State 2 : Complete keyword or ident
                case 2:
                    $this->unget(); // give back the terminating character
                    $this->tokText = substr($this->string, $this->tokStart,
                                            $this->tokLen);
                    $testToken = strtolower($this->tokText);
                    $this->_finishToken();
                    return isset($this->symbols[$testToken]) ? $testToken : 'ident';
                // }}}

                // {{{ State 5: Incomplete real or int number
                case 5:
                    $c = $this->get();
                    if (ctype_digit(ord($c))) {
                        $state = 5;
                        break;
                    }
                    if ($c == '.') {
                        $t = $this->get();
                        if ($t != '.') { // real number
                            $state = 7;
                            break;
                        }
                        // Integer followed by an ellipsis: give back the
                        // second '.' here; state 6 gives back the first.
                        $this->unget();
                        $state = 6;
                        break;
                    }
                    if (ctype_alpha(ord($c))) { // number must end with non-alpha character
                        $state = 999;
                        break;
                    }
                    // complete number
                    $state = 6;
                    break;
                // }}}

                // {{{ State 6: Complete integer number
                case 6:
                    $this->unget();
                    $this->tokText = intval(substr($this->string, $this->tokStart,
                                                   $this->tokLen));
                    $this->_finishToken();
                    return 'int_val';
                // }}}

                // {{{ State 7: Incomplete real number
                case 7:
                    $c = $this->get();
                    if ($c == 'e' || $c == 'E') { // exponent follows
                        $state = 15;
                        break;
                    }
                    if (ctype_digit(ord($c))) {
                        $state = 7;
                        break;
                    }
                    $state = 8;
                    break;
                // }}}

                // {{{ State 8: Complete real number
                case 8:
                    $this->unget();
                    $this->tokText = floatval(substr($this->string, $this->tokStart,
                                                     $this->tokLen));
                    $this->_finishToken();
                    return 'real_val';
                // }}}

                // {{{ State 10: Incomplete comparison operator
                case 10:
                    $c = $this->get();
                    if ($this->isCompop($c)) {
                        $state = 10;
                        break;
                    }
                    $state = 11;
                    break;
                // }}}

                // {{{ State 11: Complete comparison operator
                case 11:
                    $this->unget();
                    $this->tokText = substr($this->string, $this->tokStart,
                                            $this->tokLen);
                    if ($this->tokText) {
                        $this->_finishToken();
                        return $this->tokText;
                    }
                    $state = 999;
                    break;
                // }}}

                // {{{ State 12: Incomplete text string
                case 12:
                    $bail = false;
                    while (!$bail) {
                        switch ($this->get()) {
                            case '': // get() returned null: unterminated string
                                $this->tokText = null;
                                $bail = true;
                                break;
                            case "\\":
                                // Consume the escaped character so an escaped
                                // quote does not end the string.
                                if ($this->get() === null) {
                                    $this->tokText = null;
                                    $bail = true;
                                }
                                break;
                            case $quote:
                                // Drop the surrounding quotes.
                                $this->tokText = stripslashes(substr($this->string,
                                           ($this->tokStart + 1), ($this->tokLen - 2)));
                                $bail = true;
                                break;
                        }
                    }
                    if (!is_null($this->tokText)) {
                        $state = 13;
                        break;
                    }
                    $state = 999;
                    break;
                // }}}

                // {{{ State 13: Complete text string
                case 13:
                    $this->_finishToken();
                    return 'text_val';
                // }}}

                // {{{ State 14: Comment
                case 14:
                    $c = $this->skip();
                    if ($c == "\n" || $c == "\r" || $c == '') {
                        // Handle MAC/Unix/Windows line endings.
                        if ($c == "\r") {
                            $c = $this->skip();
                            // Not a DOS newline: give the character back,
                            // but only if skip() really consumed one.
                            if ($c != "\n" && $c !== '') {
                                $this->unget();
                            }
                        }

                        if ($c != '') {
                            ++$this->lineNo;
                            $this->lineBegin = $this->tokPtr;
                        }

                        // We need to skip all the text.
                        $this->tokStart = $this->tokPtr;
                        $state = 0;
                    } else {
                        $state = 14;
                    }
                    break;
                // }}}

                // {{{ State 15: Exponent Sign in Scientific Notation
                case 15:
                    $c = $this->get();
                    if ($c == '-' || $c == '+') {
                        $state = 16;
                        break;
                    }
                    $state = 999;
                    break;
                // }}}

                // {{{ State 16: Exponent Value-first digit in Scientific Notation
                case 16:
                    $c = $this->get();
                    if (ctype_digit(ord($c))) {
                        $state = 17;
                        break;
                    }
                    $state = 999;  // if no digit, then token is unknown
                    break;
                // }}}

                // {{{ State 17: Exponent Value in Scientific Notation
                case 17:
                    $c = $this->get();
                    if (ctype_digit(ord($c))) {
                        $state = 17;
                        break;
                    }
                    $state = 8;  // At least 1 exponent digit was required
                    break;
                // }}}

                // {{{ State 18 : Incomplete System Variable
                case 18:
                    $c = $this->get();
                    if (ctype_alnum(ord($c)) || $c == '_') {
                        $state = 18;
                        break;
                    }
                    $state = 19;
                    break;
                // }}}

                // {{{ State 19: Complete Sys Var
                case 19:
                    $this->unget();
                    $this->tokText = substr($this->string, $this->tokStart,
                                            $this->tokLen);
                    $this->_finishToken();
                    return 'sys_var';
                // }}}

                // {{{ State 999 : Unknown token.  Revert to single char
                case 999:
                    $this->revert();
                    $this->tokText = $this->get();
                    $this->_finishToken();
                    return $this->tokText;
                // }}}

                // {{{ State 1000 : End Of Input
                case 1000:
                    $this->tokText = '*end of input*';
                    $this->_finishToken();
                    return null;
                // }}}
            }
        }
    }
// }}}
}
?>
Return current item: PHP DataGrid