Location: PHPKode > scripts > Search Words > searchwords.class.php
<?php
/**
 * Class SearchWords
 * @author Rubens Takiguti Ribeiro
 * @version 1.0 2010-11-28
 * @license LGPL 3 or higher (http://www.gnu.org/licenses/lgpl-3.0.txt)
 * @copyright Copyright (C) 2010 Rubens Takiguti Ribeiro
 */
class SearchWords {

    /**
     * Gets the key-words a visitor typed into a search site, given that site's URL.
     *
     * The host is reduced to its main domain label (see parseHost()) and that
     * label selects where the words live: a query-string parameter for most
     * sites, or (part of) the URL path for a few of them.
     *
     * @param string $referer_url URL to be checked
     * @return string Words, or an empty string when the URL has no host, the
     *     site is not a known search site, or the expected parameter/path
     *     segment is missing
     * @throws InvalidArgumentException If parse_url() cannot parse the URL
     */
    public static function getWords($referer_url) {
        $url_data = parse_url($referer_url);
        if ($url_data === false) {
            throw new InvalidArgumentException('Invalid url: '.$referer_url, 1);
        }
        if (!isset($url_data['host'])) {
            return '';
        }
        $host_data = self::parseHost($url_data['host']);

        // parseHost() omits 'main_domain' when the host does not end in a
        // known suffix (e.g. "localhost"); such hosts are never search sites.
        if (!isset($host_data['main_domain'])) {
            return '';
        }

        // parse_url() leaves these keys out when the URL has no query/path,
        // so normalise them once instead of guarding every branch.
        $query = isset($url_data['query']) ? $url_data['query'] : '';
        $path = isset($url_data['path']) ? $url_data['path'] : '';

        switch ($host_data['main_domain']) {

        // Based on query string
        case 'google':
        case 'bing':
        case 'altavista':
        case 'aol':
        case 'galaxy':
        case 'dibdabdoo':
        case 'gigablast':
        case 'alexa':
        case 'blogscope':
        case 'icerocket':
        case 'sphere':
        case 'technorati':
        case 'freebooksearch':
        case 'exalead':
        case 'nstein':
        case 'oracle':
        case 'cheatsearch':
        case 'mahalo':
        case 'rollyo':
        case 'trexy':
        case 'accoona':
        case 'alleba':
        case 'ansearch':
        case 'daum':
        case 'guruji':
        case 'najdi':
        case 'sapo':
        case 'search':
        case 'walla':
            return self::getQueryParam('q', $query);
        case 'yahoo':
            return self::getQueryParam('p', $query);
        case 'email-search':
            return self::getQueryParam('s', $query);
        case 'saic':
        case 'onet':
            return self::getQueryParam('qt', $query);
        case 'baidu':
            return self::getQueryParam('wd', $query);
        case 'goo':
        case 'rediff':
            return self::getQueryParam('MT', $query);
        case 'hotbot':
        case 'lycos':
        case 'autonomy':
        case 'funnelback':
        case 'vivisimo':
        case 'naver':
        case 'rambler':
            return self::getQueryParam('query', $query);
        case 'kidsclick':
            return self::getQueryParam('keywords', $query);
        case 'askmenow':
            return self::getQueryParam('Keywords', $query);
        case 'souq':
            return self::getQueryParam('s_keyword', $query);
        case 'ifac':
            return self::getQueryParam('search', $query);
        case 'alibaba':
            return self::getQueryParam('SearchText', $query);
        case 'mymcpl':
            return self::getQueryParam('searchq', $query);
        case 'youtube':
            return self::getQueryParam('search_query', $query);
        case 'blogperfect':
            return self::getQueryParam('tsearch', $query);
        case 'dieselpoint':
            return self::getQueryParam('simplequerystring', $query);
        case 'yandex':
            return self::getQueryParam('text', $query);
        case 'awesomelibrary':
            return self::getQueryParam('terms', $query);
        case 'endeca':
            return self::getQueryParam('Nrt', $query);

        // Based on path
        case 'omgili':
        case 'eurekster':
        case 'wink':
        case 'miner':
            // The whole decoded path (leading slash included) is returned.
            return urldecode($path);
        case 'wikipedia':
            // Last path segment, returned as-is (not URL-decoded).
            $slash = strrpos($path, '/');
            if ($slash === false) {
                return '';
            }
            // Cast: substr() yields false instead of '' on older PHP
            // versions when the path ends with the slash.
            return (string) substr($path, $slash + 1);
        case 'excite':
            // The words are the path segment that follows "Web".
            $segments = explode('/', $path);
            $pos = array_search('Web', $segments, true);
            if ($pos !== false && isset($segments[$pos + 1])) {
                return urldecode($segments[$pos + 1]);
            }
            return '';
        }

        // Not a known search site.
        return '';
    }


    /**
     * Return a parameter value of a query string.
     *
     * The query string is decoded with parse_str(), so the returned value is
     * already URL-decoded.
     *
     * @param string $param Parameter to be get
     * @param string $query Raw query string (without the leading "?")
     * @return string Parameter value, or an empty string when the parameter
     *     is absent or is not a plain string (e.g. "q[]=x")
     */
    private static function getQueryParam($param, $query) {
        $query_data = array();
        parse_str((string) $query, $query_data);
        // parse_str() produces arrays for "name[]=" style parameters; only a
        // plain string honours the documented return type.
        if (isset($query_data[$param]) && is_string($query_data[$param])) {
            return $query_data[$param];
        }
        return '';
    }


    /**
     * Gets host informations.
     *
     * The host is split on dots and consumed from the right: an optional
     * country suffix, then an optional generic suffix, then the main domain.
     * Whatever labels remain on the left become the sub domain. Matching is
     * case-insensitive; returned labels are lower-cased.
     *
     * When the right-most label is in neither suffix list, no 'main_domain'
     * key is produced and the remaining labels are reported as 'sub_domain'.
     *
     * @param string $host Host to be checked
     * @return array[string => string] Associative array with domain data.
     *     Potential keys are:
     *     - country
     *     - propose
     *     - main_domain
     *     - sub_domain
     */
    public static function parseHost($host) {
        $data = array();

        $country_domain = self::getCountryDomain();
        $propose_domain = self::getProposeDomain();

        // Host names are case-insensitive, the suffix lists are lower-case.
        $host_domains = explode('.', strtolower($host));

        $domain = array_pop($host_domains);
        $has_suffix = false;
        if (in_array($domain, $country_domain, true)) {
            $has_suffix = true;
            $data['country'] = $domain;
            $domain = array_pop($host_domains);
            if (in_array($domain, $propose_domain, true)) {
                $data['propose'] = $domain;
                $domain = array_pop($host_domains);
            }
        } elseif (in_array($domain, $propose_domain, true)) {
            $has_suffix = true;
            $data['propose'] = $domain;
            $domain = array_pop($host_domains);
        }

        // array_pop() returns null once the labels run out (host was only a
        // suffix such as "com"); do not report a null main domain.
        if ($has_suffix && $domain !== null) {
            $data['main_domain'] = $domain;
        }
        if (!empty($host_domains)) {
            $data['sub_domain'] = implode('.', $host_domains);
        }
        return $data;
    }


    /**
     * Return an array of generic proposed domains
     * (the suffixes stored under the 'propose' key by parseHost()).
     * @return array[string]
     */
    public static function getProposeDomain() {
        return array(
            'aero', 'asia', 'biz', 'cat', 'co', 'com', 'coop', 'edu', 'gov', 'info', 'int',
            'jobs', 'mil', 'mobi', 'museum', 'name', 'net', 'org', 'pro', 'tel', 'travel'
        );
    }


    /**
     * Return an array of country domains
     * (the suffixes stored under the 'country' key by parseHost()).
     * @return array[string]
     */
    public static function getCountryDomain() {
        return array(
            'ac', 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
            'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi',
            'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz', 'ca', 'cc',
            'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co', 'cr', 'cu', 'cv',
            'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec', 'ee', 'eg', 'er',
            'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge',
            'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu',
            'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in',
            'io', 'iq', 'ir', 'is', 'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki',
            'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr',
            'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml',
            'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
            'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz',
            'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', 'ps', 'pt',
            'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb', 'sc', 'sd', 'se',
            'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv',
            'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk', 'tl', 'tm', 'tn', 'to',
            'tp', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va',
            'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'ye', 'yt', 'za', 'zm', 'zw'
        );
    }
}
Return current item: Search Words