<?php
// Literal HTML markers used to cut the useful part out of a fetched page.
// They are matched with strpos(), i.e. byte-for-byte, so quoting, letter case and
// whitespace inside these strings are significant.

// Preferred start marker of a browse page (also used as the category section separator).
define('BROWSE_START', "<table border=0>\n<tr><td valign=top><ul>");
// Fallback start marker, tried by preprocessBrowse() only when BROWSE_START is absent.
define('BROWSE_START_2', '<table align=center width="100%" cellpadding=0 cellspacing=0>');
// preprocessBrowse() discards everything from the first occurrence of this marker onwards.
define('BROWSE_END', '<table width="95%" cellpadding=0');
// Fallback end marker, tried only when BROWSE_END is absent.
define('BROWSE_END_2', '" search');
// preprocessSearch() keeps the page from this marker ...
define('SEARCH_START', '<CENTER>Search: ');
// ... up to (not including) this one.
define('SEARCH_END', '<TABLE cellpadding=0 ');
// Hack to remove the "other languages" block. The marker text is English, so this
// only works for the English categories.
define('OTHER_LANG_STR', '<p><table><tr><td valign=top colspan=2>This category in other languages:');
/**
 * Cuts the category table out of the raw main page.
 *
 * The result starts at the opening table marker and ends with the first
 * '</td></tr></table>' that follows it (closing marker included).
 *
 * @param string $data Raw HTML of the main page.
 * @return string|false The extracted table markup, or false when either marker
 *                      is missing; in that case the 'cannotBrowse' action is set
 *                      on $GLOBALS['action'].
 */
function preprocessMainPage($data) {
    $openMarker = '<table cellspacing="4" cellpadding="4"><tr><td valign=top>';
    $closeMarker = '</td></tr></table>';

    $from = strpos($data, $openMarker);
    if ($from !== false) {
        // Everything before the table is page chrome.
        $tail = substr($data, $from);
        $to = strpos($tail, $closeMarker);
        if ($to !== false) {
            return substr($tail, 0, $to + strlen($closeMarker));
        }
    }

    // Either the opening or the closing marker was not found.
    $GLOBALS['action']->setAction('cannotBrowse');
    return false;
}
/**
 * Turns the preprocessed main page into category blocks attached to $node.
 *
 * Every bold category link that is followed by three comma-separated example
 * links becomes one AstCategoryBlock holding three AstCategoryBlockExample nodes.
 *
 * @param string $data  Preprocessed main page HTML.
 * @param object $node  Parent node; receives one addNode() call per category block.
 * @return bool Always true.
 */
function processMainPage($data, &$node) {
    // One link: captures the href and the link text.
    $anchor = '<a href\="(.*?)">(.*?)</a>';
    $pattern = '#<b>' . $anchor . '</b><br>.*?<small>.*?' . $anchor . ',.*?' . $anchor . ',.*?' . $anchor . '#s';

    preg_match_all($pattern, $data, $found, PREG_SET_ORDER);

    foreach ($found as $hit) {
        // strip_tags() drops the markup that colours the "Kids and Teens" category.
        $categoryBlock = new AstCategoryBlock(urlCat(dehtml($hit[1])), dehtml(strip_tags($hit[2])));

        // Groups (3,4), (5,6) and (7,8) are the href/text pairs of the three examples.
        for ($group = 3; $group <= 7; $group += 2) {
            $categoryBlock->addNode(
                new AstCategoryBlockExample(urlCat(dehtml($hit[$group])), dehtml($hit[$group + 1]))
            );
        }

        $node->addNode($categoryBlock);
    }

    return true;
}
/**
 * Strips header, footer and unwanted sections from a raw category (browse) page.
 *
 * Start of the kept region, in priority order:
 *   1. the A-Z bar ('[ <a href="' or, failing that, '[ <b>'); the result is then
 *      prefixed with '<hr><center>',
 *   2. BROWSE_START,
 *   3. BROWSE_START_2.
 * The region is then cut at BROWSE_END (or BROWSE_END_2), the "This category in
 * other languages" table is removed, and everything from the usenet table
 * onwards is dropped.
 *
 * @param string $data Raw HTML of the category page.
 * @return string|false The trimmed HTML, or false when the page is empty, is a
 *                      "Page Has Moved" page, or contains no start marker; in
 *                      those cases the 'cannotBrowse' action is set on
 *                      $GLOBALS['action'].
 */
function preprocessBrowse($data) {
    if ($data == '' || substr($data, 0, 33) == '<html><head><title>Page Has Moved') {
        $GLOBALS['action']->setAction('cannotBrowse');
        return false;
    }

    // don't skip A-Z links if there are any
    if (($pos = strpos($data, '[ <a href="')) !== false
        || ($pos = strpos($data, '[ <b>')) !== false
    ) {
        $data = '<hr><center>' . substr($data, $pos);
    } elseif (($pos = strpos($data, BROWSE_START)) !== false
        || ($pos = strpos($data, BROWSE_START_2)) !== false
    ) {
        $data = substr($data, $pos);
    } else {
        $GLOBALS['action']->setAction('cannotBrowse');
        return false;
    }

    // delete dmoz header and footer
    if (($pos = strpos($data, BROWSE_END)) !== false
        || ($pos = strpos($data, BROWSE_END_2)) !== false
    ) {
        $data = substr($data, 0, $pos);
    }

    // hide "This category in other languages:" sections
    $otherLangStart = strpos($data, OTHER_LANG_STR);
    if ($otherLangStart !== false) {
        $closing = '</table></td></tr></table>';
        // Look for the closing tags *after* the section start: an earlier table on
        // the page must not be mistaken for the end of this section.
        $otherLangEnd = strpos($data, $closing, $otherLangStart);
        if ($otherLangEnd !== false) {
            // The historical offset was 27, i.e. the 26-byte closing marker plus one
            // more character (presumably the newline that follows it - TODO confirm).
            $data = substr($data, 0, $otherLangStart)
                  . (string) substr($data, $otherLangEnd + strlen($closing) + 1);
        }
        // If the section is unterminated its extent is unknown, so leave $data alone
        // rather than splicing at a meaningless offset.
    }

    // remove usenet links
    $usenetStart = strpos($data, '<table cellpadding=0 cellspacing=0><tr><td width="100%" valign=top>');
    if ($usenetStart !== false) {
        $data = substr($data, 0, $usenetStart);
    }

    return $data;
}
/**
 * Parses a preprocessed browse (category) page and attaches the result to $node.
 *
 * Nodes are added in this order:
 *   - an AstAToZ bar, when an A-Z start marker is present,
 *   - an AstBrowseCategoryContainer with one AstBrowseCategorySection per
 *     category table found before the "See also" list,
 *   - one AstBrowseCategoryRelated per entry of the "See also" list,
 *   - an AstBrowseListingContainer holding the site listings.
 *
 * @param string $data  Preprocessed page HTML.
 * @param object $node  Parent node; nodes are attached through addNode().
 * @return bool True on success. False when an A-Z start marker exists but no
 *              '] </center>' follows it; the 'formatNotSupported' action is
 *              then set on $GLOBALS['action'].
 */
function processBrowse($data, &$node) {
// --- A-Z bar -------------------------------------------------------------
// '[ <a href="' is preferred; '[ <b>' is only looked for when it is absent.
$AToZStart = false;
if (($pos = strpos($data, '[ <a href="')) !== false)
{
$AToZStart = $pos;
}
elseif (($pos = strpos($data, '[ <b>')) !== false)
{
$AToZStart = $pos;
}
if ($AToZStart !== false)
{
$AToZEnd = strpos($data, '] </center>');
if ($AToZEnd !== false && $AToZStart < $AToZEnd)
{
// + 2 / - 2 skip the two characters of the opening '[ '.
$contents = substr($data, $AToZStart + 2, $AToZEnd - $AToZStart - 2);
// Split on '> ' or ' <'; the delimiters are consumed, which is why the
// patterns below start with 'a href' / 'b>' instead of '<a href' / '<b>'.
$items = preg_split('#> | <#', $contents);
$AToZ = new AstAToZ();
foreach ($items as $item) {
// A linked entry becomes an item; a bold entry without a link becomes the
// current-category marker.
if (preg_match('#a href\="(.*?)"><b>(.*?)</b>#', $item, $m)) {
$AToZ->addNode(new AstAToZItem(dehtml($m[2]), urlCat(dehtml($m[1]))));
} elseif (preg_match('#b>(.*?)</b#', $item, $m)) {
$AToZ->addNode(new AstCurCat(dehtml($m[1])));
}
}
$node->addNode($AToZ);
}
else
{
$GLOBALS['action']->setAction('formatNotSupported');
return false;
}
}
// --- Category sections -----------------------------------------------------
// Only the part of the page before '<p>See also:<ul>' is split into sections;
// the "See also" list itself is handled separately further down.
$contCatPosRelated = strpos($data, '<p>See also:<ul>');
if ($contCatPosRelated !== false) {
// The negative length keeps exactly the first $contCatPosRelated bytes.
$catSections = explode("<table border=0>\n<tr><td valign=top><ul>",
substr($data, 0, -(strlen($data) - $contCatPosRelated)));
} else {
$catSections = explode("<table border=0>\n<tr><td valign=top><ul>",
$data);
}
// Drop whatever precedes the first section marker.
array_shift($catSections);
$contCat = new AstBrowseCategoryContainer();
foreach ($catSections as $section) {
$rawSecCat = array();
// some symlinks have different names
// Captures: 1 = href, 2 = bold link text, 3 = optional '@', 4 = text inside the
// italic parentheses.
preg_match_all('#<li><a href\="(.+?)"><b>(.+?)</b></a>(@)?\s+\ <i>\((.+?)\)</i>#s', $section,
$rawSecCat, PREG_SET_ORDER);
$secCat = new AstBrowseCategorySection();
foreach ($rawSecCat as $v) {
$cat = urlCat($v[1]);
if (strpos($v[2], ':') !== false) { // Hack to detect "see also" categories in foreign languages. It's not catch-all.
$secCat->addNode(
new AstBrowseCategoryRelated($cat, dehtml($v[2]), dehtml($v[4]))
);
} elseif ($v[3] == '@') {
// A trailing '@' marks the entry as a symbolic category.
$secCat->addNode(
new AstBrowseCategorySymbolic($cat, dehtml($v[2]), dehtml($v[4]))
);
// Alternatively, "if ($cat[0] === $this->curCats[0]) {"
} else {
$secCat->addNode(
new AstBrowseCategory($cat, dehtml($v[2]), dehtml($v[4]))
);
}
}
$contCat->addNode($secCat);
}
$node->addNode($contCat);
// --- "See also" list -------------------------------------------------------
if ($contCatPosRelated !== false) {
// NOTE(review): if no '</ul>' follows, $end is false and the length passed to
// substr() becomes negative - confirm the preprocessed input always closes the list.
$end = strpos($data, '</ul>', $contCatPosRelated);
$catsRelatedData = substr($data, $contCatPosRelated, $end - $contCatPosRelated);
$catsRelated = array();
// Captures: 1 = href, 2 = bold link text, 3 = text inside the italic parentheses.
preg_match_all('#<li><a href\="(.+?)"><b>(.+?)</b></a> \ <i>\((.+?)\)</i>#s',
$catsRelatedData,
$catsRelated, PREG_SET_ORDER);
foreach ($catsRelated as $v) {
$cat = urlCat($v[1]);
$node->addNode(new AstBrowseCategoryRelated($cat, dehtml($v[2]), dehtml($v[3])));
}
}
// --- Site listings ---------------------------------------------------------
// trim the age range from the kids and teens sections
$data = preg_replace('#<small>\[.*?\]</small> #', '', $data);
$lines = explode("\n", $data);
// The last two lines are never examined as listings; this also guarantees that
// $lines[$i+1] below always exists.
$count = count($lines) - 2;
$i = 0;
// Skip ahead to the first line containing '://' (taken to be the first listing).
while ($i < $count && strpos($lines[$i], '://') === false)
{
$i++;
}
$contBrowse = new AstBrowseListingContainer();
for (; $i < $count; $i++)
{
if (strpos($lines[$i], '://') === false)
{
continue;
}
// Split once at the first '">': left part holds the URL, right part the rest.
$sections = explode('">', $lines[$i], 2);
if (!isset($sections[1])) continue;
// Drops the first 13 characters - presumably the leading '<li><a href="'; this
// function does not verify that the line starts with it.
$url = substr($sections[0], 13);
$title = substr($sections[1], 0, $linkEnd = strpos($sections[1], '</a>'));
// NOTE(review): when ' - ' is missing strpos() returns false and $descStart ends
// up as 3, so $desc is only meaningful if it is overwritten below.
$descStart = strpos($sections[1], ' - ', $linkEnd) + 3;
$desc = substr($sections[1], $descStart);
$star = false;
// When the next line starts with ' - ', the description is taken from that line,
// the listing is flagged with $star and the line is consumed.
if (substr($lines[$i+1], 0, 3) == ' - ') {
$desc = substr($lines[$i+1], 3);
$star = true;
$i++;
}
// Featured sites have bold tags
// (hence strip_tags() on title and description; the meaning of the final `true`
// argument is defined by AstBrowseListing, not visible here).
$contBrowse->addNode(new AstBrowseListing(dehtml($url), dehtml(strip_tags($title)), dehtml(strip_tags($desc)), $star, true));
}
$node->addNode($contBrowse);
return true;
}
/**
 * Strips header and footer from a raw search result page.
 *
 * Outcomes, checked in this order:
 *   - empty page, or the "temporarily unavailable" notice is present:
 *     action 'searchHeavyLoad', returns false;
 *   - page contains 'Try your search on:': action 'searchNoResults', returns false;
 *   - SEARCH_START is missing: action 'cannotSearch', returns false;
 *   - otherwise the text from SEARCH_START up to (not including) the first
 *     SEARCH_END that follows it; if no SEARCH_END follows, up to the end of the page.
 *
 * Actions are reported through $GLOBALS['action']->setAction().
 *
 * @param string $data Raw HTML of the search result page.
 * @return string|false Trimmed HTML, or false on any of the failures above.
 */
function preprocessSearch($data) {
    $unavailableNotice = '<CENTER><I>The Open Directory search is temporarily unavailable. Please try back later.</I></CENTER>';

    // Strict comparisons throughout: a marker found at offset 0 is still "found".
    if ($data == '' || strpos($data, $unavailableNotice) !== false) {
        $GLOBALS['action']->setAction('searchHeavyLoad');
        return false;
    }

    if (strpos($data, 'Try your search on:') !== false) {
        $GLOBALS['action']->setAction('searchNoResults');
        return false;
    }

    // delete dmoz header and footer
    $start = strpos($data, SEARCH_START);
    if ($start === false) {
        $GLOBALS['action']->setAction('cannotSearch');
        return false;
    }

    // Only an end marker located after the start marker can close the region.
    $end = strpos($data, SEARCH_END, $start);
    if ($end === false) {
        return substr($data, $start);
    }

    return substr($data, $start, $end - $start);
}
/**
 * Parses a preprocessed search result page into AstSearchListing nodes on $node.
 *
 * Since category search results have the same categories as the categories in
 * the top listings, category results are omitted: only lines that start with
 * '<li><a href="' and contain '://' are turned into listings. The last three
 * lines of the page are never examined.
 *
 * @param string $data  Preprocessed search page HTML.
 * @param object $node  Parent node; receives one addNode() call per listing.
 * @return bool Always true.
 */
function processSearch($data, &$node) {
    $rows = explode("\n", $data);
    $limit = count($rows) - 3;

    for ($idx = 0; $idx < $limit; $idx++) {
        $row = $rows[$idx];

        // note that this is not the same as the browse listings code
        $isListing = strpos($row, '<li><a href="') === 0
            && strpos($row, '://') !== false
            && substr($row, 0, 7) !== '<li><b>'
            && substr($row, 0, 4) !== '</b>';
        if (!$isListing) {
            continue;
        }

        // Split once at the first '">': URL on the left, everything else on the right.
        $parts = explode('">', $row, 2);
        if (!isset($parts[1])) {
            continue;
        }
        $href = substr($parts[0], 13); // 13 = strlen('<li><a href="')
        $rest = $parts[1];

        $anchorClose = strpos($rest, '</a>');
        $title = substr($rest, 0, $anchorClose);

        // An <img> directly after the link marks the listing as starred; the
        // remainder is re-cut and the following line is skipped.
        $starred = substr($rest, $anchorClose + 4, 4) === '<img';
        if ($starred) {
            $rest = substr($rest, strpos($rest, ' ') - $anchorClose);
            $idx++;
        }

        $catMarker = strpos($rest, '<br><small><i>--');
        $descFrom = strpos($rest, ' - ', $anchorClose) + 3;

        if ($catMarker !== false) {
            // Description runs up to the category marker; the category link is the
            // first link after that marker.
            $desc = substr($rest, $descFrom, $catMarker - $descFrom);
            $catFrom = strpos($rest, '">', $catMarker) + 2;
        } else {
            // No marker: description runs up to the first <br>; the category link is
            // the first '">' in the remainder.
            $desc = substr($rest, $descFrom, strpos($rest, '<br>') - $descFrom);
            $catFrom = strpos($rest, '">') + 2;
        }
        $cat = substr($rest, $catFrom, strpos($rest, '</a>', $catFrom) - $catFrom);

        $node->addNode(new AstSearchListing(
            dehtml(strip_tags($href)),
            dehtml(strip_tags($title)),
            dehtml(strip_tags($desc)),
            $starred,
            explode(': ', dehtml(strip_tags($cat))),
            true
        ));
    }

    return true;
}
?>