A base class and 2 derived classes (pkg.dict.php) for building clients of
dictionary servers that follow the DICT protocol (RFC 2229), such as the
server at dict.org.
Also included are 2 example query interfaces built with these classes: one
performs a simple search (simplesearch.php), and the other shows how to get
information from the dictionary server and use it to generate a form
allowing for more complex searches (extendedsearch.php).
[UPDATE: 2000/06/05] Fixed some bugs reported by Davor Cengija
<davor@croart.com> and found another bug related to URLs referenced in the
definition that are also marked as cross-reference terms.
[UPDATE: 2000/06/06] Check for duplicates when creating the links, and
modified the external URLs regex to recognize more protocols, as well as
eliminate cross-references to RFCs.
[UPDATE: 2000/06/07] Fixed the missing </form> tag in the examples, thanks
to Stoyan Jekov <jekov@cig.nml.mot.com> for finding the omission.
NOTE: Separate the 3 files before using the examples.
::::::::::::::
pkg.dict.php
::::::::::::::
<?php
/*
* This set of classes implements a subset of the DICT protocol
* and is meant to be used to build clients that
* query a DICT server
*
* By default, the class uses the dict.org server in
* the port 2628.
*
* (c) 2000, Jesus M. Castagnetto <jmcastagnetto@zkey.com>
*
* License: GPL, see http://www.gnu.org/copyleft/gpl.txt
*
* Changes:
*
* 2000/06/04 - added the correct link to the GPL license
* 2000/06/05 - added the in_array() method to the base class
* from a suggestion by Davor Cengija <davor@croart.com>
* who pointed out that PHP gained the in_array()
* function in 3.0.12, and 4.0.0
*/
//============================================================
/*
* class DictBase
*
* Base class for implementing the DICT protocol to communicate
* with dictionary servers. It defaults to dict.org and port 2628.
*/
/*
* Holds the connection settings (host, port, socket) and the low-level
* helpers shared by the DICT client classes: opening/closing the socket,
* parsing status lines and reading dot-terminated text responses.
*
* The code is written in the PHP 4 subset that still runs on current PHP:
* no call-time pass-by-reference, no ereg*() and no each().
*/
class DictBase {
    var $host = "dict.org";
    var $port = "2628";
    var $max_length = 6144; // 1024 * 6, room for multi-byte UTF-8 characters
    var $socket;
    // status codes this client knows about
    var $valid_codes =
        array (
            110, 111, 112, 113, 114, 130, 150, 151, 152,
            210, 220, 221, 230, 250,
            330,
            420, 421,
            500, 501, 502, 503, 530, 531, 532,
            550, 551, 552, 554, 555
        );
    // filled by parse_code(): array("error"=>bool, "code"=>string, "desc"=>string)
    var $return_code = array();

    // Declared so that PHP 7 does not flag the class-named method below as a
    // deprecated old-style constructor; under PHP 4 this is an ordinary method.
    function __construct() {
        // empty constructor
    }

    function DictBase() {
        // empty constructor (PHP 4 style), kept for callers that invoke it by name
    }

    // Generic property setter: $this->$var = $val
    function set($var, $val) {
        $this->$var = $val;
    }

    // Generic property getter
    function get($var) {
        return $this->$var;
    }

    /*
    * Split a status line ("NNN text") into $this->return_code.
    * Codes >= 300 are flagged as errors. A line that is not a status line
    * at all (including the false that fgets() returns at EOF) is also
    * flagged as an error instead of silently passing as success.
    */
    function parse_code($str) {
        $reg = array();
        // the "s" modifier lets "." span the trailing CRLF, as the POSIX
        // ereg() call this replaces did
        if (is_string($str) && preg_match('/^([0-9]{3}) (.+)/s', $str, $reg)) {
            $this->return_code = array(
                "error" => ($reg[1] >= 300),
                "code"  => $reg[1],
                "desc"  => $reg[2]
            );
        } else {
            $this->return_code = array(
                "error" => true,
                "code"  => "",
                "desc"  => "no valid status line received from server"
            );
        }
    }

    // True when the last parsed code is one of $valid_codes
    function is_valid_code() {
        return $this->in_array($this->return_code["code"], $this->valid_codes);
    }

    // True when the last parsed status line was flagged as an error
    function is_error_code() {
        return $this->return_code["error"];
    }

    // Echo the last parsed status as "[code] description", prefixed with
    // "<ERROR> " when it was an error
    function print_code() {
        $out = $this->is_error_code() ? "<ERROR> " : "";
        $out .= "[".$this->return_code["code"]."] ".$this->return_code["desc"]."\n";
        echo $out;
    }

    /*
    * Open the TCP connection (90 second connect timeout) and store it in
    * $this->socket. On failure the error is echoed and the script exits.
    */
    function connect() {
        $errno = 0;
        $errstr = "";
        // $errno/$errstr are by-reference parameters of fsockopen(); the
        // call-time "&" of the original is a parse error since PHP 5.4
        $fp = fsockopen($this->host, (int) $this->port, $errno, $errstr, 90);
        if (!$fp) {
            echo "Cannot connect: ".$errno." = ".$errstr."\n";
            exit;
        }
        $this->socket = $fp;
    }

    /*
    * Send QUIT, read and discard one pending response line, and close the
    * socket. Does nothing when no open socket is held, so a second call
    * is harmless.
    */
    function close() {
        if (!is_resource($this->socket))
            return;
        fputs($this->socket, "QUIT\r\n");
        fgets($this->socket, $this->max_length);
        fclose($this->socket);
    }

    /*
    * Read a text response up to (not including) the terminating line that
    * holds a single period. Returns the text with its CRLF line endings,
    * or "" when nothing was read.
    */
    function read_data() {
        $out = "";
        while (is_string($read = fgets($this->socket, $this->max_length))) {
            if ($read === ".\r\n")
                break;
            // RFC 2229: a text line starting with a period is sent with the
            // period doubled; undo that
            if (substr($read, 0, 2) == "..")
                $read = substr($read, 1);
            $out .= $read;
        }
        return $out;
    }

    /*
    * Value membership test (loose comparison). Uses the native in_array()
    * when the running PHP has it; otherwise walks the array by hand.
    * The original version test misspelled $release and only accepted
    * major version 4, so it never reached the native function on PHP 3 or 5+.
    */
    function in_array($element, $arr) {
        if (function_exists("in_array"))
            return in_array($element, $arr);
        $n = count($arr);
        reset($arr);
        for ($i = 0; $i < $n; $i++) {
            if (current($arr) == $element)
                return true;
            next($arr);
        }
        return false;
    }
} // end of base class Dict
//============================================================
/*
* Class DictServerInfo
*
* To generate objects containing DICT server information.
* Extends the DictBase class.
*/
/*
* Connects to a DICT server on construction, collects its banner data,
* the SHOW SERVER text and the database and strategy lists into $info,
* then closes the connection. Read the collected data with get_info().
*
* Keys written to $info: "signature", "capabilities", "msg-id", "server",
* "num_dbs", "databases", "num_strat", "strategies" and, when $extended
* is true, "dbs_desc".
*/
class DictServerInfo extends DictBase {
    var $info = array();

    // PHP 5+ constructor. PHP 8 no longer treats the class-named method as a
    // constructor, so without this the object would never connect.
    function __construct($host="", $port="", $extended=false) {
        $this->init($host, $port, $extended);
    }

    // PHP 4 constructor, kept for old runtimes and by-name callers
    function DictServerInfo($host="", $port="", $extended=false) {
        $this->__construct($host, $port, $extended);
    }

    /*
    * Connect, gather the server information and disconnect.
    * Empty $host/$port keep the class defaults. Echoes the status and
    * exits when the greeting line is an error.
    */
    function init($host, $port, $extended) {
        if ($host)
            $this->set("host", $host);
        if ($port)
            $this->set("port", $port);
        $this->connect();
        // get connection response line
        $line = fgets($this->socket, $this->max_length);
        $this->parse_code($line);
        if ($this->is_error_code()) {
            $this->print_code();
            exit;
        }
        // extract capabilities info from response line:
        // "NNN signature <cap.cap...> <msg-id>"
        $reg = array();
        if (is_string($line)
                && preg_match('/^[0-9]{3} (.*) <([^<]*)> <(.*)>/s', $line, $reg)) {
            $this->info["signature"] = $reg[1];
            $this->info["capabilities"] = explode(".", $reg[2]);
            $this->info["msg-id"] = $reg[3];
        } else {
            // greeting without the two <...> fields
            $this->info["signature"] = "";
            $this->info["capabilities"] = array();
            $this->info["msg-id"] = "";
        }

        // get description on the server and store verbatim
        $this->info["server"] = $this->show("SERVER");
        // get the dbs and strategies for this server
        $this->store("databases", $this->show("DB"));
        $this->store("strategies", $this->show("STRAT"));
        // get the description of each database
        // if extended info is requested
        if ($extended)
            $this->get_dbs_info();
        // close the connection
        $this->close();
    }

    /*
    * Send "SHOW $str" and return the text of the response.
    * For DB and STRAT the count in the status line is stored in
    * $info["num_dbs"] / $info["num_strat"].
    * Returns "" when the status is not a 1xx code: no text block is read
    * in that case, where the original would wait on the socket for one.
    */
    function show($str) {
        fputs($this->socket, "SHOW ".$str."\r\n");
        $status = chop((string) fgets($this->socket, $this->max_length));
        $parts = explode(" ", $status);
        $count = isset($parts[1]) ? (int) $parts[1] : 0;
        if ($str == "DB")
            $this->info["num_dbs"] = $count;
        if ($str == "STRAT")
            $this->info["num_strat"] = $count;
        if (substr($status, 0, 1) != "1")
            return "";
        $data = $this->read_data();
        // consume the status line that follows the text block
        fgets($this->socket, $this->max_length);
        return $data;
    }

    /*
    * Parse lines of the form `name "description"` (quotes optional) into
    * an array name => description and store it in $info[$str].
    * Blank lines and lines that do not fit the format are skipped.
    */
    function store($str, $data) {
        $out = array();
        $lines = explode("\r\n", $data);
        $n = count($lines);
        for ($i = 0; $i < $n; $i++) {
            if (chop($lines[$i]) == "")
                continue;
            $reg = array();
            if (!preg_match('/^([^ ]+) "?([^"]+)"?/', $lines[$i], $reg))
                continue;
            $out[$reg[1]] = $reg[2];
        }
        $this->info[$str] = $out;
    }

    // Run "SHOW INFO <db>" for every stored database and keep the texts
    // in $info["dbs_desc"], keyed by database name
    function get_dbs_info() {
        $dbinfo = array();
        if (isset($this->info["databases"]) && is_array($this->info["databases"])) {
            foreach ($this->info["databases"] as $k => $v) {
                $dbinfo[$k] = $this->show("INFO ".$k);
            }
        }
        $this->info["dbs_desc"] = $dbinfo;
    }

    // Return the stored item $str, or null when nothing is stored under it
    function get_info ($str) {
        return isset($this->info[$str]) ? $this->info[$str] : null;
    }
} // end of class DictServerInfo
//============================================================
/*
* class DictQuery
*
* To create query objects to search a DICT server
*/
/*
* Connects to a DICT server on construction and runs one DEFINE or MATCH
* query. define() and match() close the connection after the query, so
* each object serves a single query.
*
* After a query:
*   numres - count announced in the server's status line (0 if none)
*   result - for define(): list of arrays with keys "term", "dbcode",
*            "dbname", "definition"; for match(): list of raw response
*            lines of the form `dbcode "term"`
*/
class DictQuery extends DictBase {
    var $term = "";
    var $method = "exact";
    var $searchdb = "*";
    var $query_type = "DEFINE";
    var $valid_methods = array ("exact", "prefix", "substring", "suffix",
                                "re", "regexp", "soundex", "lev");
    var $result = array();
    var $numres = 0;

    // PHP 5+ constructor. PHP 8 no longer treats the class-named method as a
    // constructor, so without this the object would never connect.
    function __construct($host="", $port="") {
        $this->init($host, $port);
    }

    // PHP 4 constructor, kept for old runtimes and by-name callers
    function DictQuery($host="", $port="") {
        $this->__construct($host, $port);
    }

    // Connect (empty $host/$port keep the defaults) and check the greeting;
    // echoes the status and exits when the greeting is an error
    function init($host, $port) {
        if ($host)
            $this->set("host", $host);
        if ($port)
            $this->set("port", $port);

        $this->connect();
        // get connection response line
        $line = fgets($this->socket, $this->max_length);
        $this->parse_code($line);
        if ($this->is_error_code()) {
            $this->print_code();
            exit;
        }
    }

    /*
    * Send the query and collect the response into numres/result.
    * $method "exact" issues DEFINE, any other valid method issues MATCH.
    * An unknown method echoes an error and exits. The status line is
    * also passed to parse_code(), so return_code reflects the outcome.
    */
    function search($term, $method, $db) {
        if (!$this->is_method($method)) {
            echo "**ERROR** invalid method: ".$method."\n";
            exit;
        }
        $this->clear_results();
        $this->term = $term;
        $this->method = $method;
        $this->searchdb = $db;

        // $term and $db usually come from user input: keep them from
        // breaking out of their command argument. The database name loses
        // whitespace, quotes and backslashes; the term loses line breaks
        // and gets backslash and quote escaped inside its quoted string.
        $db_arg = preg_replace('/[\s"\\\\]+/', "", (string) $db);
        if ($db_arg == "")
            $db_arg = "*";
        $term_arg = preg_replace('/[\r\n]+/', " ", (string) $term);
        $term_arg = str_replace("\\", "\\\\", $term_arg);
        $term_arg = str_replace("\"", "\\\"", $term_arg);

        $query = ($method == "exact")
            ? "DEFINE ".$db_arg." "
            : "MATCH ".$db_arg." ".$method." ";
        $query .= "\"".$term_arg."\"\r\n";

        fputs($this->socket, $query);
        $line = fgets($this->socket, $this->max_length);
        $this->parse_code($line);
        $status = is_string($line) ? substr($line, 0, 3) : "";
        $reg = array();
        if (is_string($line) && preg_match('/^[0-9]{3} ([0-9]+) .+/s', $line, $reg))
            $this->numres = (int) $reg[1];

        if ($method != "exact") {
            // Only "152 n matches found" is followed by a text block; after
            // e.g. "552 no match" reading on would wait for data that
            // never arrives.
            if ($status != "152")
                return;
            $rlist = chop($this->read_data());
            if ($rlist != "")
                $this->result = explode("\r\n", $rlist);
        } else {
            // "150 n definitions retrieved" is followed by n blocks, each a
            // `151 "term" dbcode "dbname"` line plus a text block
            if ($status != "150")
                return;
            $regex = '/^[0-9]{3} "([^"]+)" ([^" ]+) "([^"]+)"/';
            while (count($this->result) < $this->numres) {
                $line = fgets($this->socket, $this->max_length);
                if (!is_string($line))
                    break; // connection ended early
                $line = chop($line);
                if ($line == "")
                    continue;
                if (substr($line, 0, 3) != "151")
                    break; // not a definition header: no text block follows
                $entry = array("term" => "", "dbcode" => "",
                               "dbname" => "", "definition" => "");
                $reg = array();
                if (preg_match($regex, $line, $reg)) {
                    $entry["term"] = $reg[1];
                    $entry["dbcode"] = $reg[2];
                    $entry["dbname"] = $reg[3];
                }
                $entry["definition"] = $this->read_data();
                // append, so the list has no index gaps
                $this->result[] = $entry;
            }
        }
    }

    // Look up definitions of $term in $db, then close the connection
    function define($term, $db="*") {
        $this->search($term, "exact", $db);
        $this->close();
    }

    // Find terms matching $term with strategy $method in $db, then close
    // the connection
    function match($term, $method="prefix", $db="*") {
        $this->search($term, $method, $db);
        $this->close();
    }

    // True when $method is listed in $valid_methods
    function is_method($method) {
        return $this->in_array($method, $this->valid_methods);
    }

    // Reset numres and result before a new query
    function clear_results() {
        $this->numres = 0;
        $this->result = array();
    }
} // end of class DictQuery
?>
::::::::::::::
simplesearch.php
::::::::::::::
<!--
Simple example that uses pkg.dict.php
(c) 2000, Jesus M. Castagnetto <jmcastagnetto@zkey.com>
License: GPL, see www.gnu.org/copyleft/gpl.txt
Changes:
2000/06/04 - Added the correct link to the GPL license
2000/06/05 - Fixes to bugs reported by Davor Cengija <davor@croart.com>
(1) Links that look like {{UNIX}} were not being stripped
correctly
(2) Crossreference terms sometimes spanned a line break
which looked real ugly/bad
Thanks to Davor for reporting the bugs in this example
2000/06/05 - Found another bug: some cross-references are to URLs and
not terms in the dictionaries; the mkLinks() function has
been fixed to account for that.
2000/06/06 - Check for duplicates when creating the links, and modified
the external URLs regex to recognize more protocols, as well
as eliminate cross-references to RFCs
2000/06/07 - Stoyan Jekov <jekov@cig.nml.mot.com> has sharp eye, and
noticed that I forgot to close my HTML FORM element
-->
<?php
        // remember when processing started; the page footer reports the elapsed time
        $start = time();
        include ("./pkg.dict.php");

        // The page was written for register_globals, which no longer exists.
        // Take the values from the request arrays when the running PHP
        // provides them and fall back to the legacy globals otherwise.
        if (isset($_SERVER["PHP_SELF"]))
                $PHP_SELF = $_SERVER["PHP_SELF"];
        elseif (!isset($PHP_SELF))
                $PHP_SELF = "";
        if (isset($_REQUEST["query_term"]) && is_string($_REQUEST["query_term"]))
                $query_term = $_REQUEST["query_term"];
        elseif (!isset($query_term) || !is_string($query_term))
                $query_term = "";
        if (isset($_REQUEST["database"]) && is_string($_REQUEST["database"]))
                $database = $_REQUEST["database"];
        elseif (!isset($database) || !is_string($database))
                $database = "*";
        if (isset($_REQUEST["strategy"]) && is_string($_REQUEST["strategy"]))
                $strategy = $_REQUEST["strategy"];
        elseif (!isset($strategy) || !is_string($strategy))
                $strategy = "exact";
?>
<html>
<head>
<title>Simple Form to Query dict.org</title>
</head>
<body>
Search the dict.org server
<p>
<form action="<?php echo htmlspecialchars($PHP_SELF) ?>" method="post">
<input type="text" name="query_term" size="60"
<?php
// show the previous search term again; escaped because it is user input
// placed inside a double-quoted attribute
if ($query_term != "")
        echo "value=\"".htmlspecialchars($query_term)."\"";
?>
><br>
<input type="hidden" name="database" value="*">
<input type="hidden" name="strategy" value="exact">
<input type="submit" name="submit" value=" Search ">
<input type="reset" name="reset" value=" Clear form input ">
</form>
<hr>
<?php
// check if element is in the array
/*
* Value membership test (loose comparison).
* Uses the native in_array() when the running PHP has it; otherwise walks
* the array by hand. The original version test misspelled $release and
* only accepted major version 4, so on other versions it fell through to
* an each() loop, and each() no longer exists in PHP 8.
*/
function inArray($element, $arr) {
        if (function_exists("in_array"))
                return in_array($element, $arr);
        $n = count($arr);
        reset($arr);
        for ($i = 0; $i < $n; $i++) {
                if (current($arr) == $element)
                        return true;
                next($arr);
        }
        return false;
}
// remove duplicates from array
// and eliminate the patterns in $nolinks
/*
* Return the values of $arr in their original order, without duplicates
* and without any entry containing the text "rfc:".
* Uses a plain substring test; the ereg() call it replaces was removed
* in PHP 7.
*/
function cleanArray($arr) {
        $nolinks = "rfc:";
        $out = array();
        $n = count($arr);
        for ($i = 0; $i < $n; $i++) {
                if (strpos($arr[$i], $nolinks) !== false)
                        continue;
                if (!inArray($arr[$i], $out))
                        $out[] = $arr[$i];
        }
        return $out;
}
//make the links to other words in the description
/*
* Turn a definition text into HTML.
* Terms marked as {term} or {{term}} are shown in bold, and a "See also:"
* list is appended with one link per distinct marked term:
*   - "mailto:"/"news:" entries, bare URLs and "description (URL)" entries
*     become external links shown between angle brackets
*   - everything else becomes a link back to this page that runs an exact
*     search for the term in database $db
* The text comes from the server and is HTML-escaped before markup is added.
*/
function mkLinks($str, $db) {
        global $PHP_SELF;
        $self = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : (string) $PHP_SELF;
        $self = htmlspecialchars($self);

        $patt = '/\{+([^{}]+)\}+/';

        // collect the marked terms from the raw text, so that the link
        // parameters are built from unescaped values
        $reg = array();
        preg_match_all($patt, $str, $reg);
        $link = $reg[1];
        // a term may span a line break: collapse whitespace first, so that
        // two spellings of the same term count as duplicates
        $n = count($link);
        for ($i = 0; $i < $n; $i++)
                $link[$i] = trim(preg_replace('/[[:space:]]+/', " ", $link[$i]));
        // clean up array
        $link = cleanArray($link);

        // braces survive escaping, so the bold markup is added afterwards
        $out = preg_replace($patt, "<b>\\1</b>", htmlspecialchars($str));

        $n = count($link);
        if ($n > 0)
                $out .= "<i>See also:</i>\n";

        for ($i = 0; $i < $n; $i++) {
                $target = $link[$i];
                // reset for every entry; the original could reuse the URL of
                // the previous entry when the current one did not parse
                $url = "";
                $desc = "";
                $regurl = array();
                if (preg_match('!^((mailto|news):.+)$!', $target, $regurl)) {
                        $url = $regurl[1];
                        $desc = $url;
                } elseif (preg_match('!^(.*) *\(((http|https|ftp|telnet|gopher)://.+)\)$!',
                                $target, $regurl)) {
                        $url = $regurl[2];
                        $desc = chop($regurl[1]);
                        if ($desc == "")
                                $desc = $url;
                } elseif (preg_match('!^(http|https|ftp|telnet|gopher)://!', $target)) {
                        // bare URL without a description
                        $url = $target;
                        $desc = $target;
                }

                if ($url != "") {
                        // external link, marked using <>
                        $out .= "&lt;<a href=\"".htmlspecialchars(chop($url))."\" target=\"_blank\">";
                        $out .= htmlspecialchars(chop($desc))."</a>&gt; ";
                } else {
                        $out .= "[<a href=\"".$self."?query_term=";
                        $out .= urlencode($target)."&amp;database=".urlencode($db);
                        $out .= "&amp;strategy=exact\">".htmlspecialchars($target)."</a>] ";
                        if (($i % 5) == 0 && $i > 0)
                                $out .= "\n";
                }
        }
        $out .= "\n";
        return $out;
}
/*
* Debugging aid: echo the contents of $arr as nested HTML lists,
* one "key = value" line per scalar and a nested list per sub-array.
* Iterates with foreach; the each() loop it replaces fails on PHP 8.
*/
function parr($arr) {
        echo "<ul>";
        foreach ($arr as $k => $v) {
                if (is_array($v)) {
                        echo "<ul>";
                        echo "* $k , new array*<br>";
                        parr($v);
                        echo "</ul>";
                } else {
                        echo "$k = $v<br>";
                }
        }
        echo "</ul>";
}
// perform a query to the server
/*
* Look up the definitions of $str in database $db on the default server
* and return the result as an HTML fragment (summary line plus one
* <dt>/<dd> pair per definition).
* This page always runs an exact DEFINE query; $strategy is only shown in
* the summary line. All request-derived and server-derived values are
* HTML-escaped.
*/
function doQuery($str, $db, $strategy) {
        $query = new DictQuery();
        $query->define($str, $db);
        $res = $query->get("result");
        $nres = count($res);
        $out = "<b>Found ".$nres;
        $out .= ($nres == 1) ? " hit" : " hits";
        $out .= "</b> - <i>Term: ".htmlspecialchars($str);
        $out .= ", Database: ".htmlspecialchars($db);
        $out .= ", Strategy: ".htmlspecialchars($strategy);
        $out .= "</i><br>\n<dl>\n";
        for ($i = 0; $i < $nres; $i++) {
                if (!isset($res[$i]))
                        continue;
                $entry = $res[$i];
                $out .= "<dt>[".($i + 1)."] : ".htmlspecialchars($entry["dbname"])."</dt>\n";
                $out .= "<dd><pre>".mkLinks($entry["definition"], $db)."</pre></dd>\n";
        }
        $out .= "</dl>";
        return $out;
}
// Take the query from the request arrays when the running PHP provides
// them (register_globals no longer exists) and fall back to the legacy
// globals otherwise.
if (isset($_REQUEST["query_term"]) && is_string($_REQUEST["query_term"]))
        $query_term = $_REQUEST["query_term"];
elseif (!isset($query_term) || !is_string($query_term))
        $query_term = "";
if (isset($_REQUEST["database"]) && is_string($_REQUEST["database"]))
        $database = $_REQUEST["database"];
elseif (!isset($database) || !is_string($database) || $database == "")
        $database = "*";
if (isset($_REQUEST["strategy"]) && is_string($_REQUEST["strategy"]))
        $strategy = $_REQUEST["strategy"];
elseif (!isset($strategy) || !is_string($strategy) || $strategy == "")
        $strategy = "exact";
// compare against "" so that a search for "0" is not ignored
if ($query_term != "") {
        $out = doQuery($query_term, $database, $strategy);
        echo $out."\n<hr>\n";
}
?>
Last accessed:
<?php
// Page footer: current timestamp, then the whole seconds elapsed since
// $start was recorded at the top of the page.
$end = time();
print(date("Y/m/d H:i:s", time()));
print(" [Total processing time: " . ($end - $start) . " seconds]");
?>
</body>
</html>
::::::::::::::
extendedsearch.php
::::::::::::::
<!--
Extended example that uses pkg.dict.php
(c) 2000, Jesus M. Castagnetto <jmcastagnetto@zkey.com>
License: GPL, see www.gnu.org/copyleft/gpl.txt
Changes:
2000/06/04 - Added the correct link to the GPL license
2000/06/05 - Fixes to bugs reported by Davor Cengija <davor@croart.com>
(1) Links that look like {{UNIX}} were not being stripped
correctly
(2) Crossreference terms sometimes spanned a line break
which looked real ugly/bad
Thanks to Davor for reporting the bugs in this example
2000/06/05 - Found another bug: some cross-references are to URLs and
not terms in the dictionaries; the mkLinks() function has
been fixed to account for that.
2000/06/06 - Check for duplicates when creating the links, and modified
the external URLs regex to recognize more protocols, as well
as eliminate cross-references to RFCs
2000/06/07 - Stoyan Jekov <jekov@cig.nml.mot.com> has sharp eye, and
noticed that I forgot to close my HTML FORM element
-->
<?php
        // remember when processing started; the page footer reports the elapsed time
        $start = time();
        include ("./pkg.dict.php");

        // The page was written for register_globals, which no longer exists.
        // Take the values from the request arrays when the running PHP
        // provides them and fall back to the legacy globals otherwise.
        if (isset($_SERVER["PHP_SELF"]))
                $PHP_SELF = $_SERVER["PHP_SELF"];
        elseif (!isset($PHP_SELF))
                $PHP_SELF = "";
        if (isset($_REQUEST["query_term"]) && is_string($_REQUEST["query_term"]))
                $query_term = $_REQUEST["query_term"];
        elseif (!isset($query_term) || !is_string($query_term))
                $query_term = "";
        if (isset($_REQUEST["database"]) && is_string($_REQUEST["database"]))
                $database = $_REQUEST["database"];
        elseif (!isset($database) || !is_string($database))
                $database = "*";
        if (isset($_REQUEST["strategy"]) && is_string($_REQUEST["strategy"]))
                $strategy = $_REQUEST["strategy"];
        elseif (!isset($strategy) || !is_string($strategy))
                $strategy = "exact";
?>
<html>
<head>
<title>Extended autogenerated form to query dict.org</title>
</head>
<body>
Search the dict.org server
<p>
<form action="<?php echo htmlspecialchars($PHP_SELF) ?>" method="post">
<input type="text" name="query_term" size="60"
<?php
// show the previous search term again; escaped because it is user input
// placed inside a double-quoted attribute
if ($query_term != "")
        echo "value=\"".htmlspecialchars($query_term)."\"";
?>
><BR>
<?php
/*
* I will autogenerate the form in the code below
* You might want to make a static form to avoid
* the overhead of the on-the-fly HTML code generation
* and the query to the DICT server
*/
// functions to make the form
/*
* Build an HTML <select> element.
*   $name    - name attribute of the element
*   $nopt    - number of options; accepted for compatibility, not used
*   $options - array value => label; an entry whose key equals the
*              default's key is left out so it is not listed twice
*   $default - one-element array value => label, emitted first and
*              marked as selected
* Values and labels are HTML-escaped. Keys are compared as strings, so a
* key such as 0 is not mistaken for the default "*".
* Returns the markup as a string.
*/
function mkSelect($name, $nopt, $options, $default) {
        // first (and only expected) entry of $default, without each()
        reset($default);
        $defkey = (string) key($default);
        $defval = (string) current($default);

        $out = "<select name=\"".htmlspecialchars($name)."\">\n";
        $out .= "<option value=\"".htmlspecialchars($defkey)."\" selected>";
        $out .= htmlspecialchars($defval)."</option>\n";
        if (is_array($options)) {
                foreach ($options as $key => $val) {
                        if (strcmp((string) $key, $defkey) == 0)
                                continue;
                        $out .= "<option value=\"".htmlspecialchars((string) $key)."\">";
                        $out .= htmlspecialchars((string) $val)."</option>\n";
                }
        }
        $out .= "</select>\n";
        return $out;
}
/*
* Echo the two selection lists of the search form, built from the
* server information object $s (read through its get_info() method):
* one for the database and one for the search strategy.
*/
function printForm($s) {
        $ndbs = $s->get_info("num_dbs");
        $dbs = $s->get_info("databases");
        // the count is stored under "num_strat"; the original asked for
        // "num_strats", which is never set
        $nstrats = $s->get_info("num_strat");
        $strategies = $s->get_info("strategies");
        $out = "<BR>Use database:<BR>\n";
        $out .= mkSelect("database", $ndbs, $dbs, array("*"=>"Any"));
        $out .= "<BR>Type of Search:<BR>\n";
        // pass the strategy count here, not the database count
        $out .= mkSelect("strategy", $nstrats, $strategies, array("exact"=>"Exact Search"));
        echo $out;
}
// Query the default server for its description, database list and
// strategy list. No arguments are passed, so $extended keeps its default
// of false and the per-database descriptions are not fetched.
$server = new DictServerInfo();
// echo the database and strategy selection lists built from that information
printForm($server);
?>
<br>
<input type="submit" name="submit" value=" Search ">
<input type="reset" name="reset" value=" Clear form input ">
</form>
<hr>
<?php
/*
* If there was a query, this part will show the result
*/
// check if element is in the array
/*
* Value membership test (loose comparison).
* Uses the native in_array() when the running PHP has it; otherwise walks
* the array by hand. The original version test misspelled $release and
* only accepted major version 4, so on other versions it fell through to
* an each() loop, and each() no longer exists in PHP 8.
*/
function inArray($element, $arr) {
        if (function_exists("in_array"))
                return in_array($element, $arr);
        $n = count($arr);
        reset($arr);
        for ($i = 0; $i < $n; $i++) {
                if (current($arr) == $element)
                        return true;
                next($arr);
        }
        return false;
}
// remove duplicates from array
// and eliminate the patterns in $nolinks
/*
* Return the values of $arr in their original order, without duplicates
* and without any entry containing the text "rfc:".
* Uses a plain substring test; the ereg() call it replaces was removed
* in PHP 7.
*/
function cleanArray($arr) {
        $nolinks = "rfc:";
        $out = array();
        $n = count($arr);
        for ($i = 0; $i < $n; $i++) {
                if (strpos($arr[$i], $nolinks) !== false)
                        continue;
                if (!inArray($arr[$i], $out))
                        $out[] = $arr[$i];
        }
        return $out;
}
//make the links to other words in the description
/*
* Turn a definition text into HTML.
* Terms marked as {term} or {{term}} are shown in bold, and a "See also:"
* list is appended with one link per distinct marked term:
*   - "mailto:"/"news:" entries, bare URLs and "description (URL)" entries
*     become external links shown between angle brackets
*   - everything else becomes a link back to this page that runs an exact
*     search for the term in database $db
* The text comes from the server and is HTML-escaped before markup is added.
*/
function mkLinks($str, $db) {
        global $PHP_SELF;
        $self = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : (string) $PHP_SELF;
        $self = htmlspecialchars($self);

        $patt = '/\{+([^{}]+)\}+/';

        // collect the marked terms from the raw text, so that the link
        // parameters are built from unescaped values
        $reg = array();
        preg_match_all($patt, $str, $reg);
        $link = $reg[1];
        // a term may span a line break: collapse whitespace first, so that
        // two spellings of the same term count as duplicates
        $n = count($link);
        for ($i = 0; $i < $n; $i++)
                $link[$i] = trim(preg_replace('/[[:space:]]+/', " ", $link[$i]));
        // clean up array
        $link = cleanArray($link);

        // braces survive escaping, so the bold markup is added afterwards
        $out = preg_replace($patt, "<b>\\1</b>", htmlspecialchars($str));

        $n = count($link);
        if ($n > 0)
                $out .= "<i>See also:</i>\n";

        for ($i = 0; $i < $n; $i++) {
                $target = $link[$i];
                // reset for every entry; the original could reuse the URL of
                // the previous entry when the current one did not parse
                $url = "";
                $desc = "";
                $regurl = array();
                if (preg_match('!^((mailto|news):.+)$!', $target, $regurl)) {
                        $url = $regurl[1];
                        $desc = $url;
                } elseif (preg_match('!^(.*) *\(((http|https|ftp|telnet|gopher)://.+)\)$!',
                                $target, $regurl)) {
                        $url = $regurl[2];
                        $desc = chop($regurl[1]);
                        if ($desc == "")
                                $desc = $url;
                } elseif (preg_match('!^(http|https|ftp|telnet|gopher)://!', $target)) {
                        // bare URL without a description
                        $url = $target;
                        $desc = $target;
                }

                if ($url != "") {
                        // external link, marked using <>
                        $out .= "&lt;<a href=\"".htmlspecialchars(chop($url))."\" target=\"_blank\">";
                        $out .= htmlspecialchars(chop($desc))."</a>&gt; ";
                } else {
                        $out .= "[<a href=\"".$self."?query_term=";
                        $out .= urlencode($target)."&amp;database=".urlencode($db);
                        $out .= "&amp;strategy=exact\">".htmlspecialchars($target)."</a>] ";
                        if (($i % 5) == 0 && $i > 0)
                                $out .= "\n";
                }
        }
        $out .= "\n";
        return $out;
}
// Perform a query to the server
/*
* Run a query on the default server and return the result as an HTML
* fragment.
* Strategy "exact" looks up definitions; any other strategy asks the
* server for matching terms and lists each one as a link that runs an
* exact search for it. A strategy the client class does not accept gives
* a short message instead of a result list. All request-derived and
* server-derived values are HTML-escaped.
*/
function doQuery($str, $db, $strategy) {
        global $PHP_SELF;
        $self = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : (string) $PHP_SELF;
        $self = htmlspecialchars($self);

        $is_define = ($strategy == "exact");
        $query = new DictQuery();
        if ($is_define) {
                $query->define($str, $db);
        } else {
                // check here: the class echoes the raw value and exits the
                // script when it is handed an unknown strategy
                if (!$query->is_method($strategy)) {
                        $query->close();
                        return "<b>Unsupported search strategy: ".htmlspecialchars($strategy)."</b>";
                }
                $query->match($str, $strategy, $db);
        }
        $res = $query->get("result");
        $nres = count($res);
        $out = "<b>Found ".$nres;
        $out .= ($nres == 1) ? " hit" : " hits";
        $out .= "</b> - <i>Term: ".htmlspecialchars($str);
        $out .= ", Database: ".htmlspecialchars($db);
        $out .= ", Strategy: ".htmlspecialchars($strategy);
        $out .= "</i><br>\n<dl>\n";
        for ($i = 0; $i < $nres; $i++) {
                if (!isset($res[$i]))
                        continue;
                $entry = $res[$i];
                if ($is_define) {
                        $out .= "<dt>[".($i + 1)."] : ".htmlspecialchars($entry["dbname"])."</dt>\n";
                        $out .= "<dd><pre>".mkLinks($entry["definition"], $db)."</pre></dd>\n";
                } else {
                        // a match line reads `dbcode "matched term"`; split at the
                        // first space only, so multi-word terms stay whole
                        $match = explode(" ", chop($entry), 2);
                        $match_term = isset($match[1]) ? str_replace("\"", "", $match[1]) : "";
                        $out .= "<dt>[". ($i + 1 ) . "] : ";
                        $out .= "<A HREF=\"".$self."?query_term=".urlencode($match_term);
                        $out .= "&amp;database=".urlencode($db);
                        $out .= "&amp;strategy=exact\">";
                        $out .= htmlspecialchars($match_term)."</a></dt>\n";
                        $out .= "<dd> Database: ".htmlspecialchars($match[0])."</dd>";
                }
        }
        $out .= "</dl>";
        return $out;
}
// if there was a query ...
// Take the query from the request arrays when the running PHP provides
// them (register_globals no longer exists) and fall back to the legacy
// globals otherwise.
if (isset($_REQUEST["query_term"]) && is_string($_REQUEST["query_term"]))
        $query_term = $_REQUEST["query_term"];
elseif (!isset($query_term) || !is_string($query_term))
        $query_term = "";
if (isset($_REQUEST["database"]) && is_string($_REQUEST["database"]))
        $database = $_REQUEST["database"];
elseif (!isset($database) || !is_string($database) || $database == "")
        $database = "*";
if (isset($_REQUEST["strategy"]) && is_string($_REQUEST["strategy"]))
        $strategy = $_REQUEST["strategy"];
elseif (!isset($strategy) || !is_string($strategy) || $strategy == "")
        $strategy = "exact";
// compare against "" so that a search for "0" is not ignored
if ($query_term != "") {
        $out = doQuery($query_term, $database, $strategy);
        echo $out."\n<hr>\n";
}
?>
Last accessed:
<?php
// Page footer: current timestamp, then the whole seconds elapsed since
// $start was recorded at the top of the page.
$end = time();
print(date("Y/m/d H:i:s", time()));
print(" [Total processing time: " . ($end - $start) . " seconds]");
?>
</body>
</html>