Chris Pollett >
Students >
Shailesh [Bio] [CS297 Part of Speech Tagging Code] |
Part of Speech Tagging
A simplified form with the identification of words as nouns, verbs, adjectives, adverbs, etc.
Reference : http://phpir.com/part-of-speech-tagging
@author : Ian Barber
<!DOCTYPE html>
<html>
<body>
<?php
/**
 * Lexicon-backed part-of-speech tagger with a handful of correction rules.
 *
 * The lexicon is a plain-text file with one entry per line: a word followed
 * by one or more whitespace-separated tags. The first tag on a line is used
 * as that word's default tag; the remaining tags are only consulted by the
 * "noun noun" rule in tag().
 */
class PosTagger
{
    /** @var array Lower-cased word => list of tags in lexicon order. */
    private $dict = array();

    /**
     * Reads the lexicon file into memory.
     *
     * If the file cannot be opened, PHP's own fopen() warning is emitted and
     * the dictionary stays empty, so tag() relies on its built-in rules only.
     *
     * @param string $lexicon Path to the lexicon file.
     */
    public function __construct($lexicon)
    {
        $fh = fopen($lexicon, 'r');
        if ($fh === false) {
            // Nothing to read; do not hand a non-resource to fgets()/fclose().
            return;
        }

        // Compare against false explicitly so that a line such as "0" does
        // not end the loop early.
        while (($line = fgets($fh)) !== false) {
            // fgets() keeps the line terminator. Without trimming, the last
            // tag of every entry would be stored as e.g. "DT\n" and never
            // match any of the tag comparisons made in tag().
            $tags = preg_split('/\s+/', trim($line), -1, PREG_SPLIT_NO_EMPTY);
            $word = array_shift($tags);

            // Skip blank lines and words that carry no tag at all.
            if ($word === null || count($tags) === 0) {
                continue;
            }

            $this->dict[strtolower($word)] = $tags;
        }

        fclose($fh);
    }

    /**
     * Tags every token found in $text.
     *
     * Tokens are runs of word characters and full stops. Each token starts as
     * a common noun ('NN'), is overridden by the lexicon's first tag when the
     * word is known, and is then adjusted by the rules below, applied in order.
     *
     * @param string $text Text to tag.
     *
     * @return array List of array('token' => string, 'tag' => string), in the
     *               order the tokens appear. 'token' is the text exactly as
     *               matched, including any trailing full stops.
     */
    public function tag($text)
    {
        preg_match_all("/[\w\d\.]+/", $text, $matches);

        $nouns = array('NN', 'NNS');
        $verbsAfterDeterminer = array('VBD', 'VBP', 'VB');
        $return = array();
        $i = 0;

        foreach ($matches[0] as $token) {
            // Default to a common noun; the reported token is kept as matched.
            $return[$i] = array('token' => $token, 'tag' => 'NN');
            $tag = 'NN';

            // Remove trailing full stops before lookups and suffix checks.
            $token = rtrim($token, '.');

            // Tags listed for this word in the lexicon (empty if unknown).
            $key = strtolower($token);
            $known = isset($this->dict[$key]) ? $this->dict[$key] : array();
            if (isset($known[0])) {
                $tag = $known[0];
            }

            $hasPrev = $i > 0;
            $prevTag = $hasPrev ? $return[$i - 1]['tag'] : null;

            // Convert verbs after a determiner ('DT') to nouns.
            if ($hasPrev && $prevTag === 'DT' && in_array($tag, $verbsAfterDeterminer, true)) {
                $tag = 'NN';
            }

            // Convert a noun to a number if a '.' appears inside it.
            if ($tag[0] === 'N' && strpos($token, '.') !== false) {
                $tag = 'CD';
            }

            // Convert a noun to a past participle if it ends with 'ed'.
            if ($tag[0] === 'N' && substr($token, -2) === 'ed') {
                $tag = 'VBN';
            }

            // Anything that ends with 'ly' is an adverb.
            if (substr($token, -2) === 'ly') {
                $tag = 'RB';
            }

            // Convert a common noun to an adjective if it ends with 'al'.
            if (in_array($tag, $nouns, true) && substr($token, -2) === 'al') {
                $tag = 'JJ';
            }

            // Convert a noun to a verb if the word before is 'would'.
            if ($hasPrev && $tag === 'NN' && strtolower($return[$i - 1]['token']) === 'would') {
                $tag = 'VB';
            }

            // Convert a noun to plural if it ends with an 's'.
            if ($tag === 'NN' && substr($token, -1) === 's') {
                $tag = 'NNS';
            }

            // Convert a common noun to a gerund if it ends with 'ing'.
            if (in_array($tag, $nouns, true) && substr($token, -3) === 'ing') {
                $tag = 'VBG';
            }

            // Noun followed by noun: if the lexicon says the second word can
            // also be a verb, prefer the verb reading.
            if ($hasPrev
                && in_array($tag, $nouns, true)
                && in_array($prevTag, $nouns, true)
                && isset($this->dict[$key])
            ) {
                if (in_array('VBN', $known, true)) {
                    $tag = 'VBN';
                } elseif (in_array('VBZ', $known, true)) {
                    $tag = 'VBZ';
                }
            }

            $return[$i]['tag'] = $tag;
            $i++;
        }

        return $return;
    }
}
?>
<?php
/**
 * Formats tagged tokens as "token/tag " pairs joined into one string.
 *
 * Despite its name, the only thing this function prints is a single newline;
 * the formatted text is returned to the caller.
 *
 * @param array $tags List of array('token' => ..., 'tag' => ...) entries.
 *
 * @return string|null The formatted string, or null when $tags is empty.
 */
function printTag($tags)
{
    $output = null;
    foreach ($tags as $t) {
        $output .= $t['token'] . "/" . $t['tag'] . " ";
    }
    echo "\n";
    return $output;
}

// Tagger created when this file is loaded. Nothing in this file reads it;
// it is kept in case scripts that include this file use the variable.
$tagger = new PosTagger('lexicon.txt');
?>
<?php
/**
 * Used from another file to get the part-of-speech tagging of a string.
 *
 * The tagger is built on the first call and reused afterwards, so the
 * lexicon file is read and parsed once rather than on every call.
 *
 * @param string $query Text to tag.
 *
 * @return string|null Result of printTag() for the tagged query.
 */
function getPartofSpeech($query)
{
    static $tagger = null;
    if ($tagger === null) {
        $tagger = new PosTagger('lexicon.txt');
    }

    $tags = $tagger->tag($query);
    $output = printTag($tags);
    return $output;
}
?>
</body>
</html>
|