Part of Speech Tagging
A simplified part-of-speech tagger that identifies words as nouns, verbs, adjectives, adverbs, etc.
Reference : http://phpir.com/part-of-speech-tagging
@author : Ian Barber
<!DOCTYPE html>
<html>
<body>
<?php
# Rule-based part-of-speech tagger that looks words up in a lexicon file (e.g. lexicon.txt)
/**
 * Simple rule-based part-of-speech tagger.
 *
 * Words are first looked up in a lexicon; words that are not found default
 * to a common noun ('NN'). A fixed sequence of suffix and context rules then
 * adjusts the tag.
 */
class PosTagger {
    /**
     * Lexicon lookup table: lower-cased word => list of its tags in file order.
     *
     * @var array
     */
    private $dict = array();

    /**
     * Load the lexicon from a text file.
     *
     * Every line is expected to hold a word followed by one or more tags,
     * separated by whitespace. The first tag is the one used by default.
     * Blank lines and lines without any tag are skipped. If a word occurs
     * more than once, the last occurrence wins.
     *
     * @param string $lexicon Path to the lexicon file.
     *
     * @throws RuntimeException If the lexicon file cannot be opened.
     */
    public function __construct($lexicon) {
        $fh = fopen($lexicon, 'r');
        if ($fh === false) {
            throw new RuntimeException('Unable to open lexicon file: ' . $lexicon);
        }

        // Compare against false explicitly so a line consisting of "0" does
        // not end the loop early.
        while (($line = fgets($fh)) !== false) {
            // Splitting on any whitespace also discards the line terminator
            // that fgets() keeps; otherwise the last tag of every entry would
            // be stored as e.g. "DT\n" and never match 'DT' in tag().
            $fields = preg_split('/\s+/', $line, -1, PREG_SPLIT_NO_EMPTY);
            if ($fields === false || count($fields) < 2) {
                continue;
            }
            $word = strtolower(array_shift($fields));
            $this->dict[$word] = $fields;
        }
        fclose($fh);
    }

    /**
     * Tag every token of a text.
     *
     * Tokens are runs of word characters, digits and full stops. The returned
     * 'token' is the text exactly as matched (trailing full stops included);
     * lexicon lookups and suffix rules use the token without trailing stops.
     *
     * @param string $text Text to tag.
     *
     * @return array List of array('token' => string, 'tag' => string), one
     *               entry per token, in order of appearance.
     */
    public function tag($text) {
        preg_match_all('/[\w\d\.]+/', $text, $matches);

        $nouns = array('NN', 'NNS');
        $verbsAfterDeterminer = array('VBD', 'VBP', 'VB');
        $return = array();

        foreach ($matches[0] as $i => $rawToken) {
            // Remove trailing full stops for lookups and suffix checks
            $token = rtrim($rawToken, '.');
            $key = strtolower($token);
            $known = isset($this->dict[$key]) ? $this->dict[$key] : null;
            $prev = $i > 0 ? $return[$i - 1] : null;

            // Use the first lexicon tag if the word is known, otherwise
            // default to a common noun
            $tag = $known !== null ? $known[0] : 'NN';

            // Convert verbs after a determiner (e.g. 'the') to nouns
            if ($prev !== null
                && $prev['tag'] === 'DT'
                && in_array($tag, $verbsAfterDeterminer, true)) {
                $tag = 'NN';
            }

            // Convert a noun to a number if it contains a full stop
            if (substr($tag, 0, 1) === 'N' && strpos($token, '.') !== false) {
                $tag = 'CD';
            }

            // Convert a noun to a past participle if it ends with 'ed'
            if (substr($tag, 0, 1) === 'N' && substr($token, -2) === 'ed') {
                $tag = 'VBN';
            }

            // Anything that ends with 'ly' is an adverb
            if (substr($token, -2) === 'ly') {
                $tag = 'RB';
            }

            // Convert a common noun to an adjective if it ends with 'al'
            if (in_array($tag, $nouns, true) && substr($token, -2) === 'al') {
                $tag = 'JJ';
            }

            // Convert a noun to a verb if the preceding token is 'would'
            if ($prev !== null
                && $tag === 'NN'
                && strtolower($prev['token']) === 'would') {
                $tag = 'VB';
            }

            // Convert a noun to a plural noun if it ends with 's'
            if ($tag === 'NN' && substr($token, -1) === 's') {
                $tag = 'NNS';
            }

            // Convert a common noun to a gerund if it ends with 'ing'
            if (in_array($tag, $nouns, true) && substr($token, -3) === 'ing') {
                $tag = 'VBG';
            }

            // If we get noun noun, and the second can be a verb according to
            // the lexicon, convert it to that verb form
            if ($prev !== null
                && $known !== null
                && in_array($tag, $nouns, true)
                && in_array($prev['tag'], $nouns, true)) {
                if (in_array('VBN', $known, true)) {
                    $tag = 'VBN';
                } elseif (in_array('VBZ', $known, true)) {
                    $tag = 'VBZ';
                }
            }

            $return[] = array('token' => $rawToken, 'tag' => $tag);
        }

        return $return;
    }
}
?>
<?php
# Function to format the tagged tokens as a single "token/tag" string and return it
/**
 * Render tagged tokens as "token/tag" pairs.
 *
 * Each pair is followed by a single space, so a non-empty result ends with a
 * trailing space. A newline is echoed to the output as a side effect.
 *
 * @param array $tags List of arrays with 'token' and 'tag' keys.
 *
 * @return string|null The formatted string, or null when $tags is empty.
 */
function printTag($tags) {
    $pairs = array();
    foreach ($tags as $entry) {
        $pairs[] = $entry['token'] . '/' . $entry['tag'];
    }

    echo "\n";

    if (!$pairs) {
        return null;
    }
    return implode(' ', $pairs) . ' ';
}
# Builds a tagger from 'lexicon.txt' as soon as this file is loaded.
# NOTE(review): nothing in the visible part of this file reads $tagger
# (getPartofSpeech() creates its own instance) - confirm whether a file
# that includes this one depends on it.
$tagger = new PosTagger('lexicon.txt');
?>
<?php
# Function used by other files to get the part-of-speech tagging of a given string
/**
 * Tag a string and return it formatted as "token/tag" pairs.
 *
 * The tagger is created on the first call and reused afterwards, so the
 * lexicon file is read once per request rather than once per call. This is
 * safe because tagging only reads the loaded lexicon.
 *
 * @param string $query Text to tag.
 *
 * @return string|null Result of printTag() for the tagged tokens.
 */
function getPartofSpeech($query) {
    static $tagger = null;
    if ($tagger === null) {
        $tagger = new PosTagger('lexicon.txt');
    }

    $tags = $tagger->tag($query);
    return printTag($tags);
}
?>
</body>
</html>
|