Part of Speech Tagging
A simplified part-of-speech tagger that identifies words as nouns, verbs, adjectives, adverbs, etc.
Reference :
@author : Ian Barber
<!DOCTYPE html>
/**
 * Rule-based part-of-speech tagger driven by a plain-text lexicon file.
 *
 * Each lexicon line holds a word followed by one or more whitespace-separated
 * tags. The first tag listed for a word is used as its default tag; a fixed
 * sequence of suffix and context rules then adjusts that tag.
 */
class PosTagger {
    /** @var array Map of lower-cased word => list of tags read from the lexicon. */
    private $dict = array();

    /**
     * Loads the lexicon into memory.
     *
     * @param string $lexicon Path to the lexicon file.
     * @throws RuntimeException If the lexicon file cannot be opened.
     */
    public function __construct($lexicon) {
        $fh = fopen($lexicon, 'r');
        if ($fh === false) {
            throw new RuntimeException('Unable to open lexicon file: ' . $lexicon);
        }

        // Compare against false explicitly so a falsy line such as "0" does
        // not end the loop early.
        while (($line = fgets($fh)) !== false) {
            // fgets() keeps the line terminator; trimming stops it from being
            // glued onto the last tag (e.g. "DT\n"), which would defeat every
            // exact tag comparison in tag().
            $parts = preg_split('/\s+/', trim($line), -1, PREG_SPLIT_NO_EMPTY);

            // Skip blank lines and words that come without any tag.
            if (count($parts) < 2) {
                continue;
            }

            $word = strtolower(array_shift($parts));
            $this->dict[$word] = $parts;
        }

        fclose($fh);
    }

    /**
     * Tags every token found in the given text.
     *
     * Tokens are runs of word characters, digits and full stops. The returned
     * token keeps any trailing full stops; the rules are evaluated against the
     * token with those trailing full stops removed.
     *
     * @param string $text Text to tag.
     * @return array List of array('token' => string, 'tag' => string), in input order.
     */
    public function tag($text) {
        preg_match_all("/[\w\d\.]+/", $text, $matches);
        $nouns = array('NN', 'NNS');
        $return = array();

        foreach ($matches[0] as $i => $rawToken) {
            // Remove trailing full stops before applying lookups and rules.
            $token = rtrim($rawToken, '.');
            $word = strtolower($token);
            $known = isset($this->dict[$word]) ? $this->dict[$word] : null;
            $previous = $i > 0 ? $return[$i - 1] : null;

            // Default to a common noun unless the lexicon knows the word.
            $tag = $known !== null ? $known[0] : 'NN';

            // Convert verbs that follow a determiner to nouns.
            if ($previous !== null
                && $previous['tag'] === 'DT'
                && in_array($tag, array('VBD', 'VBP', 'VB'), true)) {
                $tag = 'NN';
            }

            // Convert noun to number if a full stop remains inside the token.
            if (strncmp($tag, 'N', 1) === 0 && strpos($token, '.') !== false) {
                $tag = 'CD';
            }

            // Convert noun to past participle if it ends with 'ed'.
            if (strncmp($tag, 'N', 1) === 0 && $this->endsWith($token, 'ed')) {
                $tag = 'VBN';
            }

            // Anything that ends with 'ly' is an adverb.
            if ($this->endsWith($token, 'ly')) {
                $tag = 'RB';
            }

            // Convert common noun to adjective if it ends with 'al'.
            if (in_array($tag, $nouns, true) && $this->endsWith($token, 'al')) {
                $tag = 'JJ';
            }

            // Convert noun to verb if the word before is 'would'. The previous
            // token is compared as reported, i.e. including trailing full stops.
            if ($previous !== null
                && $tag === 'NN'
                && strtolower($previous['token']) === 'would') {
                $tag = 'VB';
            }

            // Convert noun to plural if it ends with an 's'.
            if ($tag === 'NN' && $this->endsWith($token, 's')) {
                $tag = 'NNS';
            }

            // Convert common noun to gerund.
            if (in_array($tag, $nouns, true) && $this->endsWith($token, 'ing')) {
                $tag = 'VBG';
            }

            // If we get noun noun, and the second can be a verb, convert to verb.
            if ($previous !== null
                && $known !== null
                && in_array($tag, $nouns, true)
                && in_array($previous['tag'], $nouns, true)) {
                if (in_array('VBN', $known, true)) {
                    $tag = 'VBN';
                } else if (in_array('VBZ', $known, true)) {
                    $tag = 'VBZ';
                }
            }

            $return[$i] = array('token' => $rawToken, 'tag' => $tag);
        }

        return $return;
    }

    /**
     * Reports whether $token ends with $suffix.
     */
    private function endsWith($token, $suffix) {
        return substr($token, -strlen($suffix)) === $suffix;
    }
}
/**
 * Formats tagged tokens as "token/TAG " pairs and returns the joined string.
 *
 * A single newline is echoed as a side effect; the formatted pairs themselves
 * are returned to the caller rather than printed.
 *
 * @param array $tags List of array('token' => ..., 'tag' => ...) entries.
 * @return string|null The concatenated pairs, each followed by a space, or
 *                     null when $tags is empty.
 */
function printTag($tags) {
    $output = null;
    foreach ($tags as $t) {
        $output .= $t['token'] . "/" . $t['tag'] . " ";
    }
    echo "\n";
    return $output;
}
// Builds a tagger from lexicon.txt and keeps it in $tagger.
// NOTE(review): nothing visible here reads $tagger (getPartofSpeech() builds
// its own instance) - confirm whether other files depend on it.
$tagger = new PosTagger('lexicon.txt');
/**
 * Tags the given string and returns the result as "token/TAG " pairs.
 *
 * A fresh PosTagger is built from lexicon.txt on every call. Formatting is
 * delegated to printTag(), which also echoes a newline as a side effect.
 *
 * @param string $query Text to tag.
 * @return string|null The formatted pairs as produced by printTag().
 */
function getPartofSpeech($query) {
    $tagger = new PosTagger('lexicon.txt');
    $tags = $tagger->tag($query);
    // Capture the formatted string; without this assignment the function
    // returned an undefined variable.
    $output = printTag($tags);
    return $output;
}