Chris Pollett > Students > Shailesh

    (Print View)

    [Bio]

    [Project Blog]

    [CS297 Proposal]

    [CS297 Presentation-PDF]

    [CS297 Part of Speech Tagging Code]

    [CS297 Cosine Ranking Code]

    [CS297 Wordnet Code]

    [CS297 Report-PDF]

    [CS298 Proposal]

    [CS298 Presentation-PDF]

    [CS298 Project Report-PDF]

                          

























Cosine Ranking Algorithm

Cosine similarity is a measure of similarity between two vectors of an inner product space that measures the cosine of the angle between them. Cosine similarity is particularly used in positive space, where the outcome is neatly bounded in [0,1].

<!DOCTYPE html>
<html>
<body>

<?php
require_once 'wordnet.php';
require_once 'PartofSpeech.php';

# Removes common English stop words from a piece of text.
#
# Every entry of the stop word list is matched as a whole word (between regex
# word boundaries) and replaced with the empty string. Surrounding whitespace
# is left in place, so the result can contain runs of spaces. Matching is
# case-sensitive and the list is lower case, so callers should lower-case the
# text first.
#
# $input  string (or array of strings) to clean
# returns the text with stop words removed, as returned by preg_replace()
function removeCommonWords($input){

		$commonWords = array('overview',"'",'a','able','about','above','abroad','according','accordingly','across','actually',
		'adj','after','afterwards','again','against','ago','ahead','ain\'t','all','allow','allows','almost','alone','along',
		'alongside','already','also','although','always','am','amid','amidst','among','amongst','an','and','another','any',
		'anybody','anyhow','anyone','anything','anyway','anyways','anywhere','apart','appear','appreciate','appropriate','are',
		'aren\'t','around','as','a\'s','aside','ask','asking','associated','at','available','away','awfully','b','back',
		'backward','backwards','be','became','because','become','becomes','becoming','been','before','beforehand','begin',
		'behind','being','believe','below','beside','besides','best','better','between','beyond','both','brief','but','by',
		'c','came','can','cannot','cant','can\'t','caption','cause','causes','certain','certainly','changes','clearly',
		'c\'mon','co','co.','com','come','comes','concerning','consequently','consider','considering','contain','containing',
		'contains','corresponding','could','couldn\'t','course','c\'s','currently','d','dare','daren\'t','definitely','described',
		'despite','did','didn\'t','different','directly','do','does','doesn\'t','doing','done','don\'t','down','downwards','during',
		'e','each','edu','eg','eight','eighty','either','else','elsewhere','end','ending','enough','entirely','especially','et',
		'etc','even','ever','evermore','every','everybody','everyone','everything','everywhere','ex','exactly','example','except',
		'f','fairly','far','farther','few','fewer','fifth','first','five','followed','following','follows','for','forever','former',
		'formerly','forth','forward','found','four','from','further','furthermore','g','get','gets','getting','given','gives','go',
		'goes','going','gone','got','gotten','greetings','h','had','hadn\'t','half','happens','hardly','has','hasn\'t','have',
		'haven\'t','having','he','he\'d','he\'ll','hello','help','hence','her','here','hereafter','hereby','herein','here\'s',
		'hereupon','hers','herself','he\'s','hi','him','himself','his','hither','hopefully','how','howbeit','however','hundred',
		'i','i\'d','ie','if','ignored','i\'ll','i\'m','immediate','in','inasmuch','inc','inc.','indeed','indicate','indicated',
		'indicates','inner','inside','insofar','instead','into','inward','is','isn\'t','it','it\'d','it\'ll','its','it\'s',
		'itself','i\'ve','j','just','k','keep','keeps','kept','know','known','knows','l','last','lately','later','latter',
		'latterly','least','less','lest','let','let\'s','like','liked','likely','likewise','little','look','looking','looks',
		'low','lower','ltd','m','made','mainly','make','makes','many','may','maybe','mayn\'t','me','mean','meantime','meanwhile',
		'merely','might','mightn\'t','mine','minus','miss','more','moreover','most','mostly','mr','mrs','much','must','mustn\'t',
		'my','myself','n','name','namely','nd','near','nearly','necessary','need','needn\'t','needs','neither','never','neverf',
		'neverless','nevertheless','new','next','nine','ninety','no','nobody','non','none','nonetheless','noone','no-one','nor',
		'normally','not','nothing','notwithstanding','novel','now','nowhere','o','obviously','of','off','often','oh','ok','okay',
		'old','on','once','one','ones','one\'s','only','onto','opposite','or','other','others','otherwise','ought','oughtn\'t',
		'our','ours','ourselves','out','outside','over','overall','own','p','particular','particularly','past','per','perhaps',
		'placed','please','plus','possible','presumably','probably','provided','provides','q','que','quite','qv','r','rather',
		'rd','re','really','reasonably','recent','recently','regarding','regardless','regards','relatively','respectively','right',
		'round','s','said','same','saw','say','saying','says','second','secondly','see','seeing','seem','seemed','seeming','seems',
		'seen','self','selves','sensible','sent','serious','seriously','seven','several','shall','shan\'t','she','she\'d',
		'she\'ll','she\'s','should','shouldn\'t','since','six','so','some','somebody','someday','somehow','someone','something',
		'sometime','sometimes','somewhat','somewhere','soon','sorry','specified','specify','specifying','still','sub','such',
		'sup','sure','t','take','taken','taking','tell','tends','th','than','thank','thanks','thanx','that','that\'ll','thats',
		'that\'s','that\'ve','the','their','theirs','them','themselves','then','thence','there','thereafter','thereby','there\'d',
		'therefore','therein','there\'ll','there\'re','theres','there\'s','thereupon','there\'ve','these','they','they\'d',
		'they\'ll','they\'re','they\'ve','thing','things','think','third','thirty','this','thorough','thoroughly','those',
		'though','three','through','throughout','thru','thus','till','to','together','too','took','toward','towards','tried',
		'tries','truly','try','trying','t\'s','twice','two','u','un','under','underneath','undoing','unfortunately','unless',
		'unlike','unlikely','until','unto','up','upon','upwards','us','use','used','useful','uses','using','usually','v','value',
		'various','versus','very','via','viz','vs','w','want','wants','was','wasn\'t','way','we','we\'d','welcome','well','we\'ll',
		'went','were','we\'re','weren\'t','we\'ve','what','whatever','what\'ll','what\'s','what\'ve','when','whence','whenever',
		'where','whereafter','whereas','whereby','wherein','where\'s','whereupon','wherever','whether','which','whichever','while',
		'whilst','whither','who','who\'d','whoever','whole','who\'ll','whom','whomever','who\'s','whose','why','will','willing',
		'wish','with','within','without','wonder','won\'t','would','wouldn\'t','x','y','yes','yet','you','you\'d','you\'ll','your',
		'you\'re','yours','yourself','yourselves','you\'ve','z','zero');

		// Escape every word before it goes into the pattern. Without this the
		// '.' in entries such as 'co.' and 'inc.' is a regex wildcard, so real
		// words like "cow" or "inch" were being deleted as if they were stop words.
		$escapedWords = array();
		foreach ($commonWords as $word) {
			$escapedWords[] = preg_quote($word, '/');
		}

		return preg_replace('/\b('.implode('|',$escapedWords).')\b/','',$input);
}
# Prints three diagnostic views of the given text as HTML:
#   1. the text split into lines on the newline character,
#   2. the lower-cased text split on single spaces, with empty pieces dropped,
#   3. a count of each remaining word after stop words have been removed
#      with removeCommonWords().
#
# $inputString  text to analyse
# returns nothing; everything is echoed
function writeMsg($inputString)
{
		// "\\n" keeps the backslash-n visible in the heading; a bare "\n" in a
		// double-quoted string would emit a real line break instead.
		echo "<br><b>Sentences seperated by \\n</b></br>";
		echo "<pre>";
		// Double quotes are required here: '\n' in single quotes is the two
		// characters backslash and n, so the text was never split on line breaks.
		print_r(explode("\n", $inputString));
		echo "</pre>";

		//To make it lower case
		$inputString = strtolower ($inputString);

		//To separate it by space character
		echo "<b>Sentences seperated by space</b>";
		echo "<pre>";
		// array_filter drops the empty pieces produced by consecutive spaces,
		// array_values renumbers what is left.
		print_r(array_values(array_filter(explode(' ', $inputString))));
		echo "</pre>";
		echo "<br />\n";


		$result_str=removeCommonWords($inputString);

		//Display the array element
		echo "<b>Word Count is</b>";
		echo "<pre>";
		print_r( array_count_values(str_word_count($result_str, 1)) );

		echo "</pre>";
		echo "<br />\n";
}

	// Show the first line of welcome.txt, then pass the whole file to writeMsg().
	$file = fopen("welcome.txt", "r");
	if (!$file) {
		exit("Unable to open file!");
	}
	echo fgets($file), "<br />\n";
	// The full contents are read separately from the handle used above.
	$str = file_get_contents('welcome.txt');
	writeMsg($str);
	fclose($file);

	// Tag the query with getPartofSpeech(), map each tag to a coarse word
	// class, run the WordNet command line tool for each classified word and
	// keep the word group whose example sentences score highest against the query.
	$query = 'famous school';
	$query_words=explode(" ",$query);
	$output = getPartofSpeech($query);
	echo "<br>part of speeech for query is ".$output;

	// Each whitespace separated token is read below as "word/TAG".
	$output_words = preg_split("/\s+/",$output,-1,PREG_SPLIT_NO_EMPTY);

	$output_words_cnt = count($output_words);
	$word_type = null;
	// Tags grouped by word class.
	// NOTE(review): "WP" appears twice in $noun_arr; the second entry may have been meant as "WP$" - confirm.
	$noun_arr = array("NN","NNS","NNP","NNPS","PRP","PRP$","WP","WP");
	$verb_arr = array("VB","VBD","VBG","VBN","VBP","VBZ");
	$adj_arr = array("JJ","JJR","JJS");
	$adv_arr = array("RB","RBR","RBS","WRB");
	$similar_words = array();
	for($i=0 ; $i<$output_words_cnt ; $i++)
	{
		$pos = strpos($output_words[$i],'/');
		echo "<br>";
		// A token without '/' carries no tag. Use an empty tag so it is classed
		// "NA" rather than slicing the word itself from offset 1.
		$substring_output_word = ($pos === false) ? "" : substr($output_words[$i],$pos+1);
		echo "substrinf is --$substring_output_word--";
		if(in_array($substring_output_word,$noun_arr,true))
			$word_type = "Noun";
		else if(in_array($substring_output_word,$verb_arr,true))
			$word_type = "Verb";
		else if(in_array($substring_output_word,$adj_arr,true))
			$word_type = "Adjective";
		else if(in_array($substring_output_word,$adv_arr,true))
			$word_type = "Adverb";
		else
			$word_type = "NA";

		echo "<br>Word type is : $word_type";

		// Nothing to look up for unclassified tokens, or when the tagger
		// produced more tokens than the query has words.
		if($word_type == "NA" || !isset($query_words[$i]))
			continue;

		// The word is quoted for the shell so spaces or metacharacters in a
		// query word cannot alter the command.
		shell_exec('"C:/Program Files (x86)/WordNet/2.1/bin/wn.exe" '.escapeshellarg($query_words[$i]).' -over > C:\xampp\htdocs\wordnet_output.txt');
		$avg_score = split_wordnet_output($word_type,$query);

		// An empty score list is treated like a missing result. Previously it
		// left the "highest" key and value undefined, or carried over from the
		// previous word.
		if($avg_score == null || empty($avg_score[0]))
		{
			echo "<br><br><b>Output from wordnet is null</b>";
			continue;
		}

		// Ascending sort keeps the keys, so the last element is the best score.
		asort($avg_score[0]);
		$extracted_words=$avg_score[1];
		echo "<br>Soreted score is" ;
		$sorted_score_value = end($avg_score[0]);
		$sorted_score_key = key($avg_score[0]);
		echo "<br><br> Highesh score for $query_words[$i] is $sorted_score_value and key is $sorted_score_key";
		echo "<br><br><b> words are $extracted_words[$sorted_score_key]</b>";

		$similar_words[$i]=$extracted_words[$sorted_score_key];
		print_r($avg_score);
	}
	echo "<br><br><br> Words and words from wordnet are as follows : ";
	print_r($query_words);
	print_r($similar_words);
?>
</body>
</html>
</xmp>

<br> 
<p>Code to parse the WordNet output in the specified format.</p>
<xmp>
<!DOCTYPE html>
<html>
<body>

<?php

# Parses wordnet_output.txt (read from the current working directory) and
# scores each numbered entry of the section for the requested word class.
#
# The file is split into paragraphs, the first paragraph containing
# "The verb ", "The noun ", "The adj " or "The adv " (chosen by $word_type) is
# split into numbered entries, and every entry gets the average of
# get_intersection($query, sentence) over its double-quoted sentences.
# Intermediate results are echoed as HTML for debugging.
#
# $word_type  "Verb", "Noun", "Adjective" or "Adverb"
# $query      query text each quoted sentence is compared against
# returns     array(scores, words), both keyed by entry number starting at 1,
#             or null when the file is empty/unreadable or has no section
#             for $word_type
function split_wordnet_output($word_type,$query){
	echo "<br><br>";

	$doc = file_get_contents("wordnet_output.txt");
	if($doc === false || $doc === ""){
		return null;
	}
	echo "<b>Original Content:</b><br>$doc<br><br>";

	$paragraphs = preg_split("/\n\r/",$doc,-1,PREG_SPLIT_NO_EMPTY);

	echo "<b>Paragraphs:</b><br>";
	print_r($paragraphs);

	$para1 = array();
	if($word_type=="Verb")
		$para1= preg_grep ("/\bThe\sverb\s/" , $paragraphs);
	else if($word_type=="Noun")
		$para1= preg_grep ("/\bThe\snoun\s/" , $paragraphs);
	else if($word_type=="Adjective")
		$para1= preg_grep ("/\bThe\sadj\s/" , $paragraphs);
	else if($word_type=="Adverb")
		$para1= preg_grep ("/\bThe\sadv\s/" , $paragraphs);

	// No section for this word class: report "no result" the same way as an
	// empty file, which callers already handle.
	if(empty($para1)){
		return null;
	}

	// preg_grep keeps the original paragraph keys, so the match is not
	// necessarily at index 1; take the first match whatever its key is.
	$para2 = reset($para1);

	$para = preg_split("/\d\.\s/",$para2,-1,PREG_SPLIT_NO_EMPTY);

	$p = count($para);
	$wordmatch=array();
	$avg_score = array();
	// Piece 0 is skipped: it is the text before the first numbered entry.
	for($i=1;$i<$p;$i++)
	{
		// Collect the double-quoted sentences of this entry.
		preg_match_all('/\"(.*?)\"/', $para[$i], $match);
		print_r($match);
		//to seperate out the words
		preg_match('/[\w+\s\,]+\s\-+/', $para[$i], $matchword);
		print_r($matchword);
		// Fall back to an empty string when the entry has no "words --" part.
		$wordmatch[$i]=isset($matchword[0]) ? $matchword[0] : "";
		$sent_count = count($match[1]);
		echo "<br> count is $sent_count";
		$score = array();

		for($j=0;$j<$sent_count;$j++)
		{
			$score[$j] = get_intersection($query,$match[1][$j]);
		}

		// An entry without quoted sentences scores 0 instead of dividing by zero.
		$avg_score[$i] = ($sent_count > 0) ? array_sum($score)/$sent_count : 0;
	}


	echo "<br>Full Average score is";
	print_r($avg_score);
	return array($avg_score,$wordmatch);
}

# Function to get score by intersection method
#
# Both strings are split into words on whitespace. The score is the number of
# words of $s1 that also occur in $s2, divided by the average word count of
# the two strings. The two inputs are echoed as HTML for debugging.
#
# $s1, $s2  strings to compare
# returns   the score, or 0 when neither string contains a word
function get_intersection($s1,$s2)
{
	echo "<br>$s1 and Seond is$s2";
	// Split on runs of whitespace and drop empty pieces. explode(" ") always
	// returns at least one element, which made the zero guard below
	// unreachable and scored two empty strings as a perfect match.
	$sentence1 = preg_split('/\s+/', $s1, -1, PREG_SPLIT_NO_EMPTY);
	$sentence2 = preg_split('/\s+/', $s2, -1, PREG_SPLIT_NO_EMPTY);

	$total_words = count($sentence1) + count($sentence2);
	if ($total_words == 0)
		return 0;
	return count(array_intersect($sentence1,$sentence2)) / ($total_words / 2);
}
?>
</body>
</html>