Chris Pollett >
Students >
Shailesh [Bio] [CS297 Part of Speech Tagging Code] |
Cosine Ranking AlgorithmCosine similarity is a measure of similarity between two vectors of an inner product space that measures the cosine of the angle between them.Cosine similarity is particularly used in positive space, where the outcome is neatly bounded in [0,1]. <!DOCTYPE html>
<html>
<body>
<?php
require_once 'wordnet.php';
require_once 'PartofSpeech.php';

/**
 * Removes common English stop words from a piece of text.
 *
 * Every stop word is matched as a whole word (between \b anchors) and replaced
 * with an empty string; the surrounding whitespace is left untouched. Matching
 * is case-sensitive and the list is lower case, so callers should lower-case
 * the text first (writeMsg() does).
 *
 * @param string $input Text to filter.
 * @return string|null  The filtered text (null if the regex engine fails).
 */
function removeCommonWords($input)
{
    // The pattern never changes, so it is built once and reused.
    static $pattern = null;

    if ($pattern === null) {
        $commonWords = array(
            'overview',"'",'a','able','about','above','abroad','according','accordingly','across','actually',
            'adj','after','afterwards','again','against','ago','ahead','ain\'t','all','allow','allows','almost','alone','along',
            'alongside','already','also','although','always','am','amid','amidst','among','amongst','an','and','another','any',
            'anybody','anyhow','anyone','anything','anyway','anyways','anywhere','apart','appear','appreciate','appropriate','are',
            'aren\'t','around','as','a\'s','aside','ask','asking','associated','at','available','away','awfully','b','back',
            'backward','backwards','be','became','because','become','becomes','becoming','been','before','beforehand','begin',
            'behind','being','believe','below','beside','besides','best','better','between','beyond','both','brief','but','by',
            'c','came','can','cannot','cant','can\'t','caption','cause','causes','certain','certainly','changes','clearly',
            'c\'mon','co','co.','com','come','comes','concerning','consequently','consider','considering','contain','containing',
            'contains','corresponding','could','couldn\'t','course','c\'s','currently','d','dare','daren\'t','definitely','described',
            'despite','did','didn\'t','different','directly','do','does','doesn\'t','doing','done','don\'t','down','downwards','during',
            'e','each','edu','eg','eight','eighty','either','else','elsewhere','end','ending','enough','entirely','especially','et',
            'etc','even','ever','evermore','every','everybody','everyone','everything','everywhere','ex','exactly','example','except',
            'f','fairly','far','farther','few','fewer','fifth','first','five','followed','following','follows','for','forever','former',
            'formerly','forth','forward','found','four','from','further','furthermore','g','get','gets','getting','given','gives','go',
            'goes','going','gone','got','gotten','greetings','h','had','hadn\'t','half','happens','hardly','has','hasn\'t','have',
            'haven\'t','having','he','he\'d','he\'ll','hello','help','hence','her','here','hereafter','hereby','herein','here\'s',
            'hereupon','hers','herself','he\'s','hi','him','himself','his','hither','hopefully','how','howbeit','however','hundred',
            'i','i\'d','ie','if','ignored','i\'ll','i\'m','immediate','in','inasmuch','inc','inc.','indeed','indicate','indicated',
            'indicates','inner','inside','insofar','instead','into','inward','is','isn\'t','it','it\'d','it\'ll','its','it\'s',
            'itself','i\'ve','j','just','k','keep','keeps','kept','know','known','knows','l','last','lately','later','latter',
            'latterly','least','less','lest','let','let\'s','like','liked','likely','likewise','little','look','looking','looks',
            'low','lower','ltd','m','made','mainly','make','makes','many','may','maybe','mayn\'t','me','mean','meantime','meanwhile',
            'merely','might','mightn\'t','mine','minus','miss','more','moreover','most','mostly','mr','mrs','much','must','mustn\'t',
            'my','myself','n','name','namely','nd','near','nearly','necessary','need','needn\'t','needs','neither','never','neverf',
            'neverless','nevertheless','new','next','nine','ninety','no','nobody','non','none','nonetheless','noone','no-one','nor',
            'normally','not','nothing','notwithstanding','novel','now','nowhere','o','obviously','of','off','often','oh','ok','okay',
            'old','on','once','one','ones','one\'s','only','onto','opposite','or','other','others','otherwise','ought','oughtn\'t',
            'our','ours','ourselves','out','outside','over','overall','own','p','particular','particularly','past','per','perhaps',
            'placed','please','plus','possible','presumably','probably','provided','provides','q','que','quite','qv','r','rather',
            'rd','re','really','reasonably','recent','recently','regarding','regardless','regards','relatively','respectively','right',
            'round','s','said','same','saw','say','saying','says','second','secondly','see','seeing','seem','seemed','seeming','seems',
            'seen','self','selves','sensible','sent','serious','seriously','seven','several','shall','shan\'t','she','she\'d',
            'she\'ll','she\'s','should','shouldn\'t','since','six','so','some','somebody','someday','somehow','someone','something',
            'sometime','sometimes','somewhat','somewhere','soon','sorry','specified','specify','specifying','still','sub','such',
            'sup','sure','t','take','taken','taking','tell','tends','th','than','thank','thanks','thanx','that','that\'ll','thats',
            'that\'s','that\'ve','the','their','theirs','them','themselves','then','thence','there','thereafter','thereby','there\'d',
            'therefore','therein','there\'ll','there\'re','theres','there\'s','thereupon','there\'ve','these','they','they\'d',
            'they\'ll','they\'re','they\'ve','thing','things','think','third','thirty','this','thorough','thoroughly','those',
            'though','three','through','throughout','thru','thus','till','to','together','too','took','toward','towards','tried',
            'tries','truly','try','trying','t\'s','twice','two','u','un','under','underneath','undoing','unfortunately','unless',
            'unlike','unlikely','until','unto','up','upon','upwards','us','use','used','useful','uses','using','usually','v','value',
            'various','versus','very','via','viz','vs','w','want','wants','was','wasn\'t','way','we','we\'d','welcome','well','we\'ll',
            'went','were','we\'re','weren\'t','we\'ve','what','whatever','what\'ll','what\'s','what\'ve','when','whence','whenever',
            'where','whereafter','whereas','whereby','wherein','where\'s','whereupon','wherever','whether','which','whichever','while',
            'whilst','whither','who','who\'d','whoever','whole','who\'ll','whom','whomever','who\'s','whose','why','will','willing',
            'wish','with','within','without','wonder','won\'t','would','wouldn\'t','x','y','yes','yet','you','you\'d','you\'ll','your',
            'you\'re','yours','yourself','yourselves','you\'ve','z','zero'
        );

        // Quote every word before it goes into the alternation. Unquoted,
        // entries such as 'co.' and 'inc.' act as wildcards and silently
        // delete unrelated words ("cow", "inch").
        $alternatives = array();
        foreach ($commonWords as $word) {
            $alternatives[] = preg_quote($word, '/');
        }
        $pattern = '/\b(' . implode('|', $alternatives) . ')\b/';
    }

    return preg_replace($pattern, '', $input);
}

/**
 * Prints a debugging breakdown of a text as HTML.
 *
 * Emits, in order: the text split into lines, the lower-cased text split on
 * single spaces (empty pieces dropped), and a word-frequency table computed
 * after the stop words have been removed.
 *
 * @param string $inputString Raw text to analyse.
 * @return void
 */
function writeMsg($inputString)
{
    // The backslash is doubled so the heading shows the two characters "\n"
    // instead of emitting a line break into the markup.
    echo "<br><b>Sentences seperated by \\n</b></br>";
    echo "<pre>";
    // Split on real line breaks (any EOL style). A single-quoted '\n' is a
    // literal backslash followed by "n" and never matches a newline.
    print_r(preg_split('/\r\n|\r|\n/', $inputString));
    echo "</pre>";

    // Stop-word matching is case-sensitive, so normalise the case first.
    $inputString = strtolower($inputString);

    echo "<b>Sentences seperated by space</b>";
    echo "<pre>";
    // array_filter drops the empty pieces left by repeated spaces;
    // array_values renumbers what is left.
    print_r(array_values(array_filter(explode(' ', $inputString))));
    echo "</pre>";
    echo "<br />\n";

    $result_str = removeCommonWords($inputString);

    echo "<b>Word Count is</b>";
    echo "<pre>";
    print_r(array_count_values(str_word_count($result_str, 1)));
    echo "</pre>";
    echo "<br />\n";
}

// ---------------------------------------------------------------------------
// Driver: show the first line of the corpus, then analyse the whole file.
// ---------------------------------------------------------------------------
$file = fopen("welcome.txt", "r") or exit("Unable to open file!");
echo fgets($file);
echo "<br />\n";
fclose($file);

$str = file_get_contents('welcome.txt');
writeMsg($str);

// ---------------------------------------------------------------------------
// Query expansion: tag every query word with its part of speech and, for
// nouns/verbs/adjectives/adverbs, look for the best matching WordNet sense.
// ---------------------------------------------------------------------------
$query = 'famous school';
$query_words = explode(" ", $query);

// NOTE(review): getPartofSpeech() comes from PartofSpeech.php; the slicing
// below assumes it returns whitespace-separated "word/TAG" tokens -- confirm.
$output = getPartofSpeech($query);
echo "<br>part of speeech for query is " . $output;
$output_words = preg_split("/\s+/", $output, -1, PREG_SPLIT_NO_EMPTY);
$output_words_cnt = count($output_words);
$word_type = null;

// Tag groups. The possessive tags are single-quoted so that "$" stays literal;
// 'WP$' replaces a duplicated "WP" entry.
$noun_arr = array("NN", "NNS", "NNP", "NNPS", "PRP", 'PRP$', "WP", 'WP$');
$verb_arr = array("VB", "VBD", "VBG", "VBN", "VBP", "VBZ");
$adj_arr = array("JJ", "JJR", "JJS");
$adv_arr = array("RB", "RBR", "RBS", "WRB");

$similar_words = array();
for ($i = 0; $i < $output_words_cnt; $i++) {
    // The tag is whatever follows the last slash; a token without a slash has
    // no tag at all (strpos() + 1 would wrongly slice from offset 1).
    $pos = strrpos($output_words[$i], '/');
    echo "<br>";
    $substring_output_word = ($pos === false) ? '' : substr($output_words[$i], $pos + 1);
    echo "substrinf is --$substring_output_word--";

    if (in_array($substring_output_word, $noun_arr, true)) {
        $word_type = "Noun";
    } elseif (in_array($substring_output_word, $verb_arr, true)) {
        $word_type = "Verb";
    } elseif (in_array($substring_output_word, $adj_arr, true)) {
        $word_type = "Adjective";
    } elseif (in_array($substring_output_word, $adv_arr, true)) {
        $word_type = "Adverb";
    } else {
        $word_type = "NA";
    }
    echo "<br>Word type is : $word_type";

    // The tagger may emit a different number of tokens than the query has
    // words, so make sure there is a query word at this position.
    if ($word_type != "NA" && isset($query_words[$i])) {
        // Dump the WordNet overview of the word to a file that
        // split_wordnet_output() reads back. The word is escaped so it cannot
        // inject additional shell syntax.
        shell_exec('"C:/Program Files (x86)/WordNet/2.1/bin/wn.exe" ' . escapeshellarg($query_words[$i]) . ' -over > C:\xampp\htdocs\wordnet_output.txt');

        $avg_score = split_wordnet_output($word_type, $query);
        if ($avg_score != null && !empty($avg_score[0])) {
            asort($avg_score[0]);
            $extracted_words = $avg_score[1];
            echo "<br>Soreted score is";

            // After the ascending sort the best sense is the last entry.
            $sorted_score_value = end($avg_score[0]);
            $sorted_score_key = key($avg_score[0]);
            $best_words = isset($extracted_words[$sorted_score_key]) ? $extracted_words[$sorted_score_key] : '';

            echo "<br><br> Highesh score for $query_words[$i] is $sorted_score_value and key is $sorted_score_key";
            echo "<br><br><b> words are $best_words</b>";
            $similar_words[$i] = $best_words;
            print_r($avg_score);
        } else {
            // No WordNet output, or no scored sense for this word.
            echo "<br><br><b>Output from wordnet is null</b>";
        }
    }
}

echo "<br><br><br> Words and words from wordnet are as follows : ";
print_r($query_words);
print_r($similar_words);
?>
</body>
</html> </xmp> <br> <p>Code to parse the Wordnet output in specified format. 
</p> <xmp> <!DOCTYPE html>
<html>
<body>
<?php
/**
 * Scores every WordNet sense of one part of speech against a query.
 *
 * Reads "wordnet_output.txt" (relative to the working directory), finds the
 * paragraph headed "The noun|verb|adj|adv ...", splits it into numbered senses
 * and, for each sense, averages get_intersection() between the query and every
 * double-quoted example sentence of that sense. Debug output is echoed along
 * the way.
 *
 * @param string $word_type One of "Noun", "Verb", "Adjective", "Adverb".
 * @param string $query     Query text the example sentences are compared with.
 * @return array|null array($avg_score, $wordmatch), both keyed by the 1-based
 *                    sense position: the average score and the matched
 *                    "word, word --" prefix of each sense. Null when the file
 *                    is missing/empty or has no section for $word_type.
 */
function split_wordnet_output($word_type, $query)
{
    echo "<br><br>";

    $doc = is_readable("wordnet_output.txt") ? file_get_contents("wordnet_output.txt") : false;
    if ($doc === false || $doc === "") {
        return null;
    }
    echo "<b>Original Content:</b><br>$doc<br><br>";

    // Paragraphs are separated by empty lines. \R matches any line ending, so
    // this works for CRLF as well as LF files.
    $paragraphs = preg_split('/\R{2,}/', $doc, -1, PREG_SPLIT_NO_EMPTY);
    if ($paragraphs === false) {
        $paragraphs = array();
    }
    echo "<b>Paragraphs:</b><br>";
    print_r($paragraphs);

    // Header that introduces the sense list of each part of speech.
    $headers = array(
        "Verb"      => '/\bThe\sverb\s/',
        "Noun"      => '/\bThe\snoun\s/',
        "Adjective" => '/\bThe\sadj\s/',
        "Adverb"    => '/\bThe\sadv\s/',
    );
    if (!isset($headers[$word_type])) {
        return null;
    }

    // preg_grep() keeps the original keys, so take the first match instead of
    // assuming the section sits at one particular index.
    $sections = preg_grep($headers[$word_type], $paragraphs);
    if (empty($sections)) {
        return null;
    }
    $section = reset($sections);

    // A sense starts with "<number>. " at the beginning of a line. Anchoring
    // the marker avoids splitting inside a gloss ("... in 1900. ") and keeps
    // multi-digit sense numbers in one piece.
    $senses = preg_split('/^[ \t]*\d+\.\s/m', $section, -1, PREG_SPLIT_NO_EMPTY);
    $sense_count = ($senses === false) ? 0 : count($senses);

    $wordmatch = array();
    $avg_score = array();

    // Piece 0 is the "The noun ... has N senses" header, so start at 1.
    for ($i = 1; $i < $sense_count; $i++) {
        // Example sentences are the double-quoted parts of the gloss.
        preg_match_all('/\"(.*?)\"/', $senses[$i], $match);
        print_r($match);

        // to seperate out the words
        preg_match('/[\w+\s\,]+\s\-+/', $senses[$i], $matchword);
        print_r($matchword);
        $wordmatch[$i] = isset($matchword[0]) ? $matchword[0] : '';

        $sent_count = count($match[1]);
        echo "<br> count is $sent_count";

        $score = array();
        for ($j = 0; $j < $sent_count; $j++) {
            $score[$j] = get_intersection($query, $match[1][$j]);
        }

        // A sense without example sentences scores 0 rather than dividing by zero.
        $avg_score[$i] = ($sent_count > 0) ? array_sum($score) / $sent_count : 0;
    }

    echo "<br>Full Average score is";
    print_r($avg_score);

    return array($avg_score, $wordmatch);
}

/**
 * Scores the word overlap of two sentences by the intersection method.
 *
 * The score is the number of words of $s1 that also occur in $s2, divided by
 * the mean word count of the two sentences. Words are split on whitespace and
 * compared case-sensitively.
 *
 * @param string $s1 First sentence.
 * @param string $s2 Second sentence.
 * @return int|float Overlap score; 0 when both sentences contain no words.
 */
function get_intersection($s1, $s2)
{
    echo "<br>$s1 and Seond is$s2";

    // Split on whitespace and drop empty tokens: explode(" ", "") yields one
    // empty "word", which made the zero guard unreachable and gave two empty
    // sentences a score of 1.
    $sentence1 = preg_split('/\s+/', $s1, -1, PREG_SPLIT_NO_EMPTY);
    $sentence2 = preg_split('/\s+/', $s2, -1, PREG_SPLIT_NO_EMPTY);

    $total = count($sentence1) + count($sentence2);
    if ($total == 0) {
        return 0;
    }

    return count(array_intersect($sentence1, $sentence2)) / ($total / 2);
}
?>
</body>
</html> |