.
 *
 * END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @license http://www.gnu.org/licenses/ GPL3
 * @link http://www.seekquarry.com/
 * @copyright 2009 - 2015
 * @filesource
 */
if (!defined('BASE_DIR')) {
    echo "BAD REQUEST";
    exit();
}
/**
 * Load the Dutch Tokenizer via phrase_parser (5.4 hack)
 */
require_once BASE_DIR."/lib/phrase_parser.php";
/**
 * Load the run function
 */
require_once BASE_DIR.'/lib/unit_test.php';
/**
 * Code used to test the Dutch stemming algorithm. The inputs for the
 * algorithm are words in
 * http://snowball.tartarus.org/algorithms/Dutch/voc.txt and the resulting
 * stems are compared with the stem words in
 * http://snowball.tartarus.org/algorithms/Dutch/output.txt
 *
 * @author Chris Pollett
 * @package seek_quarry\test
 */
class NlTokenizerTest extends UnitTest
{
    /**
     * Before each test, stores the tokenizer returned by
     * PhraseParser::getTokenizer("nl") under the 'FILE1' key of
     * $this->test_objects
     */
    function setUp()
    {
        $this->test_objects['FILE1'] = PhraseParser::getTokenizer("nl");
    }
    /**
     * Nothing is done for unit test tear down
     */
    function tearDown()
    {
    }
    /**
     * Tests whether the stem function for the Dutch stemming algorithm
     * stems words according to the rules of stemming. The function tests stem
     * by calling stem with the words in $test_words and compares the results
     * with the stem words in $stem_words
     *
     * $test_words is an array containing a set of words in Dutch provided in
     * the snowball web page
     * $stem_words is an array containing the stems for words in $test_words
     *
     * A mismatch is recorded through assertEqual rather than by terminating
     * the script, so the remaining words are still checked and the failure
     * is reported through the same channel as every other assertion.
     */
    function stemmerTestCase()
    {
        $stem_dir = BASE_DIR.'/tests/test_files/dutch_stemmer';
        //Test word set from snowball
        $test_words = file("$stem_dir/input_vocabulary.txt");
        //Stem word set from snowball for comparing results
        $stem_words = file("$stem_dir/stemmed_result.txt");
        /*
            file() returns false when a file cannot be read; without this
            check the loop below would make no assertions at all and the
            test case would look as if it had nothing to report
         */
        if ($test_words === false || $stem_words === false) {
            $this->assertEqual(false, true,
                "stemmer test files in $stem_dir could be read");
            return;
        }
        /*
            Report every notice and warning while stemming, but remember the
            previous level so it can be put back once this test case is done
         */
        $previous_level = error_reporting(-1);
        $num_words = count($test_words);
        /*
            check if function stem correctly stems the words in $test_words
            by comparing results with stem words in $stem_words
         */
        for ($i = 0; $i < $num_words; $i++) {
            $word = trim($test_words[$i]);
            // words in the no-stem list or under 3 bytes long are skipped
            if (in_array(mb_strtolower($word), NlTokenizer::$no_stem_list) ||
                strlen($word) < 3) {
                continue;
            }
            /*
                if the expected-stem file is shorter than the vocabulary
                file, compare against the empty string instead of reading
                an undefined index
             */
            $stem = isset($stem_words[$i]) ? trim($stem_words[$i]) : "";
            $word_stem = $this->test_objects['FILE1']->stem($word);
            $this->assertEqual($word_stem, $stem,
                "function stem correctly stems $word to $stem");
        }
        error_reporting($previous_level);
    }
}
?>