.
 *
 * END LICENSE
 *
 * @author Chris Pollett (chris@pollett.org)
 * @license http://www.gnu.org/licenses/ GPL3
 * @link http://www.seekquarry.com/
 * @copyright 2009 - 2015
 * @filesource
 */
namespace seekquarry\yioop\library\summarizers;
use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\library\CrawlConstants;
use seekquarry\yioop\library\PhraseParser;
use seekquarry\yioop\library\processors\PageProcessor;
/**
 * Class which may be used by the processors to get a summary for a text
 * document that may later be used for indexing. Generates a summary based
 * on the Lanczos algorithm.
 * @author Charles Bocage (charles.bocage@sjsu.edu)
 */
class LanczosSummarizer extends Summarizer
{
    /**
     * Number of bytes in a sentence before it is considered long.
     * We use strlen rather than mb_strlen. This might actually be
     * a better metric of the potential of a sentence to have info.
     * getSentencesOriginal() compares the byte length of a candidate
     * sentence against a multiple of this value.
     */
    const LONG_SENTENCE_LEN = 50;
    /**
     * Number of sentences collected from a document before
     * getSentencesOriginal() only keeps longer sentences; the required
     * length grows by a factor of 1.5 each time this many more sentences
     * have been collected.
     */
    const LONG_SENTENCE_THRESHOLD = 100;
    /**
     * Maximum number of distinct terms (most frequent first) used when
     * computing the word cloud in getLanczosSummary()
     */
    const MAX_DISTINCT_TERMS = 1000;
    /**
     * Number of words in word cloud
     */
    const WORD_CLOUD_LEN = 5;
    /**
     * Number of nonzero centroid components, i.e., how many of the
     * highest weighted terms are kept when forming the centroid
     */
    const CENTROID_COMPONENTS = 50;
    /**
     * Whether getSummary() should also write the summary to disk
     */
    const OUTPUT_TO_FILE = false;
    /**
     * Location the result is saved to when OUTPUT_TO_FILE is true. This
     * path is appended to C\WORK_DIRECTORY, so it is relative to the work
     * directory rather than an absolute disk location.
     */
    const OUTPUT_FILE_PATH = "/temp/centroid_weighted_summarizer_result.txt";
    /**
     * Generates a summary and a word cloud for a document.
     *
     * Each sentence is scored by the dot product of its normalized term
     * frequency vector with the average sentence vector. The scores are
     * ordered from highest to lowest, keeping the sentence number as key,
     * and passed to getSummary() which concatenates sentences until the
     * maximum description length is reached. The word cloud consists of the
     * terms with the largest weight, where the weight of a term is
     * (occurrences in the formatted document) * log(number of sentences /
     * number of sentences containing the term).
     *
     * @param string $doc complete raw page to generate the summary from.
     * @param string $lang language of the page; used to select the
     *     tokenizer (stop word removal and segmentation) and to decide
     *     whether word boundaries are used when counting term occurrences.
     *
     * @return array two element array [summary, word cloud]; ["", ""] is
     *     returned if no terms could be extracted from the page
     */
    public static function getLanczosSummary($doc, $lang)
    {
        $raw_doc = $doc;
        $doc = self::pageProcessing($doc);
        /* Format the document to remove characters other than periods and
           letters. Only used to count term occurrences for the word cloud.
        */
        $formatted_doc = self::formatDoc($doc);
        $stop_obj = PhraseParser::getTokenizer($lang);
        /* Splitting into sentences */
        $out_sentences = self::getSentences($doc);
        $sentences = self::removeStopWords($out_sentences, $stop_obj);
        $sentence_array = self::splitSentences($sentences, $lang, $raw_doc);
        $terms = $sentence_array[0];
        $tf_per_sentence_normalized = $sentence_array[2];
        $tf_average_sentence =
            self::getAverageSentence($tf_per_sentence_normalized);
        $tf_dot_product_per_sentence =
            self::getDotProduct($tf_per_sentence_normalized,
            $tf_average_sentence);
        /* arsort (unlike usort) keeps the sentence number => score
           association that getSummary relies on to look up sentences.
        */
        arsort($tf_dot_product_per_sentence);
        $summary = self::getSummary($tf_dot_product_per_sentence,
            $out_sentences);
        $n = count($out_sentences);
        $terms_counts = array_count_values(array_filter($terms));
        arsort($terms_counts);
        /* preserve keys so that numeric terms are not renumbered */
        $terms_counts = array_slice($terms_counts, 0,
            self::MAX_DISTINCT_TERMS, true);
        $terms = array_map('strval', array_keys($terms_counts));
        $t = count($terms);
        if ($t == 0) {
            return ["", ""];
        }
        /* Nk: number of sentences each term occurs in */
        $nk = array_fill(0, $t, 0);
        for ($i = 0; $i < $n; $i++) {
            for ($j = 0; $j < $t; $j++) {
                if (strpos($sentences[$i], $terms[$j]) !== false) {
                    $nk[$j]++;
                }
            }
        }
        /* Inverse document (sentence) frequency of each term */
        $idf = [];
        for ($k = 0; $k < $t; $k++) {
            $idf[$k] = ($nk[$k] == 0) ? 0 : log($n / $nk[$k]);
        }
        /* Weight of each term in the centroid */
        $wc = [];
        $b = "\b";
        if (in_array($lang, ["zh-CN", "ja", "ko"])) {
            /* these languages do not separate words by spaces */
            $b = "";
        }
        for ($j = 0; $j < $t; $j++) {
            /* quote the term so that characters such as / or ( in it can
               neither break the pattern nor be interpreted as regex syntax
            */
            $pattern = "/" . $b . preg_quote($terms[$j], "/") . $b . "/";
            $nt = preg_match_all($pattern, $formatted_doc,
                $matches); //$matches included for backwards compatibility
            $wc[$j] = ($nt === false) ? 0 : $nt * $idf[$j];
            if (is_nan($wc[$j]) || is_infinite($wc[$j])) {
                $wc[$j] = 0;
            }
        }
        /* Calculate centroid: the highest weighted terms */
        arsort($wc);
        $centroid = array_slice($wc, 0, self::CENTROID_COMPONENTS, true);
        /* Word cloud: the first few terms of the centroid */
        $word_cloud = [];
        $cloud_keys = array_slice(array_keys($centroid), 0,
            self::WORD_CLOUD_LEN);
        foreach ($cloud_keys as $key) {
            $word_cloud[] = $terms[$key];
        }
        return [$summary, $word_cloud];
    }
    /**
     * Calculates how many sentences to put in the summary to match the
     * maximum description length (PageProcessor::$max_description_len).
     *
     * Sentences are taken in the order of $sim; a sentence is counted as
     * long as the total byte length of the sentences counted before it is
     * still below the maximum description length.
     *
     * @param array $sentences sentences in doc in their original order
     * @param array $sim associative array of sentence-number-in-doc =>
     *      similarity score to centroid (sorted from highest to lowest score).
     * @return int number of sentences (0 if $sim is empty)
     */
    public static function summarySentenceCount($sentences, $sim)
    {
        $top = 0;
        $count = 0;
        foreach ($sim as $key => $value) {
            if ($count >= PageProcessor::$max_description_len) {
                /* budget used up, later sentences cannot be counted */
                break;
            }
            $count += strlen($sentences[$key]);
            $top++;
        }
        return $top;
    }
    /**
     * Breaks any content into sentences. The content is split on sentence
     * ending punctuation (. | ! ? and the full-width ！ ？ 。) followed by
     * white space, on blank lines, and on runs of five white space
     * characters.
     *
     * Fragments shorter than two bytes are not emitted on their own but
     * prepended to the next fragment. Once LONG_SENTENCE_THRESHOLD sentences
     * have been collected, a sentence is only kept if it is longer than
     * LONG_SENTENCE_LEN bytes times a factor that grows by 1.5 for every
     * further LONG_SENTENCE_THRESHOLD sentences collected; shorter ones are
     * dropped.
     *
     * @param string $content complete page.
     * @return array array of sentences from that content.
     */
    public static function getSentencesOriginal($content)
    {
        $lines = preg_split(
            '/(\.|\||\!|\?|！|？|。)\s+|(\n|\r)(\n|\r)+|\s{5}/',
            $content, 0, PREG_SPLIT_NO_EMPTY);
        if ($lines === false) {
            return [];
        }
        $out = [];
        $sentence = "";
        $count = 0;
        $threshold_factor = 1;
        foreach ($lines as $line) {
            $sentence .= " " . $line;
            if (strlen($line) < 2) {
                /* too short to stand alone, merge with the next fragment */
                continue;
            }
            if ($count < self::LONG_SENTENCE_THRESHOLD ||
                strlen($sentence) > $threshold_factor *
                    self::LONG_SENTENCE_LEN) {
                $sentence = preg_replace("/\s+/ui", " ", $sentence);
                $out[] = trim($sentence);
                $count++;
                $threshold_factor =
                    pow(1.5, floor($count / self::LONG_SENTENCE_THRESHOLD));
            }
            $sentence = "";
        }
        /* left over short fragments at the end of the content */
        if (trim($sentence) != "") {
            $sentence = preg_replace("/\s+/ui", " ", $sentence);
            $out[] = trim($sentence);
        }
        return $out;
    }
    /**
     * Lower cases a sentence and replaces every run of characters that are
     * neither letters, digits nor white space by a single space. Leading and
     * trailing white space is removed from the result.
     * @param string $sent text to format.
     * @return string formatted text.
     */
    public static function formatSentence($sent)
    {
        $lowered = mb_strtolower($sent);
        $cleaned = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $lowered);
        return trim($cleaned);
    }
    /**
     * Lower cases the document and then, one substitution after the other,
     * replaces by a single space: runs of line breaks and hyphens, runs of
     * characters that are not letters, white space or periods (so digits
     * are dropped as they are not used in the word cloud), and finally runs
     * of periods.
     * The formatted document generated by this function is only used to
     * compute centroid.
     * @param string $content page to format.
     * @return string formatted document.
     */
    public static function formatDoc($content)
    {
        $text = mb_strtolower($content);
        $patterns = ['/[\n\r\-]+/', '/[^\p{L}\s\.]+/u', '/[\.]+/'];
        foreach ($patterns as $pattern) {
            $text = preg_replace($pattern, ' ', $text);
            if ($text === null) {
                /* a failed substitution makes the whole result null */
                return null;
            }
        }
        return $text;
    }
    /**
     * This function does an additional processing on the page
     * such as removing all the tags from the page
     * @param string $page complete page.
     * @return string processed page.
     */
    public static function pageProcessing($page)
    {
        $substitutions = ['@@si',
            '/\ \;|\&rdquo\;|\&ldquo\;|\&mdash\;/si',
            '@@si', '/[\^\(\)]/',
            '/\[(.*?)\]/', '/\t\n/'
        ];
        $page = preg_replace($substitutions, ' ', $page);
        $page = preg_replace('/\s{2,}/', ' ', $page);
        $new_page = preg_replace("/\
/", "\n", $page);
        $changed = false;
        if ($new_page != $page) {
            $changed = true;
            $page = $new_page;
        }
        $page = preg_replace("/\<\/(h1|h2|h3|h4|h5|h6|table|tr|td|div|".
            "p|address|section)\s*\>/", "\n\n", $page);
        $page = preg_replace("/\ $v) {
                $sum_of_squares += ($v * $v);
            }
            $square_root = sqrt($sum_of_squares);
            foreach ($term_frequencies as $k => $v) {
                if ($square_root == 0) {
                    $result[$k] = 0;
                } else {
                    $result[$k] = ($v / $square_root);
                }
            }
            foreach ($result as $k => $v) {
                $result_sum += $v;
            }
        }
        return $result;
    }
    /**
     * Computes the average sentence vector: for every term the values of
     * that term over all sentences are added up and the sum is divided by
     * the number of sentences (rows) in the input.
     * @param array $term_frequencies_normalized one array per sentence with
     *      the terms as the key and its normalized frequency as the value
     * @return array term => averaged frequency; empty if there are no
     *      sentences
     */
    public static function getAverageSentence($term_frequencies_normalized)
    {
        $num_rows = count($term_frequencies_normalized);
        if ($num_rows == 0) {
            return [];
        }
        $totals = [];
        foreach ($term_frequencies_normalized as $row) {
            foreach ($row as $term => $weight) {
                $totals[$term] = array_key_exists($term, $totals) ?
                    $totals[$term] + $weight : $weight;
            }
        }
        foreach ($totals as $term => $sum) {
            $totals[$term] = $sum / $num_rows;
        }
        return $totals;
    }
    /**
     * Computes, for every sentence, the dot product of its normalized term
     * frequency vector with the average sentence vector. Terms missing from
     * the average sentence contribute nothing.
     * @param array $term_frequencies_normalized one array per sentence with
     *      the terms as the key and its normalized frequency as the value
     * @param array $average_sentence an array of each words average
     *      frequency value
     * @return array dot products, numbered 0, 1, 2, ... in the order the
     *      sentences were given (input keys are not kept)
     */
    public static function getDotProduct($term_frequencies_normalized,
        $average_sentence)
    {
        $products = [];
        foreach ($term_frequencies_normalized as $sentence_vector) {
            $dot = 0;
            foreach ($sentence_vector as $term => $weight) {
                if (!array_key_exists($term, $average_sentence)) {
                    continue;
                }
                $dot += $average_sentence[$term] * $weight;
            }
            $products[] = $dot;
        }
        return $products;
    }
    /**
     * Comparison callback for the usort family of functions. In spite of
     * its name it orders values from highest to lowest: it reports $a as
     * coming after $b when $b is greater than $a.
     * @param mixed $a the first value to compare
     * @param mixed $b the second value to compare
     * @return int 1 if $b is greater than $a, 0 if both are equal,
     *      -1 otherwise
     */
    public static function sortInAscendingOrder($a, $b)
    {
        if ($a == $b) {
            /* sort callbacks must report equal elements as equal */
            return 0;
        }
        return $b > $a ? 1 : -1;
    }
    /**
     * Returns a new array of sentences without the stop words. Each sentence
     * is first passed through formatDoc and then through the tokenizer's
     * stopwordsRemover method. If no tokenizer is given, or it does not
     * have a stopwordsRemover method, the sentences are returned unchanged.
     * @param array $sentences the array of sentences to process
     * @param object $stop_obj the object that has the stopwordsRemover
     *      method
     * @return array a new array of sentences without the stop words
     */
    public static function removeStopWords($sentences, $stop_obj)
    {
        $total = count($sentences);
        if (!$stop_obj || !method_exists($stop_obj, "stopwordsRemover")) {
            return $sentences;
        }
        $cleaned = [];
        $index = 0;
        while ($index < $total) {
            $formatted = self::formatDoc($sentences[$index]);
            $cleaned[$index] = $stop_obj->stopwordsRemover($formatted);
            $index++;
        }
        return $cleaned;
    }
    /**
     * Segments every sentence into terms and collects the term statistics
     * needed by the summarizer.
     * @param array $sentences the array of sentences to process
     * @param string $lang the current locale
     * @param string $doc complete raw page to generate the summary from.
     * @return array three element array: [0] the terms of all sentences
     *      merged into one list, [1] the term frequencies of each sentence,
     *      [2] the normalized term frequencies of each sentence
     */
    public static function splitSentences($sentences, $lang, $doc)
    {
        $all_terms = [];
        $frequencies = [];
        $normalized = [];
        foreach ($sentences as $sentence) {
            $sentence_terms = PhraseParser::segmentSegment($sentence, $lang);
            $all_terms = array_merge($all_terms, $sentence_terms);
            $sentence_frequencies =
                self::getTermFrequencies($sentence_terms, $sentence, $doc);
            $frequencies[] = $sentence_frequencies;
            $normalized[] =
                self::normalizeTermFrequencies($sentence_frequencies);
        }
        return [$all_terms, $frequencies, $normalized];
    }
    /**
     * Builds the summary text by concatenating sentences, in the order
     * given by $tf_dot_product_per_sentence, until adding the next sentence
     * would make the total byte length of the chosen sentences exceed
     * PageProcessor::$max_description_len. If OUTPUT_TO_FILE is set the
     * chosen sentences are also written, one per line, to OUTPUT_FILE_PATH
     * below the work directory.
     * @param array $tf_dot_product_per_sentence an array that holds the dot
     *      product of each sentence, keyed by the index of the sentence in
     *      $sentences. It should be sorted from highest to lowest when it
     *      is passed to this method.
     * @param array $sentences the array of sentences to process
     * @return string a string that represents the summary
     */
    public static function getSummary($tf_dot_product_per_sentence,
            $sentences)
    {
        $result = "";
        $result_length = 0;
        /* initialized up front so it is defined even if no sentence fits */
        $output_file_contents = "";
        $is_first = true;
        foreach ($tf_dot_product_per_sentence as $k => $v) {
            $sentence = $sentences[$k];
            if ($result_length + strlen($sentence) >
                PageProcessor::$max_description_len) {
                break;
            }
            $result_length += strlen($sentence);
            if ($is_first) {
                $is_first = false;
                $result = $sentence . ". ";
                $output_file_contents = $sentence . ". ";
            } else {
                $result .= " " . $sentence . ". ";
                $output_file_contents .= "\r\n" . $sentence . ". ";
            }
        }
        if (self::OUTPUT_TO_FILE) {
            file_put_contents(C\WORK_DIRECTORY . self::OUTPUT_FILE_PATH,
                $output_file_contents);
        }
        return $result;
    }
    /*
     * Shared state used by main() and its matrix helpers.
     */
    // Reset to an empty array in main() and later wrapped in a Matrix as the
    // row-wise Lanczos vectors; presumably filled by lanczos() -- TODO confirm
    public static $q;
	// Not used in the visible code; presumably a count of input lines read
	public static $lineCount = 0;
	// Matrix that main() copies and decomposes; presumably built by
	// makeFinalWSMatrix() -- TODO confirm
	public static $matrix;
	// Not used in the visible code; presumably the input sentences
	public static $senten;
	// Not used in the visible code; presumably a sentence matrix
	public static $senMatrix;
	// Not used in the visible code; presumably the distinct words
	public static $words;
	// Not used in the visible code; presumably the stop words to ignore,
	// set up by makeIgnorelists() -- TODO confirm
	public static $ignores;
	// Number of rows of the matrix being decomposed (set in main())
	public static $rr;
	// Number of columns of the matrix being decomposed (set in main())
	public static $cc;
	/**
	 * Driver that runs the whole Lanczos based summarization on the matrix
	 * held in self::$matrix and prints every intermediate result.
	 *
	 * Steps: build the ignore lists and the matrix A, form (A^T)A,
	 * tri-diagonalize it with lanczos(), iterate QR decompositions (at most
	 * 30 times) until the entries below the diagonal are negligible, read
	 * the eigenvalues off the diagonal and sort them by decreasing
	 * magnitude, compute one vector per eigenvalue from the inverse of the
	 * shifted tri-diagonal matrix, combine those with the Lanczos vectors
	 * in self::$q, and finally print the sentences selected from the
	 * resulting matrix.
	 *
	 * @param mixed $args not used
	 */
	public static function main($args)
	{
		self::makeIgnorelists();
		self::makeFinalWSMatrix();
		$A = self::copy(self::$matrix);
		$ev = [];
		self::$rr = count($A);
		self::$cc = count($A[0]);
		$AT = self::transpose($A);
		$AAT = self::matrixMultiplication($AT, $A);
		self::printM($AAT);
		$n = count($AAT);
		self::$q = [];
		// keep the original matrix, $A is overwritten below
		$o = self::copy($A);
		print("Starting Matrix\n");
		self::printM($A);
		$A = self::lanczos($AAT);
		// keep the tri-diagonal matrix for the eigenvector computation
		$l = self::copy($A);
		print(" \n");
		print("Generating a tri-diagonal matrix\n");
		self::printM($A);
		print(" \n");
		// QR iteration: A <- R * Q until the sub diagonal vanishes
		for ($count = 0; $count < 30; $count++) {
			$qrArrays = self::qRDecompose($A);
			$A = self::matrixMultiplication($qrArrays[1], $qrArrays[0]);
			if (self::checkSubDiagonal($A)) {
				break;
			}
		}
		print(" \n");
		print("QR factoriztion\n");
		self::printM($A);
		// eigenvalues are the diagonal entries; tiny ones are treated as 0
		$eigen = [];
		for ($i = 0; $i < $n; $i++) {
			if (abs($A[$i][$i]) < 0.001) {
				$A[$i][$i] = 0;
			}
			$eigen[$i] = $A[$i][$i];
		}
		// bubble sort by decreasing magnitude; the last pass ($out == 1)
		// is needed to order the first two entries
		for ($out = count($eigen) - 1; $out > 0; $out--) {
			for ($in = 0; $in < $out; $in++) {
				if (abs($eigen[$in]) < abs($eigen[$in + 1])) {
					$temp = $eigen[$in];
					$eigen[$in] = $eigen[$in + 1];
					$eigen[$in + 1] = $temp;
				}
			}
		}
		for ($i = 0; $i < $n; $i++) {
			print(number_format($eigen[$i], 3));
		}
		print("Eigenvalues sorted\n");
		for ($i = 0; $i < $n; $i++) {
			print(sqrt($eigen[$i]) . "  ");
		}
		// one vector per eigenvalue from the inverse of (T - eigenvalue * I)
		for ($k = 0; $k < $n; $k++) {
			$x = self::copy($l);
			for ($i = 0; $i < $n; $i++) {
				$x[$i][$i] = $x[$i][$i] - $eigen[$k];
			}
			print("\n");
			$d = new Matrix($x);
			$ff = self::inverse($d->data);
			$fm = new Matrix($ff);
			$ev[] = $fm->getNorm();
		}
		$eigenvectors = self::getV($ev);
		print("eigenvvvvvv\n");
		$eigenvectors->show();
		// transpose so that q can be in column vector,
		// right now it's horizontal
		$l_temp = new Matrix(self::$q);
		$lancvectors = $l_temp->transpose();
		$lancvectors->getRidOfNegativeZero();
		print("\n");
		$lancvectors->show();
		print("Vector U\n");
		$left = $lancvectors->times($eigenvectors->cClone());
		$left->show();
		print("Vector S\n");
		$singular = self::getS($eigen);
		$singular->show();
		print("Transpose of vector V\n");
		$a = new Matrix($o);
		$at = $a->transpose();
		$r = $at->times($left);
		$r = $r->getMultiNorm();
		$r = self::getTransposeOfV($r);
		$r->show();
		$sen = self::getSentences($r->data, 2);
		print("Summary: \n");
		self::printSentences($sen);
	}
    /**
     * Checks whether every entry below the main diagonal of a matrix is
     * negligible, i.e., has an absolute value of at most 0.001.
     * @param array $A matrix as an array of rows
     * @return bool true if no entry below the diagonal exceeds 0.001 in
     *      absolute value, false otherwise
     */
    public static function checkSubDiagonal($A)
    {
        $tolerance = 0.001;
        $size = count($A);
        for ($col = 0; $col < $size; $col++) {
            $row = $col + 1;
            while ($row < $size) {
                if (abs($A[$row][$col]) > $tolerance) {
                    return false;
                }
                $row++;
            }
        }
        return true;
    }
    /**
     * Returns the transpose of the given matrix object by calling its
     * transpose method.
     * @param object $V object with a transpose method
     * @return mixed whatever $V's transpose method returns
     */
    public static function getTransposeOfV($V)
    {
        $transposed = $V->transpose();
        return $transposed;
    }
    public static function getS($eigen)
    {
	    //double[][] S = new double[eigen.length][eigen.length];
	    //for(int i=0; i < eigen.length; i++)
		//{
		//   double val = Math.abs(eigen[i]);
		//   for(int j=0; j mitr = ev.listIterator();
        //
		//Matrix V = new Matrix(ev.size(), ev.size());
		//int j=0;
		//while(mitr.hasNext())
		//{
		//	Matrix e = mitr.next();
		//	for(int i=0;idata[$i][$j] = $e->data[$i][0];
			}
			$j++;
        }
		return $V;
	}
    public static function qRDecompose($M)
    {
        //double[][][] arrays = new double[2][][];
        //
        //int n = M.length;
        //double[][] A = (double[][]) M.clone();
        //double[][] Q = new double[n][];
        //double[][] R = new double[n][n];
        //
        //int i = 0;
        //int j = 1;
        //while(i-1)
        //    {
        //        while(k -1) {
            while ($k < $m) {
                $sum = $sum + $rm[$k][$i] * $x[$k];
                $k++;
            }
            $x[$i] = ($b[$i] - $sum) / $rm[$i][$i];
            $k = $i;
            $i = $i - 1;
            $sum = floatval("0.0");
        }
        return $x;
    }
    /**
     * Computes the transpose of a matrix stored as a two-dimensional array.
     *
     * The number of columns is taken from the first row, and every row is
     * read up to that many entries. An empty matrix has no first row to
     * measure, so it is returned as an empty array rather than reading the
     * undefined offset $M[0] and calling count() on it.
     *
     * @param array $M matrix given as a 0-indexed array of 0-indexed rows
     * @return array matrix whose entry [$j][$i] equals $M[$i][$j]; an empty
     *     array when $M has no rows or its first row has no columns
     */
    public static function transpose($M)
    {
        if (empty($M)) {
            return [];
        }
        $num_rows = count($M);
        $num_columns = count($M[0]);
        $transposed = [];
        for ($i = 0; $i < $num_rows; $i++) {
            for ($j = 0; $j < $num_columns; $j++) {
                $transposed[$j][$i] = $M[$i][$j];
            }
        }
        return $transposed;
    }
    /**
     * As written, collects for each of the first $num rows of $M the column
     * index of the entry with the largest absolute value (the earliest such
     * column wins ties, since the comparison is strict) and returns those
     * indices as a list.
     *
     * NOTE(review): the executable body never uses $u or $v and instead
     * reads $M and $num, which are not defined in this scope. The Java
     * comment below also breaks off in the middle of the vector-addition
     * loop. Presumably the rest of vectorPlus and the header of a separate
     * method taking ($M, $num) were lost -- confirm against the original
     * file before relying on this method.
     *
     * @param array $u not used by the visible body
     * @param array $v not used by the visible body
     * @return array list of column indices, one per examined row
     */
    public static function vectorPlus($u, $v)
    {
        //int n = u.length;
        //double[] A = new double[n];
        //for(int i=0; i list = new LinkedList();
        //
    	//int j=0;
    	//for(int i=0; i < num; i++)
    	//{
        //
    	//	max = M[i][j];
    	//	for(int k = j+1; k < M[0].length; k++)
    	//	{
    	//		 if(Math.abs(M[i][k]) > Math.abs(max))
    	//		 {
    	//			 j=k;
    	//			 max = M[i][k];
        //
    	//		 }
    	//	}
    	//	list.add(new Integer(j));
    	//	j=0;
    	//}
        //
    	//return list;
        $max = 0.0;
    	$list = array();
    	$j = 0;
    	for ($i = 0; $i < $num; $i++) {
    		// start from column 0 and scan the rest of row $i for a larger
    		// magnitude
    		$max = $M[$i][$j];
    		for ($k = $j + 1; $k < count($M[0]); $k++) {
                if(abs($M[$i][$k]) > abs($max)) {
				    $j = $k;
				    $max = $M[$i][$k];
			    }
            }
    		$list[] = $j;
    		// reset so that the next row is again scanned from column 0
    		$j = 0;
    	}
    	return $list;
    }
    /**
     * Prints the sentences chosen for the summary, one per line.
     *
     * @param array $sens 0-indexed list whose values are keys into
     *     self::$senMatrix
     */
    public static function printSentences($sens)
    {
        $total = count($sens);
        $position = 0;
        while ($position < $total) {
            $sentence = self::$senMatrix[$sens[$position]];
            print($sentence . "\n");
            $position++;
        }
    }
    /**
     * Rebuilds the vocabulary self::$words from a list of sentences.
     *
     * Each sentence is split on whitespace and the punctuation characters
     * .,;:!?'-() and every resulting token that is neither listed in
     * self::$ignores nor already in the vocabulary is appended, so the
     * vocabulary keeps first-occurrence order and contains no duplicates.
     * Matching is exact: no case folding is done (the original code carried
     * a note that mb_strtolower() should eventually be applied here).
     *
     * A keyed lookup table replaces the two linear in_array() scans per
     * token, so the cost no longer grows with the square of the vocabulary
     * size.
     *
     * @param array $senStrings sentences to extract the vocabulary from
     */
    public static function words($senStrings)
    {
        $delimiters = " \t\n\r\f.,;:!?'-()";
        // Tokens present as keys here must not be added: initially the stop
        // words, then also every word that has already been collected.
        $excluded = array_fill_keys((array) self::$ignores, true);
        self::$words = [];
        foreach ($senStrings as $sentence) {
            $token = strtok($sentence, $delimiters);
            while ($token !== false) {
                if (!isset($excluded[$token])) {
                    $excluded[$token] = true;
                    self::$words[] = $token;
                }
                $token = strtok($delimiters);
            }
        }
    }
	/**
	 * Fills self::$matrix with word-by-sentence occurrence counts.
	 *
	 * The vocabulary self::$words is sorted, and row $k of the matrix
	 * belongs to the $k-th word in that order; entry [$k][$i] is the number
	 * of times the word occurs, as a substring, in sentence $i.
	 *
	 * The word is escaped with preg_quote() before being placed in the
	 * pattern. Words are arbitrary tokens from the text and may contain
	 * characters such as / [ * + $ which would otherwise end the pattern
	 * early, fail to compile, or be interpreted as regex operators.
	 *
	 * @param array $senStrings 0-indexed list of sentences to count in
	 */
	public static function makeWSMatrix($senStrings)
	{
		$slist = self::$words;
		asort($slist);
		$num_sentences = count($senStrings);
		$k = 0;
		// asort keeps the original keys, so rows are numbered with a
		// separate counter that follows the sorted order
		foreach ($slist as $item) {
			$pattern = "/" . preg_quote($item, "/") . "/u";
			for ($i = 0; $i < $num_sentences; $i++) {
				$count = preg_match_all($pattern, $senStrings[$i]);
				// preg_match_all gives false on a PCRE error (for instance
				// malformed UTF-8); keep the matrix numeric in that case
				self::$matrix[$k][$i] = ($count === false) ? 0 : $count;
			}
			$k++;
		}
	}
	//fills in the senMatrix and senten arrays with the sentences of $line
	/**
	 * Splits a text into sentences and stores the result in both
	 * self::$senten and self::$senMatrix.
	 *
	 * A sentence boundary is any run of the characters . ? ! together with
	 * the whitespace that follows it; the boundary itself is not kept and
	 * empty pieces are discarded.
	 *
	 * @param string $line text to split into sentences
	 */
	private static function countLines($line)
	{
		$boundary = '/[.?!]+\\s*/ui';
		$sentences = preg_split($boundary, $line, 0, PREG_SPLIT_NO_EMPTY);
		self::$senten = $sentences;
		self::$senMatrix = $sentences;
	}
	/**
	 * As it stands, appends to self::$ignores the stop words listed, one per
	 * line, in the file c:/temp/ignore.txt; each line is trimmed first.
	 *
	 * NOTE(review): the method name and the commented-out Java at the top of
	 * the body describe building the final word-sentence matrix (countLines,
	 * words, makeWSMatrix), but no PHP statements for that are present here,
	 * and the Java comment breaks off mid-statement before switching to a
	 * stop-word loader. Presumably the end of this method and the header of
	 * a separate stop-word method were lost -- confirm against the original
	 * file before relying on this method.
	 */
	public static function makeFinalWSMatrix()
	{
		//BufferedReader r = null;
		// String thisLine;
   	  	//try {
		//	r = new BufferedReader(new FileReader(".\\a.txt"));
   	  	//} catch (FileNotFoundException e1) {
   		//   e1.printStackTrace();
   	  	//}
   	  	//String lines ="";
   	  	//try
   	  	//{
   	  	//	while ((thisLine = r.readLine()) != null)
   	  	//	{
   	  	//		lines = lines+thisLine;
		//    }
   	    //}catch (IOException e)
   	    //{
   	    //	e.printStackTrace();
   	    //}
   	    //countLines(lines);
   	    ////System.out.println(senMatrix.length+"");
   	    //words(senMatrix);
   	    ////System.out.println(words.size()+"");
   	    //SortedSet ss = new TreeSet(words);
   	    //Object [] slist = ss.toArray();
        //
   	    ///*
   	    //for(Object a: slist)
   	    //{
   	    //	System.out.println(a);
   	    //}
		//*/
   	    //matrix = new double[words.size()][senMatrix.length];
   	    //makeWSMatrix(senMatrix);
        //
   	    //for(int m =0; m ();
		//BufferedReader r = null;
		//String thisLine;
  	  	//try {
		//	r = new BufferedReader(new FileReader(".\\ignore.txt"));
  	  	//} catch (FileNotFoundException e1) {
  		//   e1.printStackTrace();
  	  	//}
  	    //String lines ="";
  	  	//try
  	  	//{
  	  	//	while ((thisLine = r.readLine()) != null)
  	  	//	{
  	  	//		ignores.add(thisLine);
		//    }
  	    //}catch (IOException e)
  	    //{
  	    //	e.printStackTrace();
  	    //}
        
        //we need these to get the stop words based on the locale
        // NOTE(review): hard-coded absolute Windows path. file() returns
        // false when the file cannot be read, and the loop below does not
        // guard against that.
        $r = file("c:/temp/ignore.txt");
        for ($i = 0; $i < count($r); $i++) {
            $thisLine = $r[$i];
            // trim() removes the line ending that file() keeps on each line
            self::$ignores[] = trim($thisLine);
        }
       // Disabled alternative: an inline English stop word list that would
       // be assigned to self::$ignores directly.
       /* self::$ignores = ['a','able','about','above','abst',
        'accordance','according','based','accordingly','across','act',
        'actually','added','adj','affected','affecting','affects','after',
        'afterwards','again','against','ah','all','almost','alone','along',
        'already','also','although','always','am','among','amongst','an','and',
        'announce','another','any','anybody','anyhow','anymore','anyone',
        'anything','anyway','anyways','anywhere','apparently','approximately',
        'are','aren','arent','arise','around','as','aside','ask','asking','at',
        'auth','available','away','awfully','b','back','be','became','because',
        'become','becomes','becoming','been','before','beforehand','begin',
        'beginning','beginnings','begins','behind','being','believe','below',
        'beside','besides','between','beyond','biol','both','brief','briefly',
        'but','by','c','ca','came','can','cannot','cant','cause','causes',
        'certain','certainly','co','com','come','comes','contain','containing',
        'contains','could','couldnt','d','date','did','didnt',
        'different','do','does','doesnt','doing',
        'done','dont','down','downwards',
        'due','during','e','each','ed','edu','effect','eg','eight','eighty',
        'either','else','elsewhere','end',
        'ending','enough','especially','et',
        'et-al','etc','even','ever','every',
        'everybody','everyone','everything'
        ,'everywhere','ex','except','f','far','few','ff','fifth','first',
        'five','fix','followed','following','follows','for','former',
        'formerly','forth','found','four','from','further','furthermore',
        'g','gave','get','gets','getting','give','given','gives','giving','go',
        'goes','gone','got','gotten','h','had','happens','hardly','has','hasnt',
        'have','havent','having','he','hed','hence','her','here','hereafter',
        'hereby','herein','heres','hereupon','hers','herself','hes','hi','hid',
        'him','himself','his','hither','home','how','howbeit',
        'however', 'http', 'https', 'hundred','i','id','ie','if','ill',
        'im','immediate','immediately',
        'importance','important','in','inc','indeed','index','information',
        'instead','into','invention','inward','is','isnt','it','itd','itll',
        'its','itself','ive','j','just','k','keep','keeps',
        'kept','kg','km','know',
        'known','knows','l','largely','last','lately',
        'later','latter','latterly',
        'least','less','lest','let','lets','like','liked','likely','line',
        'little','ll','look','looking','looks','ltd','m','made','mainly','make',
        'makes','many','may','maybe','me','mean','means','meantime','meanwhile',
        'merely','mg','might','million','miss','ml','more','moreover','most',
        'mostly','mr','mrs','much','mug','must','my','myself','n','na','name',
        'namely','nay','nd','near','nearly','necessarily','necessary','need',
        'needs','neither','never','nevertheless','new','next',
        'nine','ninety','no',
        'nobody','non','none','nonetheless','noone',
        'nor','normally','nos','not',
        'noted','nothing','now','nowhere','o','obtain',
        'obtained','obviously','of',
        'off','often','oh','ok','okay','old','omitted','on','once','one','ones',
        'only','onto','or','ord','other','others',
        'otherwise','ought','our','ours',
        'ourselves','out','outside','over','overall','owing','own','p','page',
        'pages','part','particular','particularly',
        'past','per','perhaps','placed',
        'please','plus','poorly','possible','possibly','potentially','pp',
        'predominantly','present','previously',
        'primarily','probably','promptly',
        'proud','provides','put','q','que','quickly','quite','qv','r','ran',
        'rather','rd','re','readily','really','recent','recently','ref','refs',
        'regarding','regardless','regards','related','relatively','research',
        'respectively','resulted','resulting',
        'results','right','run','s','said',
        'same','saw','say','saying','says','sec',
        'section','see','seeing','seem',
        'seemed','seeming','seems',
        'seen','self','selves','sent','seven','several',
        'shall','she','shed','shell',
        'shes','should','shouldnt','show','showed','shown','showns','shows',
        'significant','significantly','similar','similarly','since',
        'six','slightly',
        'so','some','somebody','somehow','someone','somethan',
        'something','sometime',
        'sometimes','somewhat','somewhere','soon',
        'sorry','specifically','specified',
        'specify','specifying','still','stop','strongly','sub','substantially',
        'successfully','such','sufficiently','suggest','sup','sure','t','take',
        'taken','taking','tell','tends','th','than',
        'thank','thanks','thanx','that',
        'thatll','thats','thatve','the','their',
        'theirs','them','themselves','then',
        'thence','there','thereafter','thereby','thered','therefore','therein',
        'therell','thereof','therere','theres','thereto','thereupon','thereve',
        'these','they','theyd','theyll','theyre',
        'theyve','think','this','those',
        'thou','though','thoughh','thousand','throug',
        'through','throughout','thru',
        'thus','til','tip','to','together','too',
        'took','toward','towards','tried',
        'tries','truly','try','trying','ts','twice','two','u','un','under',
        'unfortunately','unless','unlike','unlikely','until','unto','up','upon',
        'ups','us','use','used','useful','usefully','usefulness','uses','using',
        'usually','v','value','various','ve','very',
        'via','viz','vol','vols','vs',
        'w','want','wants','was','wasnt','way','we',
        'wed','welcome','well','went',
        'were','werent','weve','what','whatever',
        'whatll','whats','when','whence',
        'whenever','where','whereafter','whereas','whereby','wherein','wheres',
        'whereupon','wherever','whether','which','while','whim','whither','who',
        'whod','whoever','whole','wholl','whom','whomever','whos','whose','why',
        'widely','willing','wish','with','within',
        'without','wont','words','world',
        'would','wouldnt','www','x','y','yes','yet','you','youd','youll','your',
        'youre','yours','yourself','yourselves','youve','z','zero'];*/
	}
}