A basic MediaWiki Parser Using JavaScript

<>
<h1>A basic MediaWiki Parser Using JavaScript</h1>
<>

I implemented a wiki-to-HTML parser that displays the HTML counterpart of the wiki content retrieved from the web service response.<br />

<p>Below are a demo and the code used to parse wiki content into HTML on the client side.</p>
<pre>

<fieldset style="display: inline-block;">
<legend>CS298 Deliverable 3 : Wiki Parser Demo</legend>
 <textarea id="wikitext" cols="20" rows="10"><u>sdfsd</u>
'''fdssdfsd'''
<s>sdfsdfsdf</s>
[http://yioop.com Yioop]</textarea>
<br>
<input id="btnsave" type="button" value="Preview HTML" />
<br>
<hr>
<div id="html"></div>
<script src="http://www.cs.sjsu.edu/faculty/pollett/masters/Semesters/Spring14/eswara/cs298/deliverable3/wikiparser.js"></script>

</fieldset>

<p> Code snippet</p>

/**
 * This is a JS function to convert yioop wiki markup to
 * html.
 *
 * Note: this listing is embedded in an HTML page, so angle brackets inside
 * the string and regex literals below are written as HTML entities.
 *
 * The rules run in a fixed order: line-ending cleanup, lists, plain URLs,
 * bracketed links, headings, line breaks, bold, italic, resources,
 * horizontal rules and finally block wrapping. Rules that are anchored on
 * a newline (lists, headings) must run before newlines are turned into
 * line breaks, otherwise they can only ever match on the first line.
 *
 * @param {String} wiki_text wiki markup to be parsed as HTML
 * @param {*} group_id value concatenated into the "g" query parameter of
 *      the image URL built for {{resource:name|description}} markup
 * @param {*} page_id value concatenated into the "p" query parameter of
 *      the same image URL
 * @returns {String} parsed html; an empty string when wiki_text is null
 *      or undefined
 */
function parseWikiContent(wiki_text, group_id, page_id)
{
    // Nothing to parse: return early instead of throwing on .replace().
    if (wiki_text === null || wiki_text === undefined) {
        return "";
    }

    var html = String(wiki_text);

    //note that line breaks from a text area are sent
    //as \r\n , so make sure we clean them up to replace
    //all \r\n with \n
    html = html.replace(/\r\n/g, "\n");

    html = parseLists(html);

    //Regex replace normal links. The character in front of the URL is
    //captured and written back so it is not lost, and a URL at the very
    //start of the text is matched as well. URLs directly after "[" are
    //left for the external link rule below.
    html = html.replace(/(^|[^\[])(http[^\[\s]*)/g,
            function (match, prefix, link) {
                return prefix + '&lt;a href="' + link + '"&gt;'
                        + link + '&lt;/a&gt;';
            });

    //Regex replace for external links of the form [url optional label].
    //The body stops at the first "]" so that several bracketed links on
    //one line are converted separately rather than merged into one.
    html = html.replace(/\[(http[^\]]*)\]/g, function (match, inner) {
        var parts = inner.replace(/[\[\]]/g, '').split(/ /);
        var link = parts.shift();
        return '&lt;a href="' + link + '"&gt;'
        + (parts.length ? parts.join(' ') : link) + '&lt;/a&gt;';
    });

    //Regex replace for headings. The number of "=" characters gives the
    //heading level. This has to happen while the text still contains
    //newlines, because the pattern is anchored on them.
    html = html.replace(/(?:^|\n)([=]+)(.*)\1/g,
            function (match, marks, title) {
                return '&lt;h'
                        + marks.length + '&gt;'
                        + title
                        + '&lt;/h'
                        + marks.length
                        + '&gt;';
            });

    // Basic MediaWiki Syntax.
    // Replace newlines with &lt;br /&gt;
    html = html.replace(/\n/gi, "&lt;br /&gt;");

    //Regex replace for Bold characters (must run before italic, since
    //the bold marker contains the italic marker)
    html = html.replace(/'''(.*?)'''/g, function (match, contents) {
        return '&lt;b&gt;' + contents + '&lt;/b&gt;';
    });

    //Regex replace for Italic characters
    html = html.replace(/''(.*?)''/g, function (match, contents) {
        return '&lt;i&gt;' + contents + '&lt;/i&gt;';
    });

    //Regex for resource extraction.
    html = html.replace(/{{resource:(.+?)\|(.+?)}}/g,
            function (match, contents, desc) {
                return '&lt;img src="' + "?c=resource&a=get&f=resources&g="
                + group_id
                + "&p=" + page_id
                + "&n=" + contents + '" alt="' + desc
                + '" class="wiki-resource-image"/&gt;';
            });

    //Regex replace for HR. hr is a void element, so emit a single
    //self-closed tag instead of an open/close pair.
    html = html.replace(/----/g, '&lt;hr /&gt;');

    //Regex replace for blocks. Newlines were already converted above, so
    //this normally only applies at the start of the text. Text whose
    //first character is in the excluded set is left unwrapped.
    html = html.replace(/(?:^|\n+)([^# =\*&lt;].+)(?:\n+|$)/gm,
            function (match, contents) {
                // a run made up only of "^" characters is not wrapped
                if (contents.match(/^\^+$/)) {
                    return contents;
                }
                return "\n&lt;div&gt;" + contents + "&lt;/div&gt;\n";
            });

    return html;
}

/**
 * Turns runs of consecutive wiki list lines (lines starting with "*" or
 * "#") into html lists. One marker is stripped from every line of a run
 * and the remainder is fed back into this function, so deeper nesting
 * levels are converted by recursion.
 * @param {String} str wiki text, with lines separated by \n
 * @returns {String} the text with each list run replaced by list markup
 */
function parseLists(str)
{
    var listRun = /(?:(?:(?:^|\n)[\*#].*)+)/g;

    return str.replace(listRun, function (run) {
        // any "#" line in the run makes the whole list an ordered one
        var tag = 'ul';
        if (/(^|\n)#/.test(run)) {
            tag = 'ol';
        }

        // drop one marker (and one optional space) per line, then let
        // the recursive call convert any nested list that remains
        var oneLevelUp = run.replace(/(^|\n)[\*#][ ]{0,1}/g, "$1");
        var nested = parseLists(oneLevelUp);
        var items = nested.replace(/^\n/, '').split(/\n/);

        var opening = '&lt;' + tag + '&gt;&lt;li&gt;';
        var separator = '&lt;/li&gt;&lt;li&gt;';
        var closing = '&lt;/li&gt;&lt;/' + tag + '&gt;';

        return opening + items.join(separator) + closing;
    });
}
</pre>