<>

A basic MediaWiki Parser Using JavaScript

Implemented a wiki-to-HTML parser that displays the HTML counterpart of the wiki content retrieved from the web service response.

Below is a demo & code used to parse wiki content into HTML on the client side.


CS298 Deliverable 3 : Wiki Parser Demo


Code snippet

/**
 * Converts Yioop wiki markup to HTML.
 *
 * Rules are applied in this order: lists, bare links, bracketed external
 * links, headings, newline -> <br />, bold, italic, resources, horizontal
 * rules and finally block wrapping.
 *
 * NOTE: no HTML escaping is performed; any markup already present in
 * wiki_text is passed through to the result unchanged.
 *
 * @param {String} wiki_text wiki markup to be parsed as HTML
 * @param {*} group_id value placed in the "g" query parameter of resource
 *      image URLs
 * @param {*} page_id value placed in the "p" query parameter of resource
 *      image URLs
 * @returns {String} parsed html ('' when wiki_text is null or undefined)
 */
function parseWikiContent(wiki_text, group_id, page_id) {
    if (wiki_text === null || wiki_text === undefined) {
        return '';
    }

    // Line breaks from a text area are sent as \r\n, so clean them up
    // first; every later rule only has to deal with \n.
    var html = String(wiki_text).replace(/\r\n/g, "\n");

    html = parseLists(html);

    // Bare links. The character in front of the URL is captured and put
    // back so it is not lost, and the URL stops at '<' so it cannot run
    // into tags such as the </li> emitted by parseLists.
    html = html.replace(/(^|[^\[])(https?:\/\/[^\[\s<]*)/g,
        function (match, before, url) {
            return before + '<a href="' + url + '">' + url + '</a>';
        });

    // External links: [http://example.com optional label]. The body is
    // limited to the nearest ']' so several links on one line stay separate.
    html = html.replace(/\[(https?:\/\/[^\]]*)\]/g, function (match, body) {
        var parts = body.split(/ /);
        var link = parts.shift();
        return '<a href="' + link + '">' +
            (parts.length ? parts.join(' ') : link) + '</a>';
    });

    // Headings. This has to run while the text still contains real
    // newlines, otherwise only a heading at the very start can match.
    // The newline in front of a heading is consumed by the match.
    html = html.replace(/(?:^|\n)(=+)(.*)\1/g, function (match, marks, title) {
        // HTML only defines h1..h6.
        var level = Math.min(marks.length, 6);
        return '<h' + level + '>' + title + '</h' + level + '>';
    });

    // Replace the remaining newlines with <br />
    html = html.replace(/\n/g, "<br />");

    // Bold must be handled before italic because ''' contains ''.
    html = html.replace(/'''(.*?)'''/g, function (match, contents) {
        return '<b>' + contents + '</b>';
    });
    html = html.replace(/''(.*?)''/g, function (match, contents) {
        return '<i>' + contents + '</i>';
    });

    // Resources: {{resource:name|description}} becomes an <img> tag.
    html = html.replace(/\{\{resource:(.+?)\|(.+?)\}\}/g,
        function (match, contents, desc) {
            return '<img src="' + "?c=resource&a=get&f=resources&g=" +
                group_id + "&p=" + page_id + "&n=" + contents +
                '" alt="' + desc + '" class="wiki-resource-image"/>';
        });

    // Horizontal rule: four or more dashes. <hr> is a void element, so it
    // is emitted self-closed with no content.
    html = html.replace(/-{4,}/g, '<hr />');

    // Blocks. Newlines have already been turned into <br /> above, so this
    // normally matches at most once: text of two or more characters that
    // does not begin with '#', ' ', '=', '*' or '<' is wrapped as a whole
    // in a single <div>. A match made up only of '^' characters is
    // returned without the wrapper.
    html = html.replace(/(?:^|\n+)([^# =\*<].+)(?:\n+|$)/gm,
        function (match, contents) {
            if (contents.match(/^\^+$/)) {
                return contents;
            }
            return "\n<div>" + contents + "</div>\n";
        });

    return html;
}

/**
 * Lists need to be recursively parsed, so this function converts runs of
 * consecutive lines starting with '*' or '#' to nested <ul>/<ol> markup,
 * calling itself for each deeper level.
 *
 * A run is rendered as <ol> if any of its lines starts with '#' at the
 * current level, otherwise as <ul>. The newline in front of a run that is
 * not at the start of the string is consumed by the replacement.
 *
 * @param {String} str wiki markup
 * @returns {String} str with list runs replaced by HTML lists
 */
function parseLists(str) {
    return str.replace(/(?:(?:(?:^|\n)[\*#].*)+)/g, function (match) {
        var listType = match.match(/(^|\n)#/) ? 'ol' : 'ul';
        // Strip one level of list marker (and one optional space) from
        // every line, then let the recursion handle what is left.
        var inner = match.replace(/(^|\n)[\*#][ ]{0,1}/g, "$1");
        inner = parseLists(inner);
        var items = inner.replace(/^\n/, '').split(/\n/);
        return '<' + listType + '><li>' + items.join('</li><li>') +
            '</li></' + listType + '>';
    });
}