/* --------------------------------------------------
	Title:							Bulletin Board and Basic HTML comment filter
	Description:					Filters comments allowing only valid HTML tags and converts BBCode to HTML
	Author:							Colin Nolan, with the BBC2HTML method used under the GPL from http://ufku.com/personal/bbc2html
	Created:						13/06/10
	
	
	The bb_converter Object:
		allowed_tags				Array holding all tags allowed to be used by user
		disabled_properties			Properties (of tags) not allowed to be used by user
	
		filter						Method to be called which pushes string through filters and then returns the end result
			comment					String (comment)
		
		BBC2HTML					Converts BBcode to basic HTML. Written by the guys from http://ufku.com/
			S						String
		
		checkTags					Ensures all tags, if opened, are closed. (Closes valid tags only, as invalid ones are replaced with character entities)
			str						String
		
		filterTags					Removes invalid tags and any invalid properties they may have. Also stops XSS attacks
			str						String

		
	Usage:
		bb_converter.filter(comment);
		
	
	Note:
		The bb_converter is to be used in conjunction with the offensive word filter and with a measure to stop excessively large elements (e.g. images) being set.
		
*/




// The bb_converter object
var bb_converter = {};

// Settings --------------------
// Tags allowed in comments (compared against lower-cased tag names)
bb_converter.allowed_tags  = ["img", "code", "pre", "div", "a", "h1", "h2", "h3", "h4", "h5", "h6", "br", "b", "i", "u", "s", "blockquote", "font", "ul", "li"];

// Properties of tags that are not allowed.
// This is a deny list: a name is only acted on by filterTags when it is followed by "=" or ":".
// NOTE(review): event handlers that are not listed here (e.g. "onmouseenter") are not blocked.
bb_converter.disabled_properties =
[
	// Event handler attributes
	"onload", "onblur", "onchange", "onclick", "ondblclick", "onerror", "onfocus", "onkeydown", "onkeypress", "onkeyup", "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onresize", "onselect", "onunload",
	// Style properties (CSS names are hyphenated; longer names are listed before their shorter suffixes such as "top")
	"position", "margin", "margin-top", "margin-left", "margin-right", "margin-bottom", "padding", "padding-top", "padding-left", "padding-right", "padding-bottom", "top", "left", "right", "bottom",
	// Misc (not really a property, but "javascript:" is found the same way as "position:")
	"javascript"
];




// Entry point: runs a comment through every filter stage, in order, and returns the result --------------------
//   comment    String (the raw comment)
//   Returns    The comment after BBCode conversion, tag balancing and tag/property filtering
bb_converter.filter = function(comment)
{
	var asHtml = bb_converter.BBC2HTML(comment);			// 1. BBCode -> HTML
	var balanced = bb_converter.checkTags(asHtml);			// 2. open/close tags paired up
	return bb_converter.filterTags(balanced);				// 3. disallowed tags and properties neutralised
};




// Bulletin board to HTML converter - used under the GPL from http://ufku.com/ --------------------
//   S          String that may contain BBCode
//   Returns    S with the recognised BBCode replaced by HTML. A [tag]...[/tag] pair whose name is
//              not recognised is written back in its BBCode form.
//   Note       Apart from "<" inside [code]/[php] and brackets inside [notag], nothing is escaped
//              here: tag arguments and bodies are copied into the generated HTML as they are.
bb_converter.BBC2HTML = function(S)
{
	// Without an opening bracket there is no BBCode to convert
	if (S.indexOf('[') < 0) return S;

	function makeRegExp(pattern, flags) {return new RegExp(pattern, flags);}
	// Parses a "WIDTHxHEIGHT" argument; null for anything else
	function parseSize(arg) {return sizePattern.exec(arg);}
	// Converts every [tag]...[/tag] or [tag=arg]...[/tag] pair found in text
	function convertPairs(text) {return text.replace(pairPattern, convertPair);}
	// Applies a map of {pattern source: replacement} to text, in the map's insertion order
	function applyRules(text, rules) {for (var source in rules) text = text.replace(makeRegExp(source, 'g'), rules[source]); return text;}

	// Replacement callback for pairPattern: name is the tag, arg the optional "=..." part, body the content
	function convertPair(match, name, arg, body)
	{
		// Pairs nested inside the body are converted first
		if (body && body.indexOf('[') > -1) body = convertPairs(body);

		// arg is undefined when the tag was written without "=..."; use '' so that the
		// text "undefined" is never written into an attribute
		var option = arg || '';
		var size;

		switch (name) {
			case 'url':case 'anchor':case 'email': return '<a '+ linkAttributes[name] + (arg||body) +'">'+ body +'</a>';
			case 'img': size = parseSize(option); return '<img src="'+ body +'"'+ (size ? ' width="'+ size[1] +'" height="'+ size[2] +'"' : '') +' alt="'+ (size ? '' : option) +'" />';
			case 'flash':case 'youtube': size = parseSize(option)||[0, 425, 366]; return '<object type="application/x-shockwave-flash" data="'+ mediaBases[name] + body +'" width="'+ size[1] +'" height="'+ size[2] +'"><param name="movie" value="'+ mediaBases[name] + body +'" /></object>';
			case 'float': return '<span style="float: '+ option +'">'+ body +'</span>';
			case 'left':case 'right':case 'center':case 'justify': return '<div style="text-align: '+ name +'">'+ body +'</div>';
			case 'google':case 'wikipedia': return '<a href="'+ searchBases[name] + body +'">'+ body +'</a>';
			case 'b':case 'i':case 'u':case 's':case 'sup':case 'sub':case 'h1':case 'h2':case 'h3':case 'h4':case 'h5':case 'h6':case 'table':case 'tr':case 'th':case 'td': return '<'+ name +'>'+ body +'</'+ name +'>';
			case 'row': case 'r':case 'header':case 'head':case 'h':case 'col':case 'c': return '<'+ tableAliases[name] +'>'+ body +'</'+ tableAliases[name] +'>';
			case 'acronym':case 'abbr': return '<'+ name +' title="'+ option +'">'+ body +'</'+ name +'>';
		}
		// Unknown tag: written back as BBCode
		return '['+ name + (arg ? '='+ arg : '') +']'+ body +'[/'+ name +']';
	}

	var pairPattern = makeRegExp('\\[([a-z][a-z0-9]*)(?:=([^\\]]+))?]((?:.|[\r\n])*?)\\[/\\1]', 'g'), sizePattern = makeRegExp('^(\\d+)x(\\d+)$');
	var linkAttributes = {url: 'href="', 'anchor': 'name="', email: 'href="mailto: '};
	var searchBases = {google: 'http://www.google.com/search?q=', wikipedia: 'http://www.wikipedia.org/wiki/'};
	var mediaBases = {youtube: 'http://www.youtube.com/v/', flash: ''};
	var tableAliases = {row: 'tr', r: 'tr', header: 'th', head: 'th', h: 'th', col: 'td', c: 'td'};
	// Verbatim tags: [character replacements applied to the content, HTML before, HTML after]
	var verbatimTags = {notag: [{'\\[': '&#91;', ']': '&#93;'}, '', ''], code: [{'<': '&lt;'}, '<code><pre>', '</pre></code>']};
		verbatimTags.php = [verbatimTags.code[0], verbatimTags.code[1]+ '&lt;?php ', '?>'+ verbatimTags.code[2]];
	var fontStyles = {font: 'font-family:$1', size: 'font-size:$1px', color: 'color:$1'};
	var listStyles = {c: 'circle', d: 'disc', s: 'square', '1': 'decimal', a: 'lower-alpha', A: 'upper-alpha', i: 'lower-roman', I: 'upper-roman'};
	var verbatimRules = {}, simpleRules = {};
	var key;

	// [notag], [code] and [php]: wrap the content and apply that tag's character replacements
	for (key in verbatimTags) verbatimRules['\\[('+ key +')]((?:.|[\r\n])*?)\\[/\\1]'] = function(match, name, content) {return verbatimTags[name][1] + applyRules(content, verbatimTags[name][0]) + verbatimTags[name][2]};
	// [font=..], [size=..] and [color=..]: opening and closing tags are replaced independently of each other
	for (key in fontStyles) {simpleRules['\\['+ key +'=([^\\]]+)]'] = '<span style="'+ fontStyles[key] +'">'; simpleRules['\\[/'+ key +']'] = '</span>';}
	simpleRules['\\[list]'] = '<ul>'; simpleRules['\\[list=(\\w)]'] = function(match, type) {return '<ul style="list-style-type: '+ (listStyles[type]||'disc') +'">'}; simpleRules['\\[/list]'] = '</ul>'; simpleRules['\\[\\*]'] = '<li>';
	simpleRules['\\[quote(?:=([^\\]]+))?]'] = function(match, author) {return '<div>'+ (author ? author +' wrote' : 'Quote') +':<blockquote>'}; simpleRules['\\[/quote]'] = '</blockquote></div>';
	simpleRules['\\[(hr|br)]'] = '<$1 />'; simpleRules['\\[sp]'] = '&nbsp;';

	// Order matters: verbatim tags first, then the single-tag rules, then the generic pairs
	return convertPairs(applyRules(applyRules(S, verbatimRules), simpleRules));
};




// Ensures all valid tags are closed if opened --------------------
//   str        String of HTML
//   Returns    str in which
//                - <br> is written as <br/>
//                - a "/" before ">" is removed from tags that may not self close
//                - "<" directly followed by a space is replaced with &#60; (an operator, not a tag)
//                - a closing tag that was never opened gets an opening tag inserted directly before it
//                - allowed tags still open at the end of the string are closed, most recent first
//   Reads      bb_converter.allowed_tags
bb_converter.checkTags = function(str)
{
	// Tags which "self-close", ie <tag/> is valid. These never need a closing tag
	var selfClosingTags = ["img", "input", "br", "hr"];

	// Names of all tags not yet closed in string, oldest first
	var openTags = [];

	// True when list holds name
	function contains(list, name)
	{
		for (var n = 0 ; n < list.length ; n++)
			if (list[n] == name)
				return true;
		return false;
	}

	// Deals with break tags as <br/> not <br>
	str = str.replace(/<br>/gi, "<br\/>");

	for (var i = 0 ; i < str.length ; i++)	{
		if (str.charAt(i) != "<")
			continue;

		var tagEnd = str.indexOf(">", i);
		if (tagEnd == -1)						// No ">" left in the string, so no further tags can exist
			break;

		var closingTag = str.charAt(i + 1) == "\/";
		var selfClose = str.charAt(tagEnd - 1) == "/";
		// Tag name: text after "<" or "</", up to the first space, without a trailing "/"
		var tag = str.substring(i + 1 + +closingTag, tagEnd - +selfClose).toLowerCase();
		if (tag.indexOf(" ") != -1)
			tag = tag.substring(0, tag.indexOf(" "));

		if (selfClose)	{
			if (contains(selfClosingTags, tag))
				// Does not perform further checks as tag is self closing
				continue;
			// Removes self close as not valid for this tag. The ">" moves one place to the left with it
			str = str.substr(0, tagEnd - 1) + str.substring(tagEnd);
			tagEnd--;
		}

		if (str.charAt(i + 1) == " ")	{
			// Space after tag open, hence not a tag (perhaps an operator): escapes the "<"
			str = str.substr(0, i) + "&#60;" + str.substr(i + 1, str.length);
			i += 5;								// Moves past the five characters of "&#60;"
			continue;
		}

		if (closingTag)	{
			// Pairs the close tag with the most recently opened tag of the same name
			var pairFound = false;
			for (var j = openTags.length - 1 ; j >= 0 ; j--)	{
				if (openTags[j] == tag)	{
					openTags.splice(j, 1);
					pairFound = true;
					break;
				}
			}
			if (!pairFound)	{
				// The close tag was never opened: adds the opening immediately before it
				str = str.substr(0, i) + "<" + tag + ">" + str.substring(i);
				// Continues from the ">" of the (now shifted) close tag
				i = tagEnd + tag.length + 2;
			}
		}
		else
			// Stores the tag as open
			openTags.push(tag);
	}

	// Closes all accepted tags left open, innermost first. Self closing tags have no close tag to add
	for (var k = openTags.length - 1 ; k >= 0 ; k--)
		if (!contains(selfClosingTags, openTags[k]) && contains(bb_converter.allowed_tags, openTags[k]))
			str += "<\/"+ openTags[k] +">";
	return str;
};




// Filters disallowed tags and properties along with patching XSS loopholes --------------------
//   str        String of HTML
//   Returns    str in which
//                - "<" without any following ">" is replaced with &#60;
//                - tags whose name is not in bb_converter.allowed_tags have "<" and ">" replaced
//                  with &#60; and &#62;
//                - inside allowed opening tags, every name from bb_converter.disabled_properties
//                  that is followed by optional whitespace and "=" or ":" is replaced with
//                  "_disabled" (the "=" or ":" is kept). Names are matched in any letter case
//   Reads      bb_converter.allowed_tags, bb_converter.disabled_properties
//   NOTE(review): this is a deny list, not a full sanitiser. Names that are not listed, or that
//   are written with character entities (e.g. "javascript&#58;"), are not matched.
bb_converter.filterTags = function(str)
{
	// Only runs if tags other than <br/> exist
	if (str.replace(/<br\/>/gi, "").indexOf("<") == -1)
		return str;

	// Stores locally
	var allowed_tags  = bb_converter.allowed_tags;
	var disabled_properties = bb_converter.disabled_properties;

	// One pattern per disabled property: the name (regex characters escaped), optional
	// whitespace, then "=" or ":". Kept in list order so longer names listed first win
	var propertyPatterns = [];
	for (var p = 0 ; p < disabled_properties.length ; p++)
		propertyPatterns.push(new RegExp(disabled_properties[p].replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "\\s*[=:]", "gi"));

	// Replaces the matched name and whitespace, keeping the final "=" or ":"
	function disableProperty(match)
	{
		return "_disabled" + match.charAt(match.length - 1);
	}

	// Checks all tags. j is the position the search for the next "<" starts from
	var j = 0;
	while (j < str.length)	{
		var openTag = str.indexOf("<", j);
		if (openTag == -1)
			break;
		var endTag = str.indexOf(">", openTag);

		// Checks if < even represents a tag
		if (endTag == -1)	{
			str = str.substring(0, openTag) + "&#60;" + str.substring(openTag + 1);
			j = openTag + 5;					// Continues after the five characters of "&#60;"
			continue;
		}

		var closingTag = str.charAt(openTag + 1) == "\/";
		var selfClose = str.charAt(endTag - 1) == "/";
		// Tag name: text after "<" or "</", up to the first space, without a trailing "/"
		var tag = str.substring(openTag + 1 + +closingTag, endTag - +selfClose).toLowerCase();
		if (tag.indexOf(" ") != -1)
			tag = tag.substring(0, tag.indexOf(" "));

		// Checks if tag is allowed
		var validTag = false;
		for (var i = 0 ; i < allowed_tags.length ; i++)	{
			if (tag == allowed_tags[i])	{
				validTag = true;
				break;
			}
		}
		if (!validTag)	{
			// Makes tag safe
			str = str.substring(0, openTag) + "&#60;" + str.substring(openTag + 1, endTag) + "&#62;" + str.substring(endTag + 1);
			// Continues directly after the escaped "<" so that neither a "<" inside the escaped
			// text nor the tag that follows it can be skipped
			j = openTag + 5;
			continue;
		}

		// Replaces disabled properties in tag (if not close tag)
		if (!closingTag)	{
			var valueStart = openTag + 1 + tag.length;
			var tagValue = str.substring(valueStart, endTag);
			for (var k = 0 ; k < propertyPatterns.length ; k++)
				tagValue = tagValue.replace(propertyPatterns[k], disableProperty);
			str = str.substring(0, valueStart) + tagValue + str.substring(endTag);
			// The tag may have changed length, so its ">" may have moved
			endTag = valueStart + tagValue.length;
		}

		// Searches next tag
		j = endTag + 1;
	}
	return str;
};






