File: src/vdom/partials/html-parser.js

"use strict";

/**
 * Exports `htmlToVNodes` which transforms html-text into vnodes.
 *
 *
 * <i>Copyright (c) 2014 ITSA - https://github.com/itsa</i>
 * <br>
 * New BSD License - http://choosealicense.com/licenses/bsd-3-clause/
 *
 * @module vdom
 * @submodule html-parser
 * @since 0.0.1
*/

require('polyfill');
require('js-ext/lib/object.js');

var createHashMap = require('js-ext/extra/hashmap.js').createMap;

module.exports = function (window) {

    window._ITSAmodules || Object.protectedProp(window, '_ITSAmodules', createHashMap());

    if (window._ITSAmodules.HtmlParser) {
        return window._ITSAmodules.HtmlParser; // HtmlParser was already created
    }

    var NS = require('./vdom-ns.js')(window),
        extractor = require('./attribute-extractor.js')(window),
        DOCUMENT = window.document,
        xmlNS = NS.xmlNS,
        voidElements = NS.voidElements,
        nonVoidElements = NS.nonVoidElements,

        TAG_OR_ATTR_START_CHARACTERS = createHashMap({
            a: true,
            b: true,
            c: true,
            d: true,
            e: true,
            f: true,
            g: true,
            h: true,
            i: true,
            j: true,
            k: true,
            l: true,
            m: true,
            n: true,
            o: true,
            p: true,
            q: true,
            r: true,
            s: true,
            t: true,
            u: true,
            v: true,
            w: true,
            x: true,
            y: true,
            z: true,
            A: true,
            B: true,
            C: true,
            D: true,
            E: true,
            F: true,
            G: true,
            H: true,
            I: true,
            J: true,
            K: true,
            L: true,
            M: true,
            N: true,
            O: true,
            P: true,
            Q: true,
            R: true,
            S: true,
            T: true,
            U: true,
            V: true,
            W: true,
            X: true,
            Y: true,
            Z: true
        }),
        STARTTAG_OR_ATTR_VALUE_ENDS_CHARACTERS = createHashMap({
            ' ': true,
            '>': true,
            '/': true
        }),
        ATTRUBUTE_NAME_ENDS_CHARACTER = createHashMap({
            ' ': true,
            '=': true,
            '>': true
        }),

        /**
         * Transforms html-text into a vnodes-Array.
         *
         * @method htmlToVNodes
         * @param htmlString {String} plain html as string
         * @return {Array} array with `vnodes`
         * @since 0.0.1
         */
        htmlToVNodes = window._ITSAmodules.HtmlParser = function(htmlString, vNodeProto, nameSpace, parentVNode, suppressItagRender, allowScripts) {
            var i = 0,
                vnodes = [],
                insideTagDefinition, insideComment, innerText, endTagCount, stringMarker, attributeisString, attribute, attributeValue, nestedComments,
                len, j, character, character2, vnode, tag, isBeginTag, isEndTag, scriptVNode, extractClass, extractStyle, tagdefinition, is;

            if (typeof htmlString!=='string') {
                htmlString = (htmlString===undefined) ? '' : String(htmlString);
            }
            len = htmlString.length;

            while (i<len) {
                character = htmlString[i];
                character2 = htmlString[i+1];
                if (insideTagDefinition) {

                    vnode.attrs = {};
                    if (character!=='>') {
                        // fill attributes until tagdefinition is over:
                        // NOTE: we need to DOUBLE check for "(character!=='>')" because the loop might set the position to '>' where an i++ would miss it!
                        while ((character!=='>') && (++i<len) && (character=htmlString[i]) && (character!=='>')) {
                            // when starting to read an attribute, finish reading until it is completely ready.
                            // this is, because attributes can have a '>' which shouldn't be noticed as an end-of-tag definition
                            if (TAG_OR_ATTR_START_CHARACTERS[character]) {
                                attribute = character;
                                while ((++i<len) && (character=htmlString[i]) && !ATTRUBUTE_NAME_ENDS_CHARACTER[character]) {
                                    attribute += character;
                                }
                                if (character==='=') {
                                    stringMarker = htmlString[i+1];
                                    attributeisString = (stringMarker==='"') || (stringMarker==="'");

                                    attributeValue = '';
                                    if (attributeisString) {
                                        i++;
                                        while ((++i<len) && (character=htmlString[i]) && ((character!==stringMarker) || (htmlString[i-1]==='\\'))) {
                                            ((htmlString[i+1]!==stringMarker) || (character!=='\\')) && (attributeValue+=character);
                                        }
                                    }
                                    else {
                                        while ((++i<len) && (character=htmlString[i]) && !STARTTAG_OR_ATTR_VALUE_ENDS_CHARACTERS[character]) {
                                            attributeValue += character;
                                        }
                                        // need to set the position one step behind --> the attributeloop will increase it and would otherwise miss a character
                                        i--;
                                    }
                                }
                                else {
                                    attributeValue = "";
                                }
                                // always store the `is` attribute in lowercase:
                                (attribute.length===2) && (attribute.toLowerCase()==='is') && (attribute='is');
                                vnode.attrs[attribute] = attributeValue;
                            }
                        }
                        vnode.id = vnode.attrs.id;

                        extractClass = extractor.extractClass(vnode.attrs['class']);
                        extractClass.attrClass && (vnode.attrs['class']=extractClass.attrClass);
                        vnode.classNames = extractClass.classNames;

                        extractStyle = extractor.extractStyle(vnode.attrs.style);
                        extractStyle.attrStyle && (vnode.attrs.style=extractStyle.attrStyle);
                        vnode.styles = extractStyle.styles;

                        (vnode.attrs.is==='system-node') && (vnode._systemNode=true);
                    }

                    if (!vnode.isVoid) {
                        innerText = '';
                        endTagCount = 1;
                        // fill innerText until end-tagdefinition:
                        while ((endTagCount>0) && (++i<len) && (character=htmlString[i])) {
                            if (character==='<') {
                                if ((character2=htmlString[i+1]) && (character2==='/')) {
                                    // possible end-tag
                                    j = i+1;
                                    isEndTag = true;
                                    while (isEndTag && (++j<len) && (htmlString[j]!=='>')) {
                                        if (htmlString[j].toUpperCase()!==tag[j-i-2]) {
                                            isEndTag = false;
                                        }
                                    }
                                    isEndTag && (endTagCount--);
                                }
                                else {
                                    // possible begin-tag of the same tag (an innertag with the same tagname)
                                    j = i;
                                    isBeginTag = true;
                                    while (isBeginTag && (++j<len) && (character2=htmlString[j]) && (character2!=='>') && (character2!==' ')) {
                                        if (htmlString[j].toUpperCase()!==tag[j-i-1]) {
                                            isBeginTag = false;
                                        }
                                    }
                                    isBeginTag && (endTagCount++);
                                }
                            }
                            if (endTagCount>0) {
                                innerText += character;
                            }
                        }
                        (endTagCount===0) && (i=i+tag.length+3);
                        // in case of 'SCRIPT' or 'STYLE' tags --> just use the innertext, all other tags need to be extracted

                        if (NS.SCRIPT_OR_STYLE_TAG[vnode.tag]) {
                            // CREATE INNER TEXTNODE
                            scriptVNode = Object.create(vNodeProto);
                            scriptVNode.ns = nameSpace;
                            scriptVNode.nodeType = 3;
                            scriptVNode.domNode = DOCUMENT.createTextNode(innerText);
                            // create circular reference:
                            scriptVNode.domNode._vnode = scriptVNode;
                            scriptVNode.text = innerText;
                            scriptVNode.vParent = vnode;
                            vnode.vChildNodes = [scriptVNode];
                        }
                        else {
                            vnode.vChildNodes = (innerText!=='') ? htmlToVNodes(innerText, vNodeProto, vnode.ns, vnode, suppressItagRender, allowScripts) : [];
                        }
                    }
                    else {
                        i++; // compensate for the '>'
                    }

                    // just to be sure there won't be a `script`-tag passed inside the argument (something modern browsers never let happen):
                    (tag==='SCRIPT') && (tag='XSCRIPT');

                    // the string-parser expects </xscript> for `script`-tags
                    if ((tag==='XSCRIPT') && allowScripts) {
                        tagdefinition = 'script';
                        vnode.tag = 'SCRIPT';
                    }
                    else {
                        tagdefinition = tag.toLowerCase();
                        //vnode.domNode can only be set after inspecting the attributes --> there might be an `is` attribute
                        if (vnode.isItag && (is=vnode.attrs.is) && !is.contains('-')) {
                            tagdefinition = tag + '#' + is;
                        }
                    }
                    // cautious: DOCUMENT.createElement(tagdefinition, undefined) will render differently than DOCUMENT.createElement(tagdefinition) -->
                    // it will set the attribute `is="undefined".
                    // therefore the second conditional:
                    vnode.domNode = vnode.ns ? DOCUMENT.createElementNS(vnode.ns, tagdefinition) : (suppressItagRender ? DOCUMENT.createElement(tagdefinition, suppressItagRender) : DOCUMENT.createElement(tagdefinition));
                    // create circular reference:
                    vnode.domNode._vnode = vnode;
                    vnodes[vnodes.length] = vnode;
                    // reset vnode to force create a new one
                    vnode = null;
                    insideTagDefinition = false;
                }

                else if (insideComment) {
                    if (character+character2+htmlString[i+2]+htmlString[i+3]==='<!--') {
                        nestedComments++;
                    }
                    if (character+character2+htmlString[i+2]==='-->') {
                        // should we close  the vnode?
                        nestedComments--;
                        if (nestedComments<0) {
                            // yes close the commentnode
                            // move index to last character of comment
                            i = i+2;
                            vnode.domNode = DOCUMENT.createComment('');
                            // create circular reference:
                            vnode.domNode._vnode = vnode;
                            vnodes[vnodes.length] = vnode;
                            // reset vnode to force create a new one
                            vnode = null;
                            insideComment = false;
                        }
                        else {
                            vnode.text += character;
                        }
                    }
                    else {
                        vnode.text += character;
                    }
                    i++;
                }

                else {
                    // inside TextNode which could go over into an Element or CommentNode
                    if ((character==='<') && TAG_OR_ATTR_START_CHARACTERS[character2] && (htmlString.lastIndexOf('>')>i)) {
                        // begin of opening Element
                        // first: store current vnode:
                        if (vnode) {
                            vnode.domNode = DOCUMENT.createTextNode('');
                            // create circular reference:
                            vnode.domNode._vnode = vnode;
                            vnodes[vnodes.length] = vnode;
                        }
                        vnode = Object.create(vNodeProto);
                        vnode.ns = nameSpace;
                        vnode.nodeType = 1;
                        vnode.vParent = parentVNode;
                        vnode.tag = '';
                        vnode.classNames ={};

                        // find tagname:
                        while ((++i<len) && (character=htmlString[i]) && (!STARTTAG_OR_ATTR_VALUE_ENDS_CHARACTERS[character])) {
                            vnode.tag += character.toUpperCase();
                        }

                        tag = vnode.tag;
                        vnode.isItag = ((tag[0]==='I') && (tag[1]==='-'));
                        vnode.ns = xmlNS[tag] || nameSpace;

                        //vnode.domNode can only be set after inspecting the attributes --> there might be an `is` attribute

                        // check if it is a void-tag, but only need to do the regexp once per tag-element:
                        if (voidElements[tag]) {
                            vnode.isVoid = true;
                        }
                        else if (nonVoidElements[tag]) {
                            vnode.isVoid = false;
                        }
                        else {
                            vnode.isVoid = vnode.isItag ? false : !(new RegExp('</'+tag+'>', 'i')).test(htmlString);
                            vnode.isVoid ? (voidElements[tag]=true) : (nonVoidElements[tag]=true);
                        }
                        insideTagDefinition = true;
                    }
                    else if (character+character2+htmlString[i+2]+htmlString[i+3]==='<!--') {
                        // begin of CommentNode
                        if (vnode) {
                            vnode.domNode = DOCUMENT.createTextNode('');
                            // create circular reference:
                            vnode.domNode._vnode = vnode;
                            vnodes[vnodes.length] = vnode;
                        }
                        vnode = Object.create(vNodeProto);
                        vnode.ns = nameSpace;
                        vnode.nodeType = 8;
                        vnode.text = '';
                        vnode.vParent = parentVNode;
                        // move index to first character of comment
                        i = i+4;
                        insideComment = true;
                        nestedComments = 0;
                    }
                    else {
                        if (!vnode) {
                            // no current vnode --> create a TextNode:
                            vnode = Object.create(vNodeProto);
                            vnode.ns = nameSpace;
                            vnode.nodeType = 3;
                            vnode.text = '';
                            vnode.vParent = parentVNode;
                        }
                        vnode.text += character;
                        i++;
                    }
                }
            }

            if (vnode) {
                vnode.domNode = DOCUMENT.createTextNode('');
                // create circular reference:
                vnode.domNode._vnode = vnode;
                vnodes[vnodes.length] = vnode;
            }
            return vnodes;
        };

    return htmlToVNodes;

};
APIs

File: src/vdom/partials/html-parser.js