Even faster String.prototype.trim() implementation in JavaScript.

I’ll begin with a quote:

Some people, when confronted with a problem, think “I know, I’ll use regular expressions.” Now they have two problems.

Those are the words of one of my favorite hackers, Jamie Zawinski.

Most JavaScript trim() implementations you see around are based on regular expressions and work fairly well for infrequent use on short strings. Some of them are well-crafted, but I think plain old loops can do better. Back in 2007, Steve Levithan covered some really fast implementations of trim() followed by a few more articles by others, including Luca Guidi.

Many of these implementations were based on regular expressions and a few were exceptionally brilliant. But did they answer the question of speed? In one of the comments posted on Steve Levithan’s blog there was a little gem written by Michael Lee Finney, and truly that little function proves to be the fastest of all of the implementations mentioned in those articles. Finney claimed to better previous implementations by 20x, and he was right. His code totally smoked the regular-expression based implementations.

The Details

Arrays in JavaScript behave like hash-tables and hence exhibit the property of being sparse.  Finney’s implementation exploits this feature of JavaScript to its fullest.  Here is a lookup table he uses to mark characters as whitespace:

String.whiteSpace = [];
String.whiteSpace[0x0009] = true;
String.whiteSpace[0x000a] = true;
String.whiteSpace[0x000b] = true;
String.whiteSpace[0x000c] = true;
String.whiteSpace[0x000d] = true;
String.whiteSpace[0x0020] = true;
String.whiteSpace[0x0085] = true;
String.whiteSpace[0x00a0] = true;
String.whiteSpace[0x1680] = true;
String.whiteSpace[0x180e] = true;
String.whiteSpace[0x2000] = true;
String.whiteSpace[0x2001] = true;
String.whiteSpace[0x2002] = true;
String.whiteSpace[0x2003] = true;
String.whiteSpace[0x2004] = true;
String.whiteSpace[0x2005] = true;
String.whiteSpace[0x2006] = true;
String.whiteSpace[0x2007] = true;
String.whiteSpace[0x2008] = true;
String.whiteSpace[0x2009] = true;
String.whiteSpace[0x200a] = true;
String.whiteSpace[0x200b] = true;
String.whiteSpace[0x2028] = true;
String.whiteSpace[0x2029] = true;
String.whiteSpace[0x202f] = true;
String.whiteSpace[0x205f] = true;
String.whiteSpace[0x3000] = true;

The implementation of the function simply indexes into this array to check whether any character encountered while traversing the string is valid whitespace. The function runs noticeably faster. Here’s his code:

function trim14(str) {
    var len = str.length, whiteSpace, i;

    if (!len) {
        return str;
    }

    whiteSpace = String.whiteSpace;

    if (len && whiteSpace[str.charCodeAt(len-1)])
    {
        do{
            --len;
        }
        while (len && whiteSpace[str.charCodeAt(len - 1)]);

        if (len && whiteSpace[str.charCodeAt(0)]){
            i = 1;
            while (i < len && whiteSpace[str.charCodeAt(i)]){
                ++i;
            }
        }
        return str.substring(i, len);
    }
    if (len && whiteSpace[str.charCodeAt(0)]) {
        i = 1;
        while (i < len && whiteSpace[str.charAt(i)]){
            ++i;
        }
        return str.substring(i, len);
    }
    return str;
};

Neatly written. I did spot a few unnecessary checks and removed them for a retest. The following code shaved off a couple more milliseconds sometimes.

function trim16(str) {
    var len = str.length, whiteSpace = String.whiteSpace, i = 0;

    if (len) {
        if (whiteSpace[str.charCodeAt(len - 1)]) {
            // Remove from the end.
            while (--len && whiteSpace[str.charCodeAt(len - 1)]);

            // Remove from the beginning.
            if (len && whiteSpace[str.charCodeAt(0)]){
                i = 1;
                while (i < len && whiteSpace[str.charCodeAt(i)]){
                    ++i;  // Keep this here.
                }
            }
            return str.substring(i, len);
        }

        // Remove from the beginning.
        if (whiteSpace[str.charCodeAt(0)]) {
            i = 1;
            while (i < len && whiteSpace[str.charAt(i)]){
                ++i;
            }
            return str.substring(i, len);
        }
    }

    return str;
};

Can this be any better and faster?

I think it can. There’s repetition in the code that could be removed as well.  I rewrote the function to use Finney’s look up table and ended up with this:

function trim17(str){
    var len = str.length;
    if (len){
        var whiteSpace = String.whiteSpace, i = 0;
        while (whiteSpace[str.charCodeAt(--len)]);
        if (++len){
            while (whiteSpace[str.charCodeAt(i)]){ ++i; }
        }
        str = str.substring(i, len);
    }
    return str;
}

Guess what? Here are the benchmark results from a Chrome developer build running on Linux with 100000 iterations:

Original length: 27663
trim10: 363ms (length: 27656)
trim11: 4668ms (length: 27656)
trim12: 4671ms (length: 27656)
trim13: 322ms (length: 27656)
trim14: 197ms (length: 27656)
trim15: 195ms (length: 27656)
trim16: 197ms (length: 27656)
trim17: 187ms (length: 27656)

trim17 is indeed even faster and shorter.

Updates

Nikolay “MadRabbit” V. Nemshilov, the author of RightJS (a beautiful JavaScript library), provides another implementation of the function:

function trim19(str){
    var str = str.replace(/^\s\s*/, ''),
        ws = /\s/,
        i = str.length;
    while (ws.test(str.charAt(--i)));
    return str.slice(0, i + 1);
}

which is faster than many other regexp-based implementations.

I have included that in the benchmark as well and generated a couple graphs.

Average performance of all trim() implementations across major platforms and browsers.
Average performance of all trim() implementations across major platforms and browsers (Smaller is better).
Average performance of the latter half of the set of trim() implementations.
Average performance of the latter half of the set of trim() implementations (Smaller is better).

Feel free to use my code under the terms of the MIT license. As usual, suggestions and constructive criticism are most welcome.

Advertisements

Making the browser download scripts in parallel.

Current browsers download scripts serially.  What that means is that if you use:

<script type="text/javascript" src="foobar.js"></script>
<script type="text/javascript" src="quuxdoo.js"></script>

The browser will not fetch quuxdoo.js until it has finished downloading foobar.js.   In fact, it will not download any other resource until that script is downloaded.  This is really a sad state of affairs.

An existing “solution”

An article “Downloading JavaScript files in Parallel” by Rakesh Pai explains the issue with a couple screenshots displaying the Firebug Net panel.

<script type="text/javascript">
    (function() {
        var s = [
            "/javascripts/script1.js",
            "/javascripts/script2.js",
            "/javascripts/script3.js",
            "/javascripts/script4.js",
            "/javascripts/script5.js"
        ];

        var sc = "script", tp = "text/javascript", sa = "setAttribute";
        for(var i=0, l=s.length; i<l; ++i) {
            if(window.navigator.userAgent.indexOf("MSIE")!==-1 || window.navigator.userAgent.indexOf("WebKit")!==-1) {
                document.writeln("<" + sc + " type=\"" + tp + "\" src=\"" + s[i] + "\" defer></" + sc + ">");
            } else {
                var t=document.createElement(sc);
                t[sa]("src", s[i]);
                t[sa]("type", tp);
                document.getElementsByTagName("head")[0].appendChild(t);
            }
        }
    })();
</script>

He provides a solution that seems to work well, but contains a few things I didn’t like or consider bugs:

  1. Browser sniffing.  Do Firefox and Opera not support the document.write() method? No, really, I’m asking here.  I don’t have a copy of FF 1.5 or 2.0 lying around to verify.  The latest version of Opera does seem to indeed.  And not just that, the script checks for the browser per iteration of the loop, so if you have 10 scripts that you load with this method, the browser check will occur 10 times!
  2. The code tries to optimize for space but ends up being harder to read instead.  This should ideally be the job of a minifier like YUI Compressor, Douglas Crockford’s JSMIN, Dojo Toolkit’s ShrinkSafe, or Dean Edwards’ Packer.  A couple of those space-saving tricks can be used to hint the minifier better while keeping the code readable.
  3. Take HTML 5.  New script element attributes like async and defer aren’t handled.  Both are defined as boolean attributes and async=”true” means the script would execute asynchronously as soon as it is available. What about scripts that aren’t of mime type text/javascript?  A better approach would have been to consume an array of attribute objects and fill those attributes into the created script element.
  4. Loop invariants should be outside the loop especially in a language like JavaScript where linear scope chains determine how quickly a variable can be accessed.  The code negligently accesses global objects and repeatedly goes down nested objects, which it really doesn’t need to per iteration.
  5. XHTML isn’t dead just yet.  Attribute minimization (ref. “<script … defer></script>”) is not valid XHTML.  Language purists may cringe.

Where’s the code?

Here is a non-browser sniffing version that offers the above flexibility.  Please note, as of this time I do not have access to any version of the-browser-that-shall-not-be-named (blah, pottermania), so I’m relying on the community to test this piece of code in the-browser-that-shall-not-be-named.  Enough said.  Here’s my attempt at the code:

/**
 * Loading scripts in parallel.
 * Copyright (C) 2009 Yesudeep Mangalapilly.
 *
 * Licensed under the terms of the MIT License.
 */
function getScripts(scripts){
    var lt = "%3C",
        gt = "%3E",
        doc = document,
        len = scripts.length,
        html = [];

    for (var i = 0, script = null, attribute_string = null; i < len; ++i){
        script = scripts[i];
        switch (typeof script){
        case 'string':
            attribute_string = ['src=\"', decodeURI(script), '\"'].join('');
            break;
        case 'object':
            var attrs = [];
            for (var key in script){
                var value = script[key];
                switch(key){
                    case 'src':
                    case 'SRC':
                        value = decodeURI(value);
                        break;
                    default: break;
                }
                attrs.push([key, '\"' + value + '\"'].join('='));
            }
            attribute_string = attrs.join(' ');
            break;
        default:
            continue;
        }
        html.push(lt, 'script ', attribute_string, gt, lt, '/script', gt);
    }
    html = unescape(html.join(''));
    doc.write(html);
}

Usage

Here is how you can use the code:

var scripts = [
    {
        src: "foobar.js",
        async: true
    },
    "quuxdoo.js"
];

getScripts(scripts);

Hope that helps.  Suggestions and constructive criticism most welcome.