Performance Zone is brought to you in partnership with:

I've been in the world of software development for 14+ years working with most mainstream programming languages at some point. Chad is a DZone MVB and is not an employee of DZone and has posted 38 posts at DZone. You can read more from them at their website. View Full User Profile

Build a Clustered ATOM Server with Node.js

05.10.2012
| 3720 views |
  • submit to reddit

From the Node.js documentation: A single instance of Node runs in a single thread. To take advantage of multi-core systems the user will sometimes want to launch a cluster of Node processes to handle the load. Today I’ll show you how to use Cluster for Node.js to serve out a simple static ATOM feed.

Note: Cluster is currently (as of the date of this article) marked as: Stability: 1 Experimental – Drastic changes in future versions. So keep in mind changes to the code below will probably be needed in the future. For a peek at where Cluster is headed you can already examine the (future) docs located here.

To get started I borrowed some code from the Node.js Cluster docs, this blog, and this GitHub repo. I then changed and in some cases fixed the code to work as expected.

First, I modified the ATOM.js from here. Keep in mind there is plenty more you can add to this to make the file below more robust and to support additional ATOM features. Also, I originally used NPM to install it but ran into several issues, so I ended up just copying the actual file into my project folder.

/*
 Borrowed from: https://github.com/dylang/node-atom/blob/master/lib/atom.js
 Author: https://github.com/dylang
 */
 
var XML = require('xml');
 
/**
 * Minimal ATOM feed builder.
 *
 * @param {Object} [options] - Feed-level settings.
 * @param {string} [options.title='Untitled ATOM Feed'] - Feed title.
 * @param {string} [options.description=''] - Feed description.
 * @param {string} [options.feed_url] - URL of the feed itself (rel="self").
 * @param {string} [options.site_url] - URL of the site (rel="alternate").
 * @param {string} [options.image_url] - Optional feed image URL.
 * @param {string} [options.author] - Default author name.
 * @param {Array} [items] - Optional pre-built entry objects.
 * @constructor
 */
function ATOM (options, items) {
    options = options || {};

    this.title          = options.title || 'Untitled ATOM Feed';
    this.description    = options.description || '';
    this.feed_url       = options.feed_url;
    this.site_url       = options.site_url;
    this.image_url      = options.image_url;
    this.author         = options.author;
    this.items          = items || [];

    /**
     * Add one entry to the feed.
     * @param {Object} [options] - Entry fields: title, description, url,
     *     guid, categories, author, date.
     * @returns {ATOM} this, so calls can be chained.
     */
    this.item = function (options) {
        options = options || {};
        var item = {
            title:          options.title || 'No title',
            description:    options.description || '',
            url:            options.url,
            guid:           options.guid,
            categories:     options.categories || [],
            author:         options.author,
            date:           options.date
        };

        this.items.push(item);
        return this;
    };

    /**
     * Serialize the feed to an ATOM XML string.
     * @param {string|boolean} [indent] - Indentation option passed through
     *     to the xml package.
     * @returns {string} Complete XML document including the declaration.
     */
    this.xml = function (indent) {
        return '<?xml version="1.0" encoding="UTF-8"?>\n'
            + XML(generateXML(this), indent);
    }; // semicolon added: this is a function-expression assignment statement

}
 
/**
 * Append data to array only when the guard value is truthy.
 * @param {*} bool - Guard; any truthy value triggers the push.
 * @param {Array} array - Target array, mutated in place.
 * @param {*} data - Element to append.
 */
function ifTruePush(bool, array, data) {
    if (!bool) {
        return;
    }
    array.push(data);
}
 
/**
 * Build the plain-object tree that the `xml` package serializes into an
 * ATOM feed document.
 *
 * @param {ATOM} data - Feed instance (title, site_url, feed_url, items).
 * @returns {Object} A `{ feed: [...] }` structure consumable by XML().
 */
function generateXML (data){

    var feed =  [
        { _attr: {
            'xmlns':         'http://www.w3.org/2005/Atom',
            'xml:lang':      'en-US'
        } },
        // NOTE(review): hardcoded ids are fine for this demo, but a real
        // feed needs a stable, unique identifier per feed and per entry.
        { id:           'urn:uuid:90c76b31-d399-21d2-b93C-0004449e0ca7' },
        { link:         { _attr: { type: 'text/html', rel: 'alternate', href: data.site_url } } },
        { link:         { _attr: { type: 'application/atom+xml', rel: 'self', href: data.feed_url } } },
        { title:        data.title },
        { updated: new Date().toISOString() }
    ];

    data.items.forEach(function(item) {
        var entry = [
            { id:        'urn:uuid:77c76b31-d549-21d2-b93C-8829942e4b5f' }
        ];
        if (item.date) {
            entry.push({ published: new Date(item.date).toISOString() });
        }
        if (item.updated) {
            entry.push({ updated: new Date(item.updated).toISOString() });
        }
        // BUG FIX: items store their address under `url` (set in ATOM#item),
        // so guarding on the nonexistent `item.link` meant entries never got
        // a <link> element. Guard on `item.url` instead.
        if (item.url) {
            entry.push({ link: { _attr: { type: 'text/html', rel: 'alternate', href: item.url } } });
        }
        if (item.title) {
            entry.push({ title: item.title });
        }
        if (item.description) {
            entry.push({ content: { _attr: { type: 'xhtml', 'xml:lang': 'en' }, _cdata: item.description } });
        }
        //ifTruePush(item.author || data.author, entry, { 'dc:creator': { _cdata: item.author || data.author } });
        feed.push({ entry: entry });
    });

    return { feed: feed };
}
 
module.exports = ATOM;

Again, keep in mind this is just for the example I’m building, obviously you wouldn’t want hardcoded IDs, etc.

Make sure to save the above file as atom.js.

The next step is to use NPM to install an XML package (local to my project’s folder).

$ npm install xml

Create a file in your project’s folder called app.js and add the following code to it:

var ATOM = require('./atom');
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
 
// Feed-level metadata; both URLs point at the local demo server.
var feed = new ATOM({
    title: 'ATOM Feed',
    description: 'This is an ATOM feed',
    feed_url: 'http://localhost:8888/',
    site_url: 'http://localhost:8888/',
    author: 'GiantFlyingSaucer'
});

// A single hardcoded entry -- enough for this demo.
feed.item({
    title:  'ATOM Entry',
    description: 'Hello World',
    url: 'http://localhost:8888/',
    guid: 'urn:uuid:60a76c80-d399-11d2-b93C-0003939e0cf1',
    author: 'Guest Author',
    date: '2012-05-20T21:50:02Z'
});

// Serialize once up front; every worker serves the same static XML.
var xml = feed.xml();
// Workers spawned by the master, tracked so dead ones can be pruned.
var workers = [];
 
// Serve the prebuilt feed XML on every request. The worker deliberately
// exits one second after answering so the cluster death handling in the
// master can be observed.
// Uses the `http` module already required at the top of the file instead
// of re-requiring it inline.
var server = http.createServer(function (req, res) {
    if (req.url === '/favicon.ico') {
        // BUG FIX: previously this returned without responding, leaving the
        // browser's favicon request hanging until it timed out.
        res.writeHead(404);
        res.end();
        return;
    }
    res.writeHead(200);
    res.end(xml);
    console.log('HTTP request answered by Worker (PID): ' + process.pid);

    // BUG FIX: setTimeout, not setInterval -- we only want to exit once,
    // and setInterval would leak a new repeating timer per request.
    setTimeout(function () {
        process.exit(0);
    }, 1000);
});
 
// Master forks one worker per CPU core; workers all share port 8888.
// (isMaster is the legacy alias for isPrimary on newer Node -- both work.)
if (cluster.isMaster) {
    for (var i = 0; i < numCPUs; i++) {
        var worker = cluster.fork();
        workers.push(worker);
        // BUG FIX: the worker's PID lives at worker.process.pid; bare
        // worker.pid was the pre-0.8 cluster API and is undefined now.
        console.log('Starting Worker (PID): ' + worker.process.pid);
    }

    // BUG FIX: the 'death' event was renamed to 'exit' in Node 0.8, so on
    // any supported Node version 'death' never fires.
    cluster.on('exit', function (worker) {
        console.log('Worker (PID) ' + worker.process.pid + ' has stopped');
        // Prune the dead worker. Iterate backwards so splicing while
        // looping cannot skip elements.
        for (var i = workers.length - 1; i >= 0; i--) {
            if (workers[i].process.pid === worker.process.pid) {
                workers.splice(i, 1);
                console.log('Workers array length is now: ' + workers.length);
            }
        }
    });
} else {
    // Worker: serve HTTP; Cluster lets every worker listen on the same port.
    server.listen(8888);
}

Well, that's a lot of code, so let's walk through some of it.

I create a simple ATOM feed with one entry:

// Feed-level metadata; both URLs point at the local demo server.
var feed = new ATOM({
    title: 'ATOM Feed',
    description: 'This is an ATOM feed',
    feed_url: 'http://localhost:8888/',
    site_url: 'http://localhost:8888/',
    author: 'GiantFlyingSaucer'
});

// A single hardcoded entry -- enough for this demo.
feed.item({
    title:  'ATOM Entry',
    description: 'Hello World',
    url: 'http://localhost:8888/',
    guid: 'urn:uuid:60a76c80-d399-11d2-b93C-0003939e0cf1',
    author: 'Guest Author',
    date: '2012-05-20T21:50:02Z'
});

That would be a bare minimum at best but enough for this example. From there I create an array to hold all the workers that will be spun up when cluster.fork() is called per CPU. I'm on a Mac Mini with 4 cores so my machine will spin up four workers. Yours will differ depending on the number of CPU cores your computer has.

var workers = [];

The real magic happens in here:

// Master forks one worker per CPU core; workers all share port 8888.
// (isMaster is the legacy alias for isPrimary on newer Node -- both work.)
if (cluster.isMaster) {
    for (var i = 0; i < numCPUs; i++) {
        var worker = cluster.fork();
        workers.push(worker);
        // BUG FIX: the worker's PID lives at worker.process.pid; bare
        // worker.pid was the pre-0.8 cluster API and is undefined now.
        console.log('Starting Worker (PID): ' + worker.process.pid);
    }

    // BUG FIX: the 'death' event was renamed to 'exit' in Node 0.8, so on
    // any supported Node version 'death' never fires.
    cluster.on('exit', function (worker) {
        console.log('Worker (PID) ' + worker.process.pid + ' has stopped');
        // Prune the dead worker. Iterate backwards so splicing while
        // looping cannot skip elements.
        for (var i = workers.length - 1; i >= 0; i--) {
            if (workers[i].process.pid === worker.process.pid) {
                workers.splice(i, 1);
                console.log('Workers array length is now: ' + workers.length);
            }
        }
    });
} else {
    // Worker: serve HTTP; Cluster lets every worker listen on the same port.
    server.listen(8888);
}

A check is done to see if we are the master; if so, a loop runs based upon how many CPU cores I have. I store those new workers for later usage. A "death" event handler is added to the code to capture any workers that suddenly die for whatever reason. In that case I remove them from the array of workers and log some info. If this is not the master then we simply spin up a basic HTTP server and wait for incoming requests. The neat thing about using Cluster is that all of the workers can share the same port, in this case 8888.

To make this a better example I added code to kill off a process after it answers an HTTP request. You can remove that part obviously, but I'd recommend you see how it works first. Also, you can watch for the "death" event and simply spin up a replacement worker when one dies. In my scenario I just let it die.

Time to run the example:

$ node app.js

Here are example results from hitting refresh periodically on the following URL: http://localhost:8888/

<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-US">
  <id>urn:uuid:90c76b31-d399-21d2-b93C-0004449e0ca7</id>
  <link type="text/html" rel="alternate" href="http://localhost:8888/"/>
  <link type="application/atom+xml" rel="self" href="http://localhost:8888/"/>
  <title>ATOM Feed</title>
  <updated>2012-04-21T16:16:32.759Z</updated>
  <entry>
    <id>urn:uuid:77c76b31-d549-21d2-b93C-8829942e4b5f</id>
    <published>2012-05-20T21:50:02.000Z</published>
    <title>ATOM Entry</title>
    <content type="xhtml" xml:lang="en">
      <![CDATA[ Hello World ]]>
    </content>
  </entry>
</feed>

The terminal output:

Starting Worker (PID): 311
Starting Worker (PID): 312
Starting Worker (PID): 313
Starting Worker (PID): 314
HTTP request answered by Worker (PID): 314
Worker (PID) 314 has stopped
Workers array length is now: 3
HTTP request answered by Worker (PID): 313
Worker (PID) 313 has stopped
Workers array length is now: 2
HTTP request answered by Worker (PID): 311
Worker (PID) 311 has stopped
Workers array length is now: 1
HTTP request answered by Worker (PID): 312
Worker (PID) 312 has stopped
Workers array length is now: 0

Simply hit CTRL-C to terminate the master.

 

 

 

 

 

 

 

 

 

 

Published at DZone with permission of Chad Lung, author and DZone MVB. (source)

(Note: Opinions expressed in this article and its replies are the opinions of their respective authors and not those of DZone, Inc.)