node.js performance with zeromq vs. Python vs. Java

Scott A picture Scott A · Jul 11, 2011 · Viewed 25.5k times · Source

I've written a simple echo request/reply test for zeromq using node.js, Python, and Java. The code runs a loop of 100K requests. The platform is a 5yo MacBook Pro with 2 cores and 3G of RAM running Snow Leopard.

node.js is consistently an order of magnitude slower than the other two platforms.

Java: real 0m18.823s user 0m2.735s sys 0m6.042s

Python: real 0m18.600s user 0m2.656s sys 0m5.857s

node.js: real 3m19.034s user 2m43.460s sys 0m24.668s

Interestingly, with Python and Java the client and server processes both use about half of a CPU. The client for node.js uses just about a full CPU and the server uses about 30% of a CPU. The client process also has an enormous number of page faults leading me to believe this is a memory issue. Also, at 10K requests node is only 3 times slower; it definitely slows down more the longer it runs.

Here's the client code (note that the process.exit() line doesn't work, either, which is why I included an internal timer in addition to using the time command):

var zeromq = require("zeromq");

var counter = 0;
var startTime = new Date();

var maxnum = 10000;

var socket = zeromq.createSocket('req');

socket.connect("tcp://127.0.0.1:5502");
console.log("Connected to port 5502.");

function moo()
{
    process.nextTick(function(){
        socket.send('Hello');
        if (counter < maxnum)
        {
            moo();
        }
    });
}

moo();

socket.on('message',
          function(data)
          {
              if (counter % 1000 == 0)
              {
                  console.log(data.toString('utf8'), counter);
              }

              if (counter >= maxnum)
              {
                  var endTime = new Date();
                  console.log("Time: ", startTime, endTime);
                  console.log("ms  : ", endTime - startTime);
                  process.exit(0);
              }

              //console.log("Received: " + data);
              counter += 1;

          }
);

socket.on('error', function(error) {
  console.log("Error: "+error);
});

Server code:

var zeromq = require("zeromq");

var socket = zeromq.createSocket('rep');

socket.bind("tcp://127.0.0.1:5502",
            function(err)
            {
                if (err) throw err;
                console.log("Bound to port 5502.");

                socket.on('message', function(envelope, blank, data)
                          {
                              socket.send(envelope.toString('utf8') + " Blancmange!");
                          });

                socket.on('error', function(err) {
                    console.log("Error: "+err);
                });
            }
);

For comparison, the Python client and server code:

import zmq

context = zmq.Context()
socket = context.socket(zmq.REQ)
socket.connect("tcp://127.0.0.1:5502")

for counter in range(0, 100001):
    socket.send("Hello")
    message = socket.recv()

    if counter % 1000 == 0:
        print message, counter



import zmq

context = zmq.Context()
socket = context.socket(zmq.REP)

socket.bind("tcp://127.0.0.1:5502")
print "Bound to port 5502."

while True:
    message = socket.recv()
    socket.send(message + " Blancmange!")

And the Java client and server code:

package com.moo.test;

import org.zeromq.ZMQ;
import org.zeromq.ZMQ.Context;
import org.zeromq.ZMQ.Socket;

public class TestClient
{
    public static void main (String[] args)
    {
        Context context = ZMQ.context(1);

        Socket requester = context.socket(ZMQ.REQ);
        requester.connect("tcp://127.0.0.1:5502");

        System.out.println("Connected to port 5502.");

        for (int counter = 0; counter < 100001; counter++)
        {
            if (!requester.send("Hello".getBytes(), 0))
            {
                throw new RuntimeException("Error on send.");
            }

            byte[] reply = requester.recv(0);
            if (reply == null)
            {
                throw new RuntimeException("Error on receive.");
            }

            if (counter % 1000 == 0)
            {
                String replyValue = new String(reply);
                System.out.println((new String(reply)) + " " + counter);
            }
        }

        requester.close();
        context.term();
    }
}

package com.moo.test;

import org.zeromq.ZMQ;
import org.zeromq.ZMQ.Context;
import org.zeromq.ZMQ.Socket;

public class TestServer
{
    public static void main (String[] args) {
        Context context = ZMQ.context(1);

        Socket socket  = context.socket(ZMQ.REP);
        socket.bind("tcp://127.0.0.1:5502");

        System.out.println("Bound to port 5502.");

        while (!Thread.currentThread().isInterrupted())
        {
            byte[] request = socket.recv(0);
            if (request == null)
            {
                throw new RuntimeException("Error on receive.");
            }

            if (!socket.send(" Blancmange!".getBytes(), 0))
            {
                throw new RuntimeException("Error on send.");
            }
        }

        socket.close();
        context.term();
    }
}

I would like to like node, but with the vast difference in code size, simplicity, and performance, I'd have a hard time convincing myself at this point.

So, has anyone seen behavior like this before, or did I do something asinine in the code?

Answer

chjj picture chjj · Aug 26, 2011

You're using a third party C++ binding. As far as I understand it, the crossover between v8's "js-land" and bindings to v8 written in "c++ land", is very expensive. If you notice, some popular database bindings for node are implemented entirely in JS (although, partly I'm sure, because people don't want to compile things, but also because it has the potential to be very fast).

If I remember correctly, when Ryan Dahl was writing the Buffer objects for node, he noticed that they were actually a lot faster if he implemented them mostly in JS as opposed to C++. He ended up writing what he had to in C++, and did everything else in pure javascript.

So, I'm guessing part of the performance issue here has to do with that particular module being a c++ binding.

Judging node's performance based on a third party module is not a good medium for determining its speed or quality. You would do a lot better to benchmark node's native TCP interface.