python sqlite3 vs. node-sqlite3: performance test

I am recently learning node.js and trying to duplicate a small website from flask/python to Node.js. Due to memory constraint I ended up with using SQLite as my production database – although it says never to use SQLite for production XD.

One thing I noticed is that node-sqlite3 (the most active module from Node.js community for SQLite access) seems to a little bit sluggish compared to python, especially when doing pagination. Really? Everything in Node.js is async and shouldn’t it be more responsive? Baffled and curious, I did follow comparison: do multiple SELECT on a small database file (223 megabyes) and time it. Below is my code for each:

Node.js

var sqlite3 = require('sqlite3').verbose();
var util=require('util');
var dbfn = './db.sqlite';

var db = new sqlite3.Database(dbfn,sqlite3.OPEN_READONLY, function(err) {
  if(err) {
    console.log(err);
  } else {
    var perpage=10,max=500,table='data2012',kw='sa';
    for(var offset=1;offset<max;offset+=perpage) {
      stmt = util.format('select * from %s where xxx like "%s%" limit %d offset %d',table, kw,  perpage, offset);
      db.all(stmt);
   }
 }
});

Python:

import sqlite3
DATABASE = './db.sqlite'
db=None
try:
  db = sqlite3.connect(DATABASE)
except:
  sys.stderr.write(os.getcwd())

table='data2011'
kw=('sa%',)
perpage=10
max=500
for offset in xrange(1,max,perpage):
  stmt = 'select * from %s where xxx like ? limit %d offset %d'%(table, perpage, offset)
  rs = db.execute(stmt, kw)
  rs.fetchall()
db.close()

Ignore the table/columns the point is that it has more than enough data to complete the loop. Each was run 3 times and here’s the average:

  • Node-sqlite3: 2.945s
  • Python 1.542s

Looks like Python module is a lot faster. To eliminate any possible uncertainty I increased the loop# to be 1000. Again I ran each for 3 times for an average:

  • Node-sqlite3: 10.085s
  • Python: 6.040s

Python is still faster, but the gap is shrinking. How about 3000 iterations?

  • Node-sqlite3:1m5s
  • Python: 56.154s

5000?

  • Node-sqlite3: 2m23s
  • Python: 2m28s

So the observation so far is that python-sqlite3 beats node-sqlite3 when offset is small (remember sqlite doesn’t really support offset; it reads off all the data and discard  unneeded ones). When offset is getting large, both are getting equally slow. Maybe Node.js overall has a higher throughput its sqlite3 module is less satisfying, at least as of today.

BTW, one trick to optimize Sqlite access performance is to VACUUM the database. This made huge  difference on my database  file.

Coding in the node.js way

First of all node.js is about asynchronousness. So instead of writing up your functions one by one, all lined up with same indentation which are supposed to execute sequentially, with node.js, after a while I found myself to soon jump around method definitions, and inevitably embed functions with deeper and deeper indentation…

It’s getting funny.

to be updated.

prototype, constructor function, etc. in Javascript

Javascript is a loose-typed language and sometimes it can be very confusing. I wrote some quick code as below. In conclusion:

  • constructor functions are like classes in other languages
  • prototype only changes objects through operator ‘new’. so any methods attached through prototype can only be invoked from objects. also they are shared across different objects (just like other languages).
  • there’s a way to define “class” methods as well: defined directly on the constructor function. this is like a public static method to other languages

some helpful urls

http://sporto.github.io/blog/2013/02/22/a-plain-english-guide-to-javascript-prototypes/

http://stackoverflow.com/questions/572897/how-does-javascript-prototype-work

function f() {
this.greeting='hello';
myname:'fooname'
}
f.var1 = 'world';
f.method1 = function() {
console.log('hello from method1');
}

var foo = new f();

console.log('f.myname:'+f.myname); //undefined
console.log('foo.myname:'+foo.myname);//undefined</pre>
console.log('f.var1:'+f.var1); //world
console.log('foo.var1:'+foo.var1);//undefined
console.log('f.greeting:'+f.greeting);//undefined
console.log('foo.greeting:'+foo.greeting);//hello

console.log('f.prototype:'+f.prototype);//object
console.log('foo.prototype:'+foo.prototype);//undefined
console.log('foo.__proto__:'+foo.__proto__);//undefined

f.prototype.age=13;
console.log('f.age:'+f.age);//undefined
console.log('foo.age:'+foo.age);//13

f.method1(); //hello from method1
// foo.method1(); TypeError: Object #&lt;f&gt; has no method 'method1'

var car = {
myname:'i am a car'
}

var ford = {
kind:'i am a ford'
}

console.log('ford.myname:'+ford.myname);//undefined
ford.__proto__ = car;
console.log('ford.myname:'+ford.myname);//i am a car

node.js backward compatibility with node-sqlite3

Node.js is so radically evolving that there’s many compatibility issues such that packages that used to work now simply fail. E.g. node-sqlite3 stopped working (the sample code on its homepage says ‘segfault’ under node 0.10.x). Fortunately after complaining on its github I saw it got fixed quickly: https://github.com/developmentseed/node-sqlite3/commit/25db957806ae651dcd31292c6b932c379cfd2662 . I don’t use boost so I am not sure how that is related.

An alternative is to use node 0.8.x.

How to draw a circle – 2

Since we know what a circle looks like, why don’t we directly plot each point since we know x must span from [x-x0,x+x0]?

  y = y0 +/- sqrt(r*r-(x-x0)(x-x0))

<!DOCTYPE html>
<html>
<head>
<script>
function plotPoint(ctx,x,y) {
  ctx.fillRect(x,y,1,1);
}

function getRandomInt (min, max) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
}

function drawRandom() {
   var c=document.getElementById("myCanvas");
  var ctx=c.getContext("2d");
  drawCircle(getRandomInt(100,800),getRandomInt(100,400),getRandomInt(50,100),ctx);
}
function drawCircle(x0,y0,r,ctx) {
  var r2=r*r;
  var step=.1;
  var y;
  for(var x=x0-r;x<=x0+r;x+=step) {
    var dx2 = (x-x0)*(x-x0);
    y = y0 + Math.sqrt(r2-dx2);
    plotPoint(ctx,x,y);
    y = y0 - Math.sqrt(r2-dx2);
    plotPoint(ctx,x,y);
  }
}

function draw() {
  var c=document.getElementById("myCanvas");
  var ctx=c.getContext("2d");

try {
  var x0 = parseInt(document.getElementById('x0').value);
  var y0 = parseInt(document.getElementById('y0').value);
  var r = parseInt(document.getElementById('r').value);
  if ('NaN' == x0||'NaN'==y0||'NaN'==r) return;
  drawCircle(x0,y0,r,ctx);
  
} catch(err) {
  alert(err);
}

}
</script>
</head>
<body>

X0: <input type=text id=x0 value=300 size=3 />
Y0: <input type=text id=y0 value=300 size=3 />
R: <input type=text id=r value=100 size=3 />
<input type=button value="draw a circle" onclick="javascript:draw()"/>
<input type=button value="draw a random circle" onclick="javascript:drawRandom()"/>

<p>

<canvas id="myCanvas" width="800" height="600" style="border:1px solid #d3d3d3;">
Your browser does not support the HTML5 canvas tag.</canvas>

</p>


</body>
</html>

How to draw a circle

update: turns out this is really a dumb idea.

Here’s the code to draw a circle in relatively OK time (apparently not the optimal solution but I don’t have enough graphics background to come up with one).

We know the equation for circles are:

  (x-x0)(x-x0) + (y-y0)(y-yo) = r*r

(x0,y0) being the center and r being the radius.


<!DOCTYPE html>
<html>
<head>
<script>
function plotPoint(ctx,x,y) {
  ctx.fillRect(x,y,1,1);
}
function onCircle(x,y,x0,y0,r) {
  var dx=x-x0;
  var dy=y-y0;
  var eps=20;
  if( Math.abs(r*r-dx*dx-dy*dy)<=eps) return true;
  return false;
}

function getRandomInt (min, max) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
}

function drawRandom() {
   var c=document.getElementById("myCanvas");
  var ctx=c.getContext("2d");
  drawCircle(getRandomInt(100,800),getRandomInt(100,400),getRandomInt(50,100),ctx);
}

function drawCircle(x0,y0,r,ctx) {
  var r2=r/Math.sqrt(2);
  var step=.2;
  for(var x=x0-r;x<=x0-r2;x+=step) {
     for(var y=y0-r;y<=y0+r;y+=step) {
       if(onCircle(x,y,x0,y0,r))
         plotPoint(ctx,x,y);
     }
  }
    for(var x=x0+r2;x<=x0+r;x+=step) {
     for(var y=y0-r;y<=y0+r;y+=step) {
       if(onCircle(x,y,x0,y0,r))
         plotPoint(ctx,x,y);
     }
  }
    for(var x=x0-r;x<=x0+r;x+=step) {
     for(var y=y0-r;y<=y0-r2;y+=step) {
       if(onCircle(x,y,x0,y0,r))
         plotPoint(ctx,x,y);
     }
  }
    for(var x=x0-r;x<=x0+r;x+=step) {
     for(var y=y0+r2;y<=y0+r;y+=step) {
       if(onCircle(x,y,x0,y0,r))
         plotPoint(ctx,x,y);
     }
  }
}

function draw() {
  var c=document.getElementById("myCanvas");
  var ctx=c.getContext("2d");

try {
  var x0 = parseInt(document.getElementById('x0').value);
  var y0 = parseInt(document.getElementById('y0').value);
  var r = parseInt(document.getElementById('r').value);
  if ('NaN' == x0||'NaN'==y0||'NaN'==r) return;
  drawCircle(x0,y0,r,ctx);
  
} catch(err) {
  alert(err);
}

}
</script>
</head>
<body>

X0: <input type=text id=x0 value=300 size=3 />
Y0: <input type=text id=y0 value=300 size=3 />
R: <input type=text id=r value=100 size=3 />
<input type=button value="draw a circle" onclick="javascript:draw()"/>
<input type=button value="draw a random circle" onclick="javascript:drawRandom()"/>

<p>

<canvas id="myCanvas" width="800" height="600" style="border:1px solid #d3d3d3;">
Your browser does not support the HTML5 canvas tag.</canvas>

<script>


</script> 
</p>

</body>
</html>