In my node.js, express app, I am making an ajax call with the superagent middleware. The call fetches database data in a complex array using the node-mysql middleware through quite a few database queries.
Before pasting the code, I am trying to explain in words what I am trying to do although the code would suffice to say what it wants to do with the addition that all the asynchronous things inside the first callback should be done in the synchronous way.
Explanation:
Inside the callback of the first query , a for loop is executed to run the second query multiple times and after each loop, the next loop is to be called only after the callback of the second query is complete. Things are same for the next code lines as well.
Code:
You can however skip the innards( marked in comments) of the for loops to make things brief and easy if you want.
conn.query("SELECT * FROM `super_cats`",function(error, results, fields) {
if(error){console.log("erro while fetching products for homepage "+ error);}
for(var i in results) { // FIRST FOR LOOP INSIDE THE FIRST QUERY CALLBACK
/*Innards of for loop starts*/
var elem = new Object();
var supcat_id=results[i].id;
elem.super_id =supcat_id;
elem.cats=new Array();
var cat= '';
/*Innards of for loop ends*/
conn.query("SELECT * FROM `categories` WHERE `supcat_id`="+supcat_id,function(error_cats, results_cats, fields_cats) {
if (error_cats) {console.log("erro while fetching cats for menu " + error_cats);}
for(var j in results_cats) {
/*Innards of for loop starts*/
cat= new Object();
var cat_id=results_cats[j].id;
cat.cat_id=cat_id;
cat.cat_name=results_cats[j].cat_name;
cat.subcats=new Array();
/*Innards of for loop starts*/
conn.query("SELECT * FROM `subcategories` WHERE `category`="+cat_id,function(error_subcats, results_subcats, fields_subcats) {
if (error_subcats) {console.log("erro while fetching subcats for menu " + error_subcats);}
for(var k in results_subcats ){
/*Innards of for loop starts*/
var subcat=new Object();
var subcat_id=results_subcats[k].id;
subcat.subcat_id=subcat_id;
subcat.subcat_name=results_subcats[k].subcategory;
cat.subcats.push(subcat);
elem.cats.push(cat);
/*Innards of for loop starts*/
}// end of for loop for results_subcats
});
}// end of for loop for result_cats
});
super_cats.push(elem);
}// end of for supercat results
res.send(super_cats)
});
I tried with the async middleware but in vain as I just could not figure out which function to use in this case .
To be brief, requirements are :
1) All the asynchronous things inside the first callback should be done in the synchronous way.
2) the response should be sent to the ajax call only after all the calculations are done and not before that (as it would probably happen if things were asynchronous as they are in the existing code, wouldn't it ?)
It may be just semantics, but it's important to understand that you cannot run this in a synchronous way. You have to run it asynchronously, and manage the order of the processing to get the desired effect. I find it useful to think about these kinds of problems more in terms of how I want to transform the data (à la functional programming) rather than the imperative code I would write in a more synchronous environment.
From what I can tell by the code, you want to end up with a data structure in super_cats
that looks something like this:
[
{
super_id: 1,
cats: [
{
cat_id: 2,
cat_name: "Category",
subcats: [
{
subcat_id: 3,
subcat_name: "Subcategory"
},
...
]
},
...
]
},
...
]
Let's start by extracting this into a single function call with a single callback.
function getCategoryTree(callback) {
}
Now, then, let's take it from the top. You want to run a single asynchronous function (an SQL query), and you want to produce an array with one entry per result. That sounds like a map
operation to me. However, since we want one of the values (cats
) to be determined asynchronously, we need to use an asynchronous map, which the async
library provides.
Let's just fill in the async.map
signature for now; we want to map over our results
(this is the functional equivalent of our for
loop), and for each one we want to turn the result into something—the asynchronous function that does the something is called the iterator. Finally, once we have all our transformed array elements, we want to call the callback given to our function.
function getCategoryTree(callback) {
conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
async.map(results, iterator, callback);
});
}
Let's create a new function for getting the top-level category information, and use its name in place of our iterator
placeholder.
function getCategoryTree(callback) {
conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
async.map(results, getSuperCategory, callback);
});
}
function getSuperCategory(resultRow, callback) {
}
Now we need to decide what we want to give back for each resultRow
. Based on our diagram above, we want an object with super_id
equal to the row's ID, and cats
equal to all the categories in the top-level category. However, since cats
is also determined asynchronously, we need to run the next query and transform those results before we can move on.
Similar to last time, we want each item in our cats
array to be an object with some information from the query's result, but we also want a subcats
array, which is again determined asynchronously, so we'll use async.map
again. This time, however, we'll use an anonymous function for the callback, since we want to do something with the results before we give them to the higher-level callback.
function getSuperCategory(resultItem, callback) {
var supcat_id = resultItem.id;
conn.query("SELECT * FROM `categories` WHERE supcat_id` = " + supcat_id, function(error, results, fields) {
async.map(results, getCategory, function(err, categories) {
callback(err, { super_id: supcat_id, cats: categories });
});
});
}
As you can see, once this async.map
is done, it means we have all the categories under this this super-category; thus, we can call our callback
with the object we want to be in the array.
Now that that's done, we just need to implement getCategory
. It will look very similar to getSuperCategory
, because we want to do basically the same thing—for each result, return an object that has some data from the query, but also an asynchronous component.
function getCategory(resultItem, callback) {
var cat_id = resultItem.id;
var cat_name = resultItem.cat_name;
conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
async.map(results, getSubCategory, function(err, subcategories) {
callback(err, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
});
});
}
Now, we just need to implement getSubCategory
.
function getSubCategory(resultItem, callback) {
callback(null, {
subcat_id: resultItem.id,
subcat_name: resultItem.subcategory
});
}
Oops! The data we need from getSubCategory
doesn't have an asynchronous component! It turns out we didn't need that last async.map
at all; we could have used a regular array map; let's change getCategory
and getSubCategory
to work that way.
function getCategory(resultItem, callback) {
var cat_id = resultItem.id;
var cat_name = resultItem.cat_name;
conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
var subcategories = results.map(getSubCategory);
callback(error, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
});
}
function getSubCategory(resultItem) {
return {
subcat_id: resultItem.id,
subcat_name: resultItem.subcategory
};
}
It's worth noting that our original method worked fine; if there's a chance getSubCategory
ever has an async component, you could just leave it as it was.
And that's it! Here's the code that I wrote as I was writing this answer; note that I had to fake out the SQL a bit, but I think the idea is there:
var async = require("async");
// fake out sql queries
queryNum = 0;
var conn = {
query: function(query, callback) {
queryNum++;
var results = [1, 2, 3, 4, 5].map(function(elem) {
return {
id: queryNum + "-" + elem,
cat_name: "catname-" + queryNum + "-" + elem,
subcategory: "subcategory-" + queryNum + "-" + elem
};
});
callback(null, results, null);
}
};
function getCategoryTree(callback) {
conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
async.map(results, getSuperCategory, callback);
});
}
function getSuperCategory(resultItem, callback) {
var supcat_id = resultItem.id;
conn.query("SELECT * FROM `categories` WHERE supcat_id` = " + supcat_id, function(error, results, fields) {
async.map(results, getCategory, function(err, categories) {
callback(err, { super_id: supcat_id, cats: categories });
});
});
}
function getCategory(resultItem, callback) {
var cat_id = resultItem.id;
var cat_name = resultItem.cat_name;
conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
var subcategories = results.map(getSubCategory);
callback(error, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
});
}
function getSubCategory(resultItem) {
return {
subcat_id: resultItem.id,
subcat_name: resultItem.subcategory
};
}
getCategoryTree(function(err, result) {
console.log(JSON.stringify(result, null, " "));
});
There are some inefficiencies here, but for simplicity's sake I've glossed over them. For example, rather than running the second sub-query over and over, you could query at once for all the category IDs, then query all the categories at once, etc. Then, once you have all the data, you could loop over each array synchronously to pull out the pieces you need.
In addition, there are better ways to store tree structures in relational databases; in particular, take a look at Modified Preorder Tree Traversal.