I am using node-xml2js to parse XML, and I am having trouble parsing XML from a URL. I've used this SO answer as a model, but when I use the code below, I get null as the result. Any thoughts on what I'm doing wrong?
UPDATE: I've updated the URL for the XML to the actual site used.
var eyes = require('eyes');
var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');

var parser = new xml2js.Parser();
parser.addListener('end', function(result) {
    eyes.inspect(result);
    console.log('Done.');
});

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(result) {
    result.on('data', function(data) {
        parser.parseString(data);
    });
}).on('error', function(e) {
    console.log('Got error: ' + e.message);
});
xml2js doesn't appear to be able to parse XML chunk by chunk, so we need to buffer the entire HTTP response. To do that I've used a global variable here, but it would be better to use something like concat-stream (shown at the end of this answer).
I have tried the following, and it works for me:
var eyes = require('eyes');
var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');

var parser = new xml2js.Parser();
parser.on('error', function(err) { console.log('Parser error', err); });

var data = '';
https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(res) {
    if (res.statusCode >= 200 && res.statusCode < 400) {
        res.on('data', function(data_) { data += data_.toString(); });
        res.on('end', function() {
            console.log('data', data);
            parser.parseString(data, function(err, result) {
                console.log('FINISHED', err, result);
            });
        });
    }
});
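One thing worth noting: the status-code check above silently ignores error responses. A small sketch of handling the other branch (the log message is my own choice):

var https = require('https');

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(res) {
    if (res.statusCode >= 200 && res.statusCode < 400) {
        // ... buffer and parse as above ...
    } else {
        console.log('Unexpected status code:', res.statusCode);
        res.resume(); // drain the response so the socket is released
    }
});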
We only parse the XML once the response has finished sending. xml2js uses sax under the hood, which seems to have streaming support, but I'm not sure whether xml2js takes advantage of it.
I created a small example that uses chunk-by-chunk parsing (similar to your example), but it fails with a parse error because an individual chunk is not valid XML on its own - that's why we need to buffer the entire response, as the sketch below shows. If your XML is very big, try a different parser like sax, which has streaming support.
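A minimal sketch of that failure (the payload and chunk boundary are made up for illustration):

var xml2js = require('xml2js');

var parser = new xml2js.Parser();

// Hypothetical chunk boundary: the response arrives split mid-tag,
// so neither piece is well-formed XML on its own.
var chunk1 = '<root><item>hello</it';
var chunk2 = 'em></root>';

// Parsing a single chunk fails:
parser.parseString(chunk1, function(err, result) {
    console.log(err && err.message); // parse error (unclosed tag)
});

// Only the buffered whole is valid:
parser.parseString(chunk1 + chunk2, function(err, result) {
    console.log(result); // { root: { item: [ 'hello' ] } }
});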
You can also add an error handler to the parser so you can print any errors it encounters.
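Applied to the event-listener style from your original snippet, that might look like this (a sketch; the messages are placeholders):

var xml2js = require('xml2js');

var parser = new xml2js.Parser();

parser.addListener('end', function(result) {
    console.log('Done.', result);
});

// Without this, a parse failure is easy to miss: the 'end'
// listener simply never fires with a useful result.
parser.on('error', function(err) {
    console.log('Parser error', err);
});

// Feeding it an invalid document now reports the problem:
parser.parseString('<root><unclosed>');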
With concat-stream you can collect all the .on('data', ...) chunks more elegantly:
var https = require('https');
var xml2js = require('xml2js');
var concat = require('concat-stream');

var parser = new xml2js.Parser();
parser.on('error', function(err) { console.log('Parser error', err); });

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(resp) {
    resp.on('error', function(err) {
        console.log('Error while reading', err);
    });
    resp.pipe(concat(function(buffer) {
        var str = buffer.toString();
        parser.parseString(str, function(err, result) {
            console.log('Finished parsing:', err, result);
        });
    }));
});
You can use sax to avoid buffering the entire file (in case your XML files are big). It is more low-level; however, piping the response into it as a stream is very similar.
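A minimal sketch of that approach, reacting to events as they arrive instead of building a full result object (which events you handle depends on what you need from the document):

var https = require('https');
var sax = require('sax');

var saxStream = sax.createStream(true); // true = strict mode

saxStream.on('error', function(err) {
    console.log('Parse error', err);
    // sax lets you clear the error and keep parsing:
    this._parser.error = null;
    this._parser.resume();
});

saxStream.on('opentag', function(node) {
    console.log('Opened tag:', node.name);
});

saxStream.on('text', function(text) {
    // character data arrives here, possibly in several pieces
});

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(resp) {
    resp.pipe(saxStream);
});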