Trying to write a function in node.js that will get the element by xpath.
I have an xpath of the desired dom element like
xpath = '/html/body/div/div[2]/div/h1/span'
My DOM is loaded in cheerio via fs module (because I have this webpage stored locally):
var file = fs.readFileSync( "aaa.html" )
var inDom = cheerio.load( file )
Then I am trying to iterate via each xpath part, get the element of the dom tree, check it's children if the name and element number matches, and if they do, store rez as this mathed element. Then I do continue to dig down with new xpath part. The code looks like this, but it fails to get what I want because just after I get the first mach and set rez as the matched element, in the next for loop cycle this new element seems not to have any children elements.
var rez = inDom('html');
var xpath = inXpath.split( "/" );
for( var i = iterateStart; i < xpath.length; i++ ) {
var selector = xpath[ i ].split('[')[0];
var matches = xpath[ i ].match(/\[(.*?)\]/);
var child = 0;
if( matches ) {
child = matches[ 1 ];
}
for( var k = 0; k < rez.length; k++ ) {
var found = false
var curE = rez[ k ]
for( var p = 0; p < curE.children.length; p++ ) {
var curE_child = curE.children[ p ]
if( curE_child.name = selector ) {
if( child > 0 ) {
child--
}
else {
rez = curE_child
found = true
break
}
}
}
if( found ) {
break
}
}
}
Can anybody help me with the code using mentioned node.js modules?
It seems like you are doing way more work then you need to find the desired element. Can you post a sample html page?
Cheerio provides a higher-level api for finding elements that you should use.
var html = fs.readFileSync('aaa.html')
var $ = cheerio.load(html)
var selector = 'div' // some selector here which I can tune to the example html page
var parent = $(selector)
var childSelector = 'p' // some other selector
var children = parent.find(childSelector)