PhantomJS has two really handy callbacks, onLoadStarted and onLoadFinished, which let you essentially pause execution while a page is loading. But I've been searching and can't find an equivalent for when you click() a submit button or hyperlink. A similar page load happens, but onLoadStarted doesn't get called for this event, I guess because there is no explicit page.open() involved. I'm trying to figure out a clean way to suspend execution while this load takes place.
One solution is obviously nested setTimeout calls, but I'd like to avoid that because it's hacky and relies on trial and error instead of something reliable and robust, like testing against a condition or waiting for an event.
Is there a specific callback for this kind of page load that I missed? Or maybe there's some kind of generic code pattern that can deal with this sort of thing?
EDIT:
I still haven't figured out how to get it to pause. Here's the code that doesn't call the onLoadStarted()
function when I call the click()
command:
// Question's script: tries to track page loads with a flag, fills in a login
// form inside the page, clicks the submit button and renders a screenshot.
// NOTE: this snippet is truncated in the post; the closing braces of the
// `else` branch and of the page.open() callback are not shown.

// Flag toggled by the two load callbacks below.
var loadInProgress = false;
// Marks a load as in progress and logs it.
page.onLoadStarted = function() {
loadInProgress = true;
console.log("load started");
};
// Clears the in-progress flag and logs it.
page.onLoadFinished = function() {
loadInProgress = false;
console.log("load finished");
};
// Opens the login page; `status` is compared against 'success' below.
page.open(loginPage.url, function (status) {
if (status !== 'success') {
// Load failed: log, write 1 to the file at filePath + errorState, and quit.
console.log('Unable to access network');
fs.write(filePath + errorState, 1, 'w');
phantom.exit();
} else {
// The function below runs inside the page; loginPage and credentials are
// passed in as arguments after the function.
page.evaluate(function (loginPage, credentials) {
// NOTE(review): presumably this message is only visible if a
// page.onConsoleMessage handler is installed; confirm against PhantomJS docs.
console.log('inside loginPage evaluate function...\n')
// Fill the user and password inputs (looked up by id) and click submit.
document.querySelector('input[id=' + loginPage.userId + ']').value = credentials.username;
document.querySelector('input[id=' + loginPage.passId + ']').value = credentials.password;
document.querySelector('input[id=' + loginPage.submitId + ']').click();
//var aTags = document.getElementsByTagName('a')
//aTags[1].click();
}, loginPage, credentials);
// Rendered right after the click is issued, without waiting for any load
// event; per the post, this captures the form before the redirect unless the
// call is delayed.
page.render(renderPath + 'postLogin.png');
console.log('rendered post-login');
I double-checked that the id is correct. The page.render() call will show that the information is submitted, but only if I put it in a setTimeout(); otherwise it renders immediately and I only see the credentials entered, before the page redirects. Maybe I'm missing something else?
I think the onLoadStarted
and onLoadFinished
functions are everything you need. Take for example the following script:
// Demo script: logs every navigation, resource and load event that PhantomJS
// reports while example.com is opened and its first link is clicked.
var webpage = require('webpage');
var page = webpage.create();

// Runs inside the page: dispatches a synthetic mouse click on the first <a>.
function clickFirstLink() {
    // click
    var clickEvent = document.createEvent('MouseEvents');
    clickEvent.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
    document.querySelector("a").dispatchEvent(clickEvent);
}

// Navigation and load lifecycle.
page.onNavigationRequested = function (url) {
    console.log('Trying to navigate to: ' + url);
};

page.onLoadStarted = function () {
    console.log('Load Started');
};

page.onUrlChanged = function (targetUrl) {
    console.log('New URL: ' + targetUrl);
};

page.onLoadFinished = function (status) {
    console.log('Load Finished: ' + status);
};

// Resource traffic.
page.onResourceRequested = function (requestData) {
    console.log('Request (#' + requestData.id + '): ' + requestData.url);
};

page.onResourceReceived = function (response) {
    // Only the "end" stage of each response is logged.
    if (response.stage === "end") {
        console.log('Response (#' + response.id + ', stage "' + response.stage + '"): ' + response.url);
    }
};

page.open("http://example.com", function () {
    page.evaluate(clickFirstLink);

    // Leave ten seconds for the follow-up navigation to be logged, then quit.
    setTimeout(function () {
        phantom.exit();
    }, 10000);
});
It prints
Trying to navigate to: http://example.com/
Request (#1): http://example.com/
Load Started
New URL: http://example.com/
Response (#1, stage "end"): http://example.com/
Load Finished: success
Trying to navigate to: http://www.iana.org/domains/example
Request (#2): http://www.iana.org/domains/example
Load Started
Trying to navigate to: http://www.iana.org/domains/reserved
Request (#3): http://www.iana.org/domains/reserved
Response (#2, stage "end"): http://www.iana.org/domains/example
New URL: http://www.iana.org/domains/reserved
Request (#4): http://www.iana.org/_css/2013.1/screen.css
Request (#5): http://www.iana.org/_js/2013.1/jquery.js
Request (#6): http://www.iana.org/_js/2013.1/iana.js
Response (#3, stage "end"): http://www.iana.org/domains/reserved
Response (#6, stage "end"): http://www.iana.org/_js/2013.1/iana.js
Response (#4, stage "end"): http://www.iana.org/_css/2013.1/screen.css
Response (#5, stage "end"): http://www.iana.org/_js/2013.1/jquery.js
Request (#7): http://www.iana.org/_img/2013.1/iana-logo-header.svg
Request (#8): http://www.iana.org/_img/2013.1/icann-logo.svg
Response (#8, stage "end"): http://www.iana.org/_img/2013.1/icann-logo.svg
Response (#7, stage "end"): http://www.iana.org/_img/2013.1/iana-logo-header.svg
Request (#9): http://www.iana.org/_css/2013.1/print.css
Response (#9, stage "end"): http://www.iana.org/_css/2013.1/print.css
Load Finished: success
It shows that clicking a link emits the LoadStarted event once and NavigationRequested event twice, because there is a redirect. The trick is to add the event handlers before doing the action:
// Opens example.com, clicks its first link and renders the page the click
// leads to. The load handlers are installed before the click is dispatched so
// that the navigation it triggers is observed.
var page = require('webpage').create();
page.open("http://example.com", function(status){
    // phantom.exit() is otherwise only reached from onLoadFinished below, so a
    // failed initial load would leave the script running forever.
    if (status !== 'success') {
        console.log('Unable to load the start page: ' + status);
        phantom.exit(1);
        return;
    }

    // Fires when the load triggered by the click completes: take the
    // screenshot of the new page and quit.
    page.onLoadFinished = function(status) {
        console.log('Load Finished: ' + status);
        page.render("test37_next_page.png");
        phantom.exit();
    };
    page.onLoadStarted = function() {
        console.log('Load Started');
    };

    // Runs inside the page. Returns true if a link was found and clicked,
    // false if the page has no <a> element.
    var clicked = page.evaluate(function(){
        var link = document.querySelector("a");
        if (!link) {
            return false;
        }
        var e = document.createEvent('MouseEvents');
        e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
        link.dispatchEvent(e);
        return true;
    });

    // No click means no navigation and therefore no onLoadFinished call;
    // exit here instead of hanging.
    if (!clicked) {
        console.log('No link found to click');
        phantom.exit(1);
    }
});
If you need to do this kind of thing often (clicking something and then waiting for the next page to load), maybe it is time to try something else, like CasperJS. It runs on top of PhantomJS, but has a much better API for navigating web pages.