How to wait for a click() event to load in phantomjs before continuing?

Jpaji Rajnish picture Jpaji Rajnish · Oct 9, 2014 · Viewed 18k times · Source

PhantomJS has two really handy callbacks, onLoadStarted and onLoadFinished, which allow you to essentially pause execution while the page is loading. But I've been searching and I can't find an equivalent for when you click() a submit button or hyperlink. A similar page load happens, but onLoadStarted doesn't get called for this event, I guess because no explicit page.open() happens. I'm trying to figure out a clean way to suspend execution while this load takes place.

One solution is obviously nested setTimeouts, but I'd like to avoid this because it's hacky and relies on trial and error instead of something reliable and robust, like testing against a condition or waiting for an event.

Is there a specific callback for this kind of page load that I missed? Or maybe there's some kind of generic code pattern that can deal with this sort of thing?

EDIT:

I still haven't figured out how to get it to pause. Here's the code that doesn't call the onLoadStarted() function when I call the click() command:

// True while a page load is under way; updated by the two load handlers below.
var loadInProgress = false;

/**
 * Records the current load state and logs the matching message.
 *
 * @param {boolean} inProgress - new value for loadInProgress
 * @param {string} message - text to print to the console
 */
function setLoadState(inProgress, message) {
  loadInProgress = inProgress;
  console.log(message);
}

// A load has begun: raise the flag.
page.onLoadStarted = function() {
  setLoadState(true, "load started");
};

// The load has ended: lower the flag.
page.onLoadFinished = function() {
  setLoadState(false, "load finished");
};

page.open(loginPage.url, function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
        fs.write(filePath + errorState, 1, 'w');
        phantom.exit();
    } else {
        page.evaluate(function (loginPage, credentials) {
            console.log('inside loginPage evaluate function...\n')
            document.querySelector('input[id=' + loginPage.userId + ']').value = credentials.username;
            document.querySelector('input[id=' + loginPage.passId + ']').value = credentials.password;      
            document.querySelector('input[id=' + loginPage.submitId + ']').click();
            //var aTags = document.getElementsByTagName('a')
            //aTags[1].click();
        }, loginPage, credentials);

        page.render(renderPath + 'postLogin.png');
        console.log('rendered post-login');

I double checked that the id is correct. The page.render() will show that the information is submitted, but only if I put it in a setTimeout(), otherwise it renders it immediately and I only see the credentials inputted, before the page redirect. Maybe I'm missing something else?

Answer

Artjom B. picture Artjom B. · Oct 9, 2014

I think the onLoadStarted and onLoadFinished functions are everything you need. Take for example the following script:

var page = require('webpage').create();

/**
 * Creates an event handler that prints `prefix` followed by the first
 * argument the handler receives. Any further arguments are ignored.
 *
 * @param {string} prefix - text printed in front of the value
 * @returns {function} handler suitable for the page.on* callbacks below
 */
function logWithPrefix(prefix) {
    return function(value) {
        console.log(prefix + value);
    };
}

/**
 * Dispatches a synthetic mouse click on the first <a> element.
 * Passed to page.evaluate, so it uses only the page's own document/window.
 */
function clickFirstLink() {
    var clickEvent = document.createEvent('MouseEvents');
    clickEvent.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
    document.querySelector("a").dispatchEvent(clickEvent);
}

// Log every outgoing request.
page.onResourceRequested = function(requestData, networkRequest) {
    console.log('Request (#' + requestData.id + '): ' + requestData.url);
};

// Log a response only once it has reached its "end" stage.
page.onResourceReceived = function(response) {
    if (response.stage === "end") {
        console.log('Response (#' + response.id + ', stage "' + response.stage + '"): ' + response.url);
    }
};

// Navigation-related events: each one logs its first argument behind a label.
page.onNavigationRequested = logWithPrefix('Trying to navigate to: ');
page.onUrlChanged = logWithPrefix('New URL: ');
page.onLoadFinished = logWithPrefix('Load Finished: ');
page.onLoadStarted = function() {
    console.log('Load Started');
};

page.open("http://example.com", function(status){
    // click
    page.evaluate(clickFirstLink);

    // Keep the script alive for 10 seconds so the events triggered by the
    // click can be logged, then quit.
    setTimeout(function(){
        phantom.exit();
    }, 10000);
});

It prints

Trying to navigate to: http://example.com/
Request (#1): http://example.com/
Load Started
New URL: http://example.com/
Response (#1, stage "end"): http://example.com/
Load Finished: success
Trying to navigate to: http://www.iana.org/domains/example
Request (#2): http://www.iana.org/domains/example
Load Started
Trying to navigate to: http://www.iana.org/domains/reserved
Request (#3): http://www.iana.org/domains/reserved
Response (#2, stage "end"): http://www.iana.org/domains/example
New URL: http://www.iana.org/domains/reserved
Request (#4): http://www.iana.org/_css/2013.1/screen.css
Request (#5): http://www.iana.org/_js/2013.1/jquery.js
Request (#6): http://www.iana.org/_js/2013.1/iana.js
Response (#3, stage "end"): http://www.iana.org/domains/reserved
Response (#6, stage "end"): http://www.iana.org/_js/2013.1/iana.js
Response (#4, stage "end"): http://www.iana.org/_css/2013.1/screen.css
Response (#5, stage "end"): http://www.iana.org/_js/2013.1/jquery.js
Request (#7): http://www.iana.org/_img/2013.1/iana-logo-header.svg
Request (#8): http://www.iana.org/_img/2013.1/icann-logo.svg
Response (#8, stage "end"): http://www.iana.org/_img/2013.1/icann-logo.svg
Response (#7, stage "end"): http://www.iana.org/_img/2013.1/iana-logo-header.svg
Request (#9): http://www.iana.org/_css/2013.1/print.css
Response (#9, stage "end"): http://www.iana.org/_css/2013.1/print.css
Load Finished: success

It shows that clicking a link emits the LoadStarted event once and NavigationRequested event twice, because there is a redirect. The trick is to add the event handlers before doing the action:

var page = require('webpage').create();

/**
 * Opens the start page, clicks its first link, and renders a screenshot
 * once the load triggered by that click has finished.
 *
 * The load handlers are registered inside the page.open callback, i.e.
 * after the initial load and before the click, so they only react to the
 * navigation caused by the click.
 */
page.open("http://example.com", function(status){
    // Without a successfully loaded start page there is nothing to click.
    if (status !== 'success') {
        console.log('Unable to load the start page: ' + status);
        phantom.exit(1);
        return;
    }

    page.onLoadFinished = function(status) {
        console.log('Load Finished: ' + status);
        page.render("test37_next_page.png");
        // Report a failed follow-up load through the exit code.
        phantom.exit(status === 'success' ? 0 : 1);
    };
    page.onLoadStarted = function() {
        console.log('Load Started');
    };

    // Runs inside the page; returns false when there is no link to click.
    var clicked = page.evaluate(function(){
        var link = document.querySelector("a");
        if (!link) {
            return false;
        }
        var e = document.createEvent('MouseEvents');
        e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
        link.dispatchEvent(e);
        return true;
    });

    // No click means no navigation, so onLoadFinished would never fire and
    // the script would hang; exit explicitly instead.
    if (!clicked) {
        console.log('No link found to click');
        phantom.exit(1);
    }
});

If you need to chain several navigation steps like this, maybe it is time to try something else like CasperJS. It runs on top of PhantomJS, but has a much better API for navigating web pages.