I'm trying to write an Alexa skill that uses request.js & cheerio.js to do some web scraping for data.
When I run it locally it works as expected, but on the AWS Lambda instance I keep getting a timeout error because it takes longer than 3 seconds to finish running. The error log is:
START RequestId: 57f84024-6ef5-11e8-b49f-37c904d87c93 Version: $LATEST
2018-06-13T10:34:26.850Z 57f84024-6ef5-11e8-b49f-37c904d87c93 NEW SESSION
2018-06-13T10:34:26.851Z 57f84024-6ef5-11e8-b49f-37c904d87c93 INTENT REQUEST
2018-06-13T10:34:26.851Z 57f84024-6ef5-11e8-b49f-37c904d87c93 Sid started
2018-06-13T10:34:26.851Z 57f84024-6ef5-11e8-b49f-37c904d87c93 INTENT FOR SID HAS STARTED
2018-06-13T10:34:26.851Z 57f84024-6ef5-11e8-b49f-37c904d87c93 https://en.wikipedia.org/wiki/Chloroform
2018-06-13T10:34:26.950Z 57f84024-6ef5-11e8-b49f-37c904d87c93 SESSION END REQUEST
2018-06-13T10:34:27.731Z 57f84024-6ef5-11e8-b49f-37c904d87c93 REQUEST MADE
2018-06-13T10:34:27.731Z 57f84024-6ef5-11e8-b49f-37c904d87c93 trying to use cheerio to load page now
END RequestId: 57f84024-6ef5-11e8-b49f-37c904d87c93
REPORT RequestId: 57f84024-6ef5-11e8-b49f-37c904d87c93 Duration: 3003.16 ms Billed Duration: 3000 ms Memory Size: 128 MB Max Memory Used: 48 MB
2018-06-13T10:34:29.499Z 57f84024-6ef5-11e8-b49f-37c904d87c93 Task timed out after 3.00 seconds
The snippet of code causing the problem is:
// Build the Wikipedia article URL. The search term is percent-encoded so that
// spaces or reserved characters in it cannot produce a malformed URL.
// `var` is kept here because code outside this snippet may rely on the binding.
var URL = 'https://en.wikipedia.org/wiki/' + encodeURIComponent(searchTerm);
console.log(URL);

/**
 * Fetch the article, look up the "GHS hazard statements" row of the infobox
 * and answer the Alexa request with the text of that row and the row after it.
 *
 * Every path through the callback ends the invocation with a spoken reply.
 * Previously the "no HTML", "exception" and "row not found" paths only logged,
 * which left the Lambda function running until it hit its timeout.
 *
 * NOTE(review): parsing a full article with cheerio can itself take longer
 * than a short function timeout allows; raising the function's timeout and/or
 * memory setting is a configuration change outside this code — confirm.
 */
request(URL, function (error, response, html) {
  console.log('REQUEST MADE');
  console.log('trying to use cheerio to load page now');

  // Single exit point for the spoken reply (ends the session, as before).
  const respond = function (speech) {
    context.succeed(
      generateResponse(
        buildSpeechletResponse(speech, true),
        {}
      )
    );
  };

  // A transport error leaves `html` undefined, so both cases are handled here.
  if (error || !html) {
    if (error) {
      console.log(error);
    }
    console.log('There was no HTML loaded. Please try again');
    respond('There was no HTML loaded. Please try again');
    return;
  }

  let speech = null;
  try {
    const $ = cheerio.load(html);
    console.log('PAGE LOADED');

    const rows = $('table.infobox').children('tbody').children();
    rows.each(function () {
      const row = $(this);
      const label = row.children().first().children().first().text();
      if (label !== 'GHS hazard statements') {
        return; // keep scanning the remaining rows
      }
      // `row` is a single element, so its text and the next sibling row's
      // text are exactly what the original `.first()` chains resolved to.
      const hCodes = row.text();
      const pPhrases = row.next().text();
      speech = `I found the following ${hCodes} ${pPhrases}`;
      return false; // stop iterating: reply only once
    });
  } catch (err) {
    console.log(err);
    console.log('THERE WAS AN ERROR PLEASE TRY AGAIN');
    respond('There was an error. Please try again');
    return;
  }

  if (speech === null) {
    console.log('No GHS hazard statements row found');
    respond('I could not find any hazard statements for that search term');
    return;
  }

  respond(speech);
});
I've also tried using response.on(), but then I get the error "Cannot do .on of undefined".
Has anyone had anything like this before?