I'm new to web scraping and want to download all images on a webpage using puppeteer:
const puppeteer = require('puppeteer');

let scrape = async () => {
  // Actual Scraping goes Here...

  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto('https://memeculture69.tumblr.com/');

  // Right click and save images
};

scrape().then((value) => {
  console.log(value); // Success!
});
I have looked at the API docs but could not figure out how to achieve this. I'd appreciate your help.
Here is another example. It goes to a generic Google search and downloads the Google logo image at the top left of the results page.
const puppeteer = require('puppeteer');
const fs = require('fs');

async function run() {
  const browser = await puppeteer.launch({
    headless: false
  });
  const page = await browser.newPage();
  await page.setViewport({ width: 1200, height: 1200 });
  await page.goto('https://www.google.com/search?q=.net+core&rlz=1C1GGRV_enUS785US785&oq=.net+core&aqs=chrome..69i57j69i60l3j69i65j69i60.999j0j7&sourceid=chrome&ie=UTF-8');

  // Selector for the Google logo on the results page.
  const IMAGE_SELECTOR = '#tsf > div:nth-child(2) > div > div.logo > a > img';

  // Read the image's relative src and strip the leading slash
  // so it can be joined onto the site root below.
  let imageHref = await page.evaluate((sel) => {
    return document.querySelector(sel).getAttribute('src').replace('/', '');
  }, IMAGE_SELECTOR);

  console.log("https://www.google.com/" + imageHref);

  // Navigate straight to the image URL and save the response body to disk.
  var viewSource = await page.goto("https://www.google.com/" + imageHref);
  fs.writeFile(".googles-20th-birthday-us-5142672481189888-s.png", await viewSource.buffer(), function (err) {
    if (err) {
      return console.log(err);
    }
    console.log("The file was saved!");
  });

  await browser.close();
}

run();
If you have a list of images you want to download, you could change the selector programmatically as needed and go down the list, downloading the images one at a time.
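For instance, here is a rough sketch of that approach applied to your original page. It assumes the images are exposed through ordinary <img> tags with http(s) URLs, and the output file names (image-0.png, image-1.png, ...) are just placeholders for illustration:

const puppeteer = require('puppeteer');
const fs = require('fs');

async function downloadImages(url) {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  await page.goto(url);

  // Collect the src of every <img> element on the page.
  const imageUrls = await page.evaluate(() =>
    Array.from(document.querySelectorAll('img'), img => img.src)
      .filter(src => src.startsWith('http')) // skip data: URIs etc.
  );

  // Visit each image URL in turn and write the response body to disk.
  for (let i = 0; i < imageUrls.length; i++) {
    const response = await page.goto(imageUrls[i]);
    fs.writeFileSync(`image-${i}.png`, await response.buffer());
  }

  await browser.close();
}

downloadImages('https://memeculture69.tumblr.com/');

Reusing the same tab with page.goto for each image keeps the sketch simple, but it is strictly sequential; if you have a lot of images you may prefer to collect the URLs with puppeteer and then fetch them with a plain HTTP client instead.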