In the Puppeteer browser, unexpected closing behavior after clicking the Cookie Accept button
P粉883278265
P粉883278265 2023-08-16 21:40:51
0
1
402
<p>I'm trying to use node.js and puppeteer to crawl a website for real-time football information, but after console.log("trying to select team element"), my browser closes immediately. </p> <pre class="brush:php;toolbar:false;">const puppeteer = require("puppeteer"); async function openPage() { const browser = await puppeteer.launch( {headless: true} ); const page = await browser.newPage(); await page.setViewport({ width: 1000, height: 926 }); await page.goto("https://www.livescore.com/en/"); //Accept cookies const button = await page.waitForSelector('#onetrust-accept-btn-handler'); if (button) { await button.click(); console.log("clicked cookie button"); }; return page; } async function scrapeData(page) { let content = []; // Get the competition elements let elements = await page.waitForSelector(".Ip") console.log("trying to select team element") for (let i=0; i < elements.length; i++) { let homeTeamElement = await elements[i].$(".Ip") if (homeTeamElement) { const homeTeamText = await homeTeamElement.evaluate(node => node.textContent); content.push(homeTeamText); } }; return content; }; (async () => { const page = await openPage(); const dataScraped = await scrapeData(page); console.log(dataScraped) await page.browser().close(); })();</pre> <p>Any ideas as to why this is the case and further criticism of my code are welcome! </p>
P粉883278265
P粉883278265

reply all(1)
P粉798010441

await page.waitForSelector(".Ip") only returns one element, not an array, so it cannot be looped over. There should be a clear error message explaining the problem. Instead, use page.$$eval (or the newer locator API, if you want to try it) to extract the data.

const puppeteer = require("puppeteer"); // ^21.0.2

// Page to scrape. The selectors used below (".Ip", "#onetrust-accept-btn-handler")
// are the ones the question uses against this site.
const url = "https://www.livescore.com/en/";

// Declared outside the async function so the final cleanup step can still
// reach the browser when launching or scraping fails part-way through.
let browser;
(async () => {
  browser = await puppeteer.launch();
  // Take the first page the browser already has instead of creating a new one.
  const [page] = await browser.pages();
  await page.setViewport({width: 1000, height: 926});
  await page.goto(url, {waitUntil: "domcontentloaded"});

  // Dismiss the cookie banner (not really necessary for scraping).
  // waitForSelector resolves with the element or throws on timeout, so the
  // result needs no null check before clicking.
  const button = await page.waitForSelector("#onetrust-accept-btn-handler");
  await button.click();

  // Wait until at least one ".Ip" element exists, then read every one of them
  // in a single in-page evaluation.
  await page.waitForSelector(".Ip");
  const content = await page.$$eval(".Ip", els =>
    els.map(e => {
      // Trimmed text of the descendant whose id attribute contains `id`.
      const text = id =>
        e.querySelector(`[id*=${id}]`).textContent.trim();
      return {
        time: text("status-or-time"),
        home: text("home-team-name"),
        away: text("away-team-name"),
        // Unary plus converts the score text to a number.
        homeTeamScore: +text("home-team-score"),
        awayTeamScore: +text("away-team-score"),
      };
    })
  );
  console.log(content);
})()
  .catch(err => console.error(err))
  // Close the browser on success and on failure; `?.` covers a failed launch.
  .finally(() => browser?.close());

Note:

  • Dismissing the cookie banner is not necessary for crawling, but if you do dismiss it, there is no need to check the return value of waitForSelector. It is guaranteed to be the element; otherwise an exception is thrown when the element is not found within the timeout.
  • await elements[i].$(".Ip") won't help you access anything because there is no .Ip element inside the .Ip element you already hold.
  • Avoid using ElementHandles, they are slow, unreliable and verbose.
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template
About us Disclaimer Sitemap
php.cn:Public welfare online PHP training,Help PHP learners grow quickly!