Прокрутка нескольких страниц с помощью кукловода NodeJS, но прокрутилась только 1 страница, а остальные-нет

#javascript #asynchronous #scroll #puppeteer

Вопрос:

Этот код здесь успешно прокручивает 1 страницу, затем завершается и лишь на короткое время касается других, а затем быстро игнорирует их. Я думаю, что это может быть как-то связано с page переменной, и каким-то образом она перепуталась во время асинхронного выполнения.

 const puppeteer = require('puppeteer')

const scrollElement = 'document.getElementsByClassName("item-view")[0]'
const urls = ["https://batdongsan.com.vn/ban-do-nha-dat-ban#ptype=38amp;cat=amp;city=amp;district=amp;area=amp;price=amp;ward=amp;street=amp;room=amp;direction=amp;project=amp;points=10.77819980508061:106.71007824963644,10.783511697967592:106.7051000697048,10.78570388039108:106.70115185803488,10.779042968942315:106.68965054577902,10.770864179835455:106.68192578381613,10.762769489461716:106.67849255627706,10.755686457050816:106.68235493725851,10.752819468066056:106.68595982617452,10.753662702948837:106.69231129712179,10.761504674449494:106.70063687390402,10.769683717701703:106.70904828137472,10.775417347579069:106.71230984753683amp;zoom=14amp;center=10.764076459398622:106.70570231730571amp;page=1amp;product=amp;searchtype=1","https://batdongsan.com.vn/ban-do-nha-dat-ban#ptype=38amp;cat=amp;city=amp;district=amp;area=amp;price=amp;ward=amp;street=amp;room=amp;direction=amp;project=amp;points=21.036610658726687:105.85618505276071,21.03933437995124:105.85223684109079,21.038373072264537:105.84657201565133,21.0334863289415:105.84245214260446,21.029160225699314:105.84090719021188,21.026195971345416:105.8433962801777,21.0240328296118:105.84983358181344,21.02411294653207:105.8547259310566,21.026676665245493:105.85721502102243,21.03124317997471:105.85807332790719,21.034688001997072:105.856957528957amp;zoom=14amp;center=21.027144004643514:105.86811695124052amp;page=1amp;product=amp;searchtype=1"];
let alias = {};
alias[urls[0]] = "URL1";
alias[urls[1]] = "URL2";

(async () => {
  const browser = await puppeteer.launch({headless:false, slowMo:0})
  await Promise.all(urls.map(async (url)=>{
    const page = await browser.newPage()
    await page.goto(url)
    await page.waitForSelector(".detail-title")
    let previousHeight
    while (true) {
      try {
        previousHeight = await page.evaluate(`${scrollElement}.scrollHeight`);
        console.log(`Scroll ${alias[url]} to ${previousHeight}`);
        await page.evaluate(`${scrollElement}.scrollTo(0, ${scrollElement}.scrollHeight)`);
        await page.waitForFunction(`${scrollElement}.scrollHeight > ${previousHeight}`, {timeout:1000})
      } catch (e) {
        if (e instanceof puppeteer.errors.TimeoutError) {
          console.log("Finished scrolling")
          break
        } else {
          throw(e)
        }
      }
    }
    parseList = await page.$("li.detail-item")
    console.log(parseList.length);
  }))
  await browser.close()
})();
 

Войдите в консоль на моем компьютере

 Scroll URL2 to 588
Scroll URL2 to 1176
Scroll URL2 to 1782
Scroll URL2 to 2320
Scroll URL2 to 2890
Scroll URL2 to 3452
Scroll URL2 to 4040
Scroll URL1 to 624 // scroll once then never again
Scroll URL2 to 4628
Scroll URL2 to 5162
Scroll URL2 to 5722
Scroll URL2 to 6262
Scroll URL2 to 6848
Scroll URL2 to 7420
Scroll URL2 to 8008
Scroll URL2 to 8614
Scroll URL2 to 9202
Scroll URL2 to 9762
Scroll URL2 to 10368
Scroll URL2 to 10974
Scroll URL2 to 11544
Scroll URL2 to 12122
Scroll URL2 to 12694
Scroll URL2 to 13264
Scroll URL2 to 13836
Scroll URL2 to 14390
Scroll URL2 to 14996
Scroll URL2 to 15602
Scroll URL2 to 16198
Scroll URL2 to 16786
Scroll URL2 to 17356
Scroll URL2 to 17908
Scroll URL2 to 18486
Scroll URL2 to 19074
Scroll URL2 to 19680
Scroll URL2 to 20200
Scroll URL2 to 20770
Scroll URL2 to 21330
Scroll URL2 to 21918
Scroll URL2 to 22524
Scroll URL2 to 23112
Scroll URL2 to 23684
Scroll URL2 to 24238
Scroll URL2 to 24726
Scroll URL2 to 25296
Scroll URL2 to 25920
Scroll URL2 to 26508
Scroll URL2 to 27060
Scroll URL2 to 27620
Scroll URL2 to 28180
Scroll URL2 to 28758
Scroll URL2 to 29312
Scroll URL2 to 29866
Scroll URL2 to 30472
Scroll URL2 to 31008
Scroll URL2 to 31578
Scroll URL2 to 32120
Scroll URL2 to 32690
Scroll URL2 to 33258
Scroll URL2 to 33818
Scroll URL2 to 34424
Scroll URL2 to 35012
Scroll URL2 to 35564
Scroll URL2 to 36152
Finished scrolling
4 // there should be more than 500 items here
Scroll URL2 to 36740
Scroll URL2 to 37318
Scroll URL2 to 37878
Scroll URL2 to 38456
Scroll URL2 to 39044
Scroll URL2 to 39598
Scroll URL2 to 40102
Scroll URL2 to 40606
Scroll URL2 to 41176
Scroll URL2 to 41764
Scroll URL2 to 42336
Scroll URL2 to 42890
Scroll URL2 to 43460
Scroll URL2 to 44004
Scroll URL2 to 44592
Scroll URL2 to 45134
Scroll URL2 to 45676
Scroll URL2 to 46212
Scroll URL2 to 46782
Scroll URL2 to 47360
Scroll URL2 to 47912
Scroll URL2 to 48466
Scroll URL2 to 49054
Scroll URL2 to 49624
Scroll URL2 to 50194
Scroll URL2 to 50772
Scroll URL2 to 51360
Scroll URL2 to 51902
Scroll URL2 to 52396
Scroll URL2 to 52932
Scroll URL2 to 53458
Scroll URL2 to 53994
Scroll URL2 to 54504
Scroll URL2 to 55092
Scroll URL2 to 55662
Scroll URL2 to 56232
Scroll URL2 to 56802
Scroll URL2 to 57390
Scroll URL2 to 57960
Scroll URL2 to 58530
Scroll URL2 to 59082
Scroll URL2 to 59634
Scroll URL2 to 60194
Scroll URL2 to 60800
Scroll URL2 to 61354
Scroll URL2 to 61908
Scroll URL2 to 62462
Scroll URL2 to 62996
Scroll URL2 to 63584
Scroll URL2 to 64122
Scroll URL2 to 64642
Scroll URL2 to 65230
Scroll URL2 to 65818
Scroll URL2 to 66350
Scroll URL2 to 66920
Scroll URL2 to 67472
Scroll URL2 to 68050
Scroll URL2 to 68604
Scroll URL2 to 68748
Finished scrolling
485
 

Комментарии:

1. нашел решение в драматурге, но мне все еще очень интересно, почему оно работает в драматурге firefox, но не в кукловоде chromium

Ответ №1:

Каким-то образом он заработал в драматурге firefox с точно таким же кодом …

 const playwright = require('playwright')

const scrollElement = 'document.getElementsByClassName("item-view")[0]'
const urls = ["https://batdongsan.com.vn/ban-do-nha-dat-ban#ptype=38amp;cat=amp;city=amp;district=amp;area=amp;price=amp;ward=amp;street=amp;room=amp;direction=amp;project=amp;points=10.77819980508061:106.71007824963644,10.783511697967592:106.7051000697048,10.78570388039108:106.70115185803488,10.779042968942315:106.68965054577902,10.770864179835455:106.68192578381613,10.762769489461716:106.67849255627706,10.755686457050816:106.68235493725851,10.752819468066056:106.68595982617452,10.753662702948837:106.69231129712179,10.761504674449494:106.70063687390402,10.769683717701703:106.70904828137472,10.775417347579069:106.71230984753683amp;zoom=14amp;center=10.764076459398622:106.70570231730571amp;page=1amp;product=amp;searchtype=1","https://batdongsan.com.vn/ban-do-nha-dat-ban#ptype=38amp;cat=amp;city=amp;district=amp;area=amp;price=amp;ward=amp;street=amp;room=amp;direction=amp;project=amp;points=21.036610658726687:105.85618505276071,21.03933437995124:105.85223684109079,21.038373072264537:105.84657201565133,21.0334863289415:105.84245214260446,21.029160225699314:105.84090719021188,21.026195971345416:105.8433962801777,21.0240328296118:105.84983358181344,21.02411294653207:105.8547259310566,21.026676665245493:105.85721502102243,21.03124317997471:105.85807332790719,21.034688001997072:105.856957528957amp;zoom=14amp;center=21.027144004643514:105.86811695124052amp;page=1amp;product=amp;searchtype=1"];
let alias = {};
alias[urls[0]] = "URL1";
alias[urls[1]] = "URL2";

(async () => {
  const browser = await playwright.firefox.launch({headless:false, slowMo:0})
  await Promise.all(urls.map(async (url)=>{
    const page = await browser.newPage()
    await page.goto(url)
    await page.waitForSelector(".detail-title")
    let previousHeight
    while (true) {
      try {
        previousHeight = await page.evaluate(`${scrollElement}.scrollHeight`)
        console.log(`Scroll ${alias[url]} to ${previousHeight}`)
        await page.evaluate(`${scrollElement}.scrollTo(0, ${scrollElement}.scrollHeight)`)
        await page.waitForFunction(`${scrollElement}.scrollHeight > ${previousHeight}`, 69 , {timeout:1000})
      } catch (e) {
        if (e instanceof playwright.errors.TimeoutError) {
          console.log(`Finished scrolling ${alias[url]}`)
          break
        } else {
          throw(e)
        }
      }
    }
    parseList = await page.$("li.detail-item")
    console.log(parseList.length);
  }))
  await browser.close()
})();
 

Журнал результатов

 Scroll URL2 to 587
Scroll URL2 to 1155
Scroll URL2 to 1760
Scroll URL2 to 2297
Scroll URL2 to 2883
Scroll URL2 to 3426
Scroll URL2 to 4031
Scroll URL2 to 4599
Scroll URL2 to 5168
Scroll URL2 to 5745
Scroll URL2 to 6287
Scroll URL2 to 6808
Scroll URL2 to 7375
Scroll URL2 to 7987
Scroll URL2 to 8540
Scroll URL2 to 9145
Scroll URL2 to 9731
Scroll URL2 to 10290
Scroll URL2 to 10895
Scroll URL2 to 11481
Scroll URL2 to 12086
Scroll URL2 to 12655
Scroll URL2 to 13249
Scroll URL2 to 13802
Scroll URL2 to 14389
Scroll URL2 to 14925
Scroll URL2 to 15512
Scroll URL2 to 16117
Scroll URL2 to 16721
Scroll URL2 to 17298
Scroll URL2 to 17867
Scroll URL2 to 18435
Scroll URL2 to 18994
Scroll URL2 to 19581
Scroll URL2 to 20167
Scroll URL2 to 20722
Scroll URL1 to 623
Scroll URL2 to 21291
Scroll URL2 to 21831
Scroll URL2 to 22400
Scroll URL2 to 23023
Scroll URL1 to 1245
Scroll URL1 to 1850
Scroll URL2 to 23609
Scroll URL1 to 2455
Scroll URL2 to 24196
Scroll URL2 to 24749
Scroll URL2 to 25269
Scroll URL2 to 25788
Scroll URL2 to 26393
Scroll URL2 to 26997
Scroll URL2 to 27566
Scroll URL2 to 28107
Scroll URL2 to 28693
Scroll URL2 to 29224
Scroll URL2 to 29795
Scroll URL1 to 3041
Scroll URL2 to 30363
Scroll URL2 to 30952
Scroll URL2 to 31521
Scroll URL2 to 32073
Scroll URL1 to 3610
Scroll URL2 to 32614
Scroll URL1 to 4145
Scroll URL2 to 33165
Scroll URL1 to 4749
Scroll URL2 to 33741
Scroll URL1 to 5302
Scroll URL2 to 34308
Scroll URL1 to 5855
Scroll URL1 to 6477
Scroll URL2 to 34877
Scroll URL2 to 35481
Scroll URL1 to 7100
Scroll URL2 to 36050
Scroll URL1 to 7723
Scroll URL1 to 8327
Scroll URL2 to 36637
Scroll URL1 to 8914
Scroll URL2 to 37205
Scroll URL1 to 9483
Scroll URL2 to 37782
Scroll URL1 to 10069
Scroll URL2 to 38359
Scroll URL1 to 10656
Scroll URL2 to 38927
Scroll URL1 to 11261
Scroll URL2 to 39522
Scroll URL1 to 11883
Scroll URL2 to 40075
Scroll URL1 to 12470
Scroll URL2 to 40609
Scroll URL1 to 13059
Scroll URL2 to 41096
Scroll URL1 to 13681
Scroll URL2 to 41631
Scroll URL1 to 14286
Scroll URL2 to 42199
Scroll URL1 to 14855
Scroll URL2 to 42822
Scroll URL1 to 15441
Scroll URL2 to 43375
Scroll URL1 to 16012
Scroll URL2 to 43909
Scroll URL1 to 16599
Scroll URL1 to 17139
Scroll URL2 to 44462
Scroll URL1 to 17726
Scroll URL2 to 45039
Scroll URL1 to 18313
Scroll URL2 to 45597
Scroll URL2 to 46138
Scroll URL2 to 46689
Scroll URL2 to 47223
Scroll URL2 to 47810
Scroll URL1 to 18917
Scroll URL2 to 48369
Scroll URL1 to 19494
Scroll URL1 to 20071
Scroll URL1 to 20675
Scroll URL2 to 48937
Scroll URL1 to 21244
Scroll URL1 to 21815
Scroll URL1 to 22401
Scroll URL2 to 49490
Scroll URL1 to 22988
Scroll URL2 to 50077
Scroll URL1 to 23557
Scroll URL1 to 24161
Scroll URL2 to 50627
Scroll URL1 to 24748
Scroll URL2 to 51204
Scroll URL1 to 25335
Scroll URL2 to 51791
Scroll URL1 to 25957
Scroll URL2 to 52359
Scroll URL1 to 26528
Scroll URL2 to 52868
Scroll URL1 to 27151
Scroll URL2 to 53393
Scroll URL1 to 27737
Scroll URL2 to 53901
Scroll URL1 to 28324
Scroll URL2 to 54452
Scroll URL1 to 28893
Scroll URL2 to 54971
Scroll URL1 to 29479
Scroll URL2 to 55513
Scroll URL1 to 30066
Scroll URL1 to 30637
Scroll URL2 to 56082
Scroll URL1 to 31241
Scroll URL2 to 56651
Scroll URL1 to 31846
Scroll URL2 to 57219
Scroll URL1 to 32441
Scroll URL2 to 57806
Scroll URL1 to 33045
Scroll URL2 to 58393
Scroll URL1 to 33606
Scroll URL1 to 34193
Scroll URL2 to 58961
Scroll URL1 to 34727
Scroll URL2 to 59512
Scroll URL1 to 35288
Scroll URL1 to 35857
Scroll URL2 to 60063
Scroll URL1 to 36415
Scroll URL2 to 60621
Scroll URL1 to 37020
Scroll URL2 to 61226
Scroll URL1 to 37607
Scroll URL2 to 61779
Scroll URL1 to 38157
Scroll URL2 to 62331
Scroll URL1 to 38762
Scroll URL1 to 39323
Scroll URL2 to 62884
Scroll URL1 to 39927
Scroll URL2 to 63417
Scroll URL1 to 40532
Scroll URL2 to 64003
Scroll URL1 to 41137
Scroll URL2 to 64540
Scroll URL1 to 41741
Scroll URL2 to 65059
Scroll URL1 to 42328
Scroll URL1 to 42933
Scroll URL2 to 65645
Scroll URL1 to 43501
Scroll URL1 to 44070
Scroll URL2 to 66232
Scroll URL1 to 44657
Scroll URL2 to 66763
Scroll URL2 to 67331
Scroll URL1 to 45243
Scroll URL2 to 67882
Scroll URL1 to 45848
Scroll URL2 to 68459
Scroll URL1 to 46419
Scroll URL2 to 69011
Scroll URL1 to 47023
Scroll URL2 to 69155
Scroll URL1 to 47600
Scroll URL1 to 48169
Scroll URL1 to 48739
Scroll URL1 to 49344
Scroll URL1 to 49931
Scroll URL1 to 50517
Scroll URL1 to 51122
Scroll URL1 to 51709
Scroll URL1 to 52295
Scroll URL1 to 52864
Scroll URL1 to 53469
Scroll URL1 to 54055
Scroll URL1 to 54642
Scroll URL1 to 55247
Scroll URL1 to 55799
Scroll URL1 to 56386
Scroll URL1 to 56991
Scroll URL1 to 57515
Scroll URL1 to 58120
Scroll URL1 to 58725
Scroll URL1 to 59347
Scroll URL1 to 59952
Scroll URL1 to 60523
Scroll URL1 to 61073
Scroll URL1 to 61696
Scroll URL1 to 62301
Scroll URL1 to 62887
Finished scrolling URL2
Scroll URL1 to 63492
489
Scroll URL1 to 64079
Scroll URL1 to 64665
Scroll URL1 to 65270
Scroll URL1 to 65795
Scroll URL1 to 66329
Scroll URL1 to 66870
Scroll URL1 to 67475
Scroll URL1 to 68097
Scroll URL1 to 68720
Scroll URL1 to 69289
Scroll URL1 to 69813
Scroll URL1 to 70418
Scroll URL1 to 71023
Scroll URL1 to 71591
Scroll URL1 to 72214
Scroll URL1 to 72837
Scroll URL1 to 73430
Finished scrolling URL1
500