Невозможно получить полное тело веб-страницы через headless chromium — PHP

#php #laravel #web-scraping #google-chrome-headless #headless-browser

Вопрос:

Я использую BrowserFactory для PHP, чтобы собирать (скрейпить) изображения с сайтов интернет-магазинов. До сих пор это работало почти на каждом сайте. Обычно я отрисовываю веб-страницу в headless-браузере и ищу нужные элементы, чтобы запустить процесс скрейпинга. Но для этой страницы товара мне не удаётся получить полное тело страницы (body).

Вот код:

 // Pool of user-agent strings; one is picked at random for each run.
 $user_agents = [
     'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13',
     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
     'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56',
     'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7',
     'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
     'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
     'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
     'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)',
 ];

 // Fully-qualified chrome-php factory class; the namespace separators are required.
 $browserFactory = new \HeadlessChromium\BrowserFactory('/usr/bin/google-chrome');
 $user_agent = $user_agents[array_rand($user_agents)];

 // Start a headless browser with image loading disabled and the randomly chosen user agent.
 $browser = $browserFactory->createBrowser([
     'windowSize' => [1920, 780],
     'enableImages' => false,
     'userAgent' => $user_agent,
     'ignoreCertificateErrors' => true,
     // 'customFlags' => ['--proxy-server=http://50.252.42.253:31820'],
 ]);

 try {
     $page = $browser->createPage();

     // destination can be specified
     $uri = 'https://ba-sh.com/us/p/dress-vick-bleunuit-3664784406693.html';
     $navigation = $page->navigate($uri);

     // Wait for the page to be loaded (the library's default navigation event).
     // NOTE(review): markup injected by JavaScript after that event may be missing from
     // the captured body; waiting for \HeadlessChromium\Page::NETWORK_IDLE may capture
     // more - confirm against this site, as it can also time out on pages that keep polling.
     $navigation->waitForNavigation();

     // Evaluate a script in the browser and read back the rendered body markup.
     $pageBody = $page->evaluate('document.body.innerHTML')->getReturnValue();

     $screenshot = $page->screenshot([
         'format'  => 'jpeg',  // default to 'png' - possible values: 'png', 'jpeg',
         'quality' => 80,      // only if format is 'jpeg' - default 100
     ]);
 } finally {
     // Always shut the browser down, even when navigation or evaluation throws.
     $browser->close();
 }
 

Когда я выполняю dd($pageBody);, я получаю следующий вывод (частично отрисованная веб-страница?):

 """
<div id="INDWrap" lang="en" dir="ltr" data-ind-version="3.0.0" style="display: block;"><div id="INDblindNotif" tabindex="-1" class="INDhiddenText">Please note:  ▶
Explore your accessibility options" class="INDcircle-btn INDoutline-btn" style="top: 70%; --indscale:0.5; --indmaincolor:#000000; --indsecondcolor:#ffffff;" dat ▶
ttt<svg id="INDmenu-btn-moveArrow" version="1.2" baseProfile="tiny" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y=" ▶
tttt<path fill="#010101" d="M25.904,13.439c-0.189,0.022-0.364,0.109-0.498,0.244l-3.352,3.352c-0.346,0.307-0.378,0.836-0.07,1.183
tttttc0.307,0.347,0.836,0.378,1.182,0.071c0.027-0.023,0.053-0.049,0.076-0.076l1.92-1.92v6.361C25.156,23.098,25.557,23.51,26,23.51
tttttc0.442,0,0.844-0.412,0.838-0.855v-6.361l1.92,1.92c0.305,0.348,0.834,0.385,1.183,0.08s0.384-0.834,0.079-1.182
tttttc-0.023-0.027-0.049-0.053-0.075-0.076l-3.351-3.352C26.414,13.502,26.159,13.41,25.904,13.439z M17.525,21.809
tttttc-0.189,0.025-0.365,0.114-0.496,0.253l-3.352,3.351c-0.323,0.326-0.323,0.853,0,1.178l3.352,3.353
tttttc0.304,0.348,0.833,0.384,1.182,0.079c0.348-0.304,0.385-0.833,0.08-1.182c-0.023-0.026-0.049-0.053-0.075-0.077l-1.921-1.919h6.354
tttttc0.443,0.008,0.848-0.395,0.848-0.838c0-0.441-0.404-0.844-0.848-0.838h-6.354l1.921-1.928c0.331-0.324,0.337-0.854,0.015-1.186
tttttC18.047,21.867,17.787,21.775,17.525,21.809z M34.283,21.809c-0.462,0.05-0.794,0.463-0.745,0.922
tttttc0.02,0.193,0.108,0.373,0.247,0.51l1.919,1.928h-6.354c-0.029,0-0.058,0-0.086,0c-0.439,0.023-0.818,0.443-0.795,0.882
tttttc0.022,0.438,0.443,0.816,0.881,0.794h6.354l-1.919,1.919c-0.348,0.309-0.38,0.838-0.073,1.184c0.308,0.347,0.838,0.379,1.184,0.072
tttttc0.027-0.025,0.053-0.05,0.075-0.076l3.353-3.353c0.322-0.325,0.322-0.852,0-1.178l-3.353-3.351
tttttC34.793,21.875,34.538,21.782,34.283,21.809z M25.992,28.502c-0.442,0.005-0.839,0.413-0.83,0.855v6.353l-1.92-1.919
tttttc-0.178-0.187-0.433-0.279-0.689-0.254c-0.46,0.05-0.793,0.463-0.744,0.923c0.021,0.193,0.107,0.372,0.246,0.509l3.352,3.352
tttttc0.327,0.327,0.857,0.328,1.186,0.001c0,0,0.001-0.001,0.002-0.001l3.351-3.352c0.347-0.307,0.379-0.836,0.072-1.183
ttttts-0.838-0.379-1.184-0.071c-0.027,0.023-0.053,0.049-0.075,0.076l-1.92,1.919v-6.353C26.848,28.91,26.438,28.498,25.992,28.502z"></path>
ttt</svg><svg version="1.2" baseProfile="tiny" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" viewBox="0 0 11. ▶
<path fill="#010101" d="M11.271,3.656c-0.117-0.442-0.619-0.708-1.061-0.59c0,0-3.361,1.09-4.57,1.09
tc-1.178,0-4.509-1.061-4.509-1.061C0.689,2.978,0.188,3.184,0.041,3.626c-0.147,0.471,0.118,0.972,0.59,1.09l3.625,1.09
tc0.177,0.06,0.325,0.236,0.296,0.442c-0.03,0.59-0.089,1.858-0.207,2.329c-0.295,1.356-1.916,5.07-1.916,5.07
tc-0.207,0.442,0,0.943,0.413,1.15c0.118,0.059,0.236,0.088,0.354,0.088c0.324,0,0.648-0.178,0.766-0.501L5.7,10.848l1.651,3.567
tc0.147,0.293,0.441,0.471,0.767,0.471c0.118,0,0.235-0.029,0.354-0.088c0.44-0.207,0.617-0.708,0.412-1.15
tc0,0-1.621-3.714-1.917-5.07C6.849,8.106,6.791,6.839,6.761,6.249c0-0.206,0.118-0.382,0.295-0.442l3.625-1.09
tC11.124,4.599,11.388,4.097,11.271,3.656z M5.7,3.302c0.912,0,1.651-0.739,1.651-1.65S6.612,0,5.7,0C4.788,0,4.049,0.74,4.049,1.651
tS4.788,3.302,5.7,3.302z"></path>
</svg><span class="INDhiddenText">Accessibility</span></button></div><div id="INDmenu" aria-hidden="true"></div><div id="INDpopup" style="display:none" aria-hid ▶
#INDmenu-btn {
t--indmaincolor:      #067d8d;
t--indsecondcolor:    White;
}

</style><style>.INDMobile.INDbtn-draging{overflow:hidden}#INDmenu-btn-moveArrow :first-child{stroke:none!important}#INDWrap{position:relative;position:absolute; ▶

#INDmenu-btn{
t--indscale-contra: calc(1 - var(--indscale));
t--indscale-margin: calc( var(--indscale-contra) * 10 * -4px );
}

.INDpositionRight #INDmenu-btn{
tright: 5px !important;
tmargin-right: var(--indscale-margin) !important;
}

.INDpositionLeft #INDmenu-btn{
tleft: 5px !important;
tmargin-left: var(--indscale-margin) !important;
}

#INDmenu-btn{
tpadding: 15px;
tbackground: var( --indmaincolor ) !important;
}

#INDmenu-btn > svg{
ttransform: none !important;
twidth: 50px !important;
theight: 50px  !important;
tdisplay:block !important;
}
#INDmenu-btn svg * {
tstroke: none !important;
tfill: var( --indsecondcolor ) !important;
}

#INDmenu-btn > span{
ttext-align: center;
tfont-size: 1.7rem;
tcolor: var( --indsecondcolor ) !important;
}
/* Arrows */
#INDmenu-btn.INDarrow-btn.INDarrow-before-btn > span{
tpadding-right: 0px;
tpadding-left: 20px;
}

#INDmenu-btn.INDarrow-btn.INDarrow-after-btn > span{
tpadding-right: 20px;
tpadding-left: 0px;
}

#INDmenu-btn.INDarrow-btn.INDarrow-before-btn > span:before,
#INDmenu-btn.INDarrow-btn.INDarrow-after-btn > span:after{
tcontent: '';
}

#INDmenu-btn.INDarrow-btn > span:before,
#INDmenu-btn.INDarrow-btn > span:after{
tbackground: transparent !important;
tposition: absolute;
ttop: 26px;
theight: 11px;
twidth: 11px;
tborder: 2px solid var(--indsecondcolor) !important;
tborder-bottom-left-radius: 2px;
tborder-top: none !important;
tborder-right: none !important;
}

#INDmenu-btn.INDarrow-btn > span:before{
tleft: 15px !important;;
}

#INDmenu-btn.INDarrow-btn> span:after{
tright: 15px !important;;
}

#INDmenu-btn.INDarrow-right-btn > span:before,
#INDmenu-btn.INDarrow-right-btn > span:after{
    transform: rotate(225deg);
}

#INDmenu-btn.INDarrow-left-btn > span:before,
#INDmenu-btn.INDarrow-left-btn > span:after{
    transform: rotate(45deg);
}

#INDmenu-btn.INDarrow-up-btn > span:before,
#INDmenu-btn.INDarrow-up-btn > span:after{
    transform: rotate(135deg);
}
#INDmenu-btn.INDarrow-down-btn > span:before,
#INDmenu-btn.INDarrow-down-btn > span:after{
    transform: rotate(-45deg);
}

/*Outlined btn*/
#INDmenu-btn.INDoutline-btn{
tcolor: var( --indmaincolor ) !important;
tborder: 5px solid var( --indsecondcolor ) !important;
}

/*Rounded btn*/
#INDmenu-btn.INDrounded-btn{
tborder-radius: 15px !important;
}

/*Circle btn*/
#INDmenu-btn.INDcircle-btn {
tborder-radius: 100px !important;
}

/*Semi-Circle btn
#INDmenu-btn.INDsemicircle-btn{
tmargin: 0 !important;
}
*/

.INDpositionLeft #INDmenu-btn.INDsemicircle-btn{
tleft: 0 !important;
tborder-radius: 0 100px 100px 0 !important;
tpadding-left: 12px !important;
}
.INDpositionRight #INDmenu-btn.INDsemicircle-btn{ 
tright: 0 !important;
tborder-radius: 100px 0 0 100px !important;
tpadding-right: 12px !important;
}
/*Kano btn :)*/
.INDpositionLeft #INDmenu-btn.INDkano-btn{
tborder-radius: 0 100px !important;
}

.INDpositionLeft #INDmenu-btn.INDkano-btn > svg{
ttransform: rotate(-45deg) !important;
}

.INDpositionRight #INDmenu-btn.INDkano-btn{
tborder-radius: 100px 0 !important;
}

.INDpositionRight #INDmenu-btn.INDkano-btn > svg{
ttransform: rotate(45deg) !important;
}

/*Drag tooltip*/
.INDhasDragTooltip #INDWrap #INDmenu-btn-moveArrow{
tdisplay: blockttt!important;
}

.INDhasDragTooltip.INDpositionRight #INDmenu-btn:before{
tright: 0!important;;
}

.INDhasDragTooltip.INDpositionRight #INDmenu-btn:after{
tright: 100% !important;
}

.INDhasDragTooltip.INDpositionLeft #INDmenu-btn:before{
tleft: 0 !important;
}

.INDhasDragTooltip.INDpositionLeft #INDmenu-btn:after{
tleft: 100% !important;
}

.INDhasDragTooltip #INDmenu-btn.INDoutline-btn:hover,
.INDhasDragTooltip #INDmenu-btn.INDoutline-btn:focus{
tborder: none !important;
}</style></div>

<div class="PreHome">
<img src="/on/demandware.static/Sites-bash-Site/-/default/dw45dc591d/images/prehome.jpg" class="PreHome-bg" alt="img">
<div class="PreHome-content">
<img src="/on/demandware.static/Sites-bash-Site/-/default/dw061b9192/images/logo.svg" alt="BAamp;amp;SH" class="PreHome-logo">
<form class="PreHome-form Form" action="" method="get">
<div class="Form-row Form-row--labelTop">
<label class="Form-label" for="preHomeCount">Choose your country</label>
<div class="Form-field">
<div class="SelectBox">
<select id="preHomeCount" class="SelectBox-select"><option value="AT">Austria</option><option value="BE">Belgium</option><option value="BG">Bulgaria</option><op ▶
</div>
</div>
</div>
<div class="Form-row Form-row--labelTop" id="selectLang">
<label class="Form-label" for="preHomeLang">Choose your language</label>
<div class="Form-field">
<div class="SelectBox">
<select id="preHomeLang" class="SelectBox-select"><option value="en" data-site="bash">English</option><option value="fr" data-site="bash">Français</option></select> ◀
</div>
</div>
</div>
<button type="submit" class="Button PreHome-button" id="preHomeSubmit">Continue</button>
</form>
</div>
</div>

<!-- Demandware Analytics code 1.0 (body_end-analytics-tracking-asynch.js) -->
<script type="text/javascript">//<!--
/* <![CDATA[ */
function trackPage() {
    try{
        var trackingUrl = "https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__Analytics-Start";
        var dwAnalytics = dw.__dwAnalytics.getTracker(trackingUrl);
        if (typeof dw.ac == "undefined") {
            dwAnalytics.trackPageView();
        } else {
            dw.ac.setDWAnalytics(dwAnalytics);
        }
    }catch(err) {};
}
/* ]]> */
// -->
</script>
<script type="text/javascript" src="/on/demandware.static/Sites-bash-Site/-/en_FR/v1636344167937/internal/jscript/dwanalytics-21.9.js" async="async" onload="trackPage()"></script> ◀
<!-- Demandware Active Data (body_end-active_data.js) -->
<script src="/on/demandware.static/Sites-bash-Site/-/en_FR/v1636344167937/internal/jscript/dwac-21.7.js" type="text/javascript" async="async"></script><!-- CQuotient Activity Tracking (body_end-cquotient.js) --> ◀
<script src="https://cdn.cquotient.com/js/v2/gretel.min.js" type="text/javascript" async="async"></script>
<!-- Demandware Apple Pay -->
<script type="text/javascript">//<!--
/* <![CDATA[ */
if (!window.dw) dw = {};
dw.applepay = {"action":{"cancel":"https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__SYSTEM__ApplePay-Cancel","getRequest":"https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__SYSTEM__ApplePay-GetRequest","onshippingcontactselected":"https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__SYSTEM__ApplePay-ShippingContactSelected","onpaymentmethodselected":"https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__SYSTEM__ApplePay-PaymentMethodSelected","onvalidatemerchant":"https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__SYSTEM__ApplePay-ValidateMerchant","onpaymentauthorized":"https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__SYSTEM__ApplePay-PaymentAuthorized","prepareBasket":"https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__SYSTEM__ApplePay-PrepareBasket","onshippingmethodselected":"https://ba-sh.com/on/demandware.store/Sites-bash-Site/en_FR/__SYSTEM__ApplePay-ShippingMethodSelected"},"inject":{"directives":[{"css":"dw-apple-pay-button","query":"ISAPPLEPAY","action":"replace","copy":true},{"css":"dw-apple-pay-button dw-apple-pay-cart dw-apple-pay-logo-white dw-apple-pay-border","query":"button[name=dwfrm_cart_checkoutCart]","action":"after","style":{"ref":"this","attr":["width","margin"]}}]}}; ◀
/* ]]> */
// -->
</script>
<script type="text/javascript" src="/on/demandware.static/Sites-bash-Site/-/en_FR/v1636344167937/internal/jscript/applepay.js" async="true"></script>

<div style="display: none; visibility: hidden;">

<script src="https://cdn.cookielaw.org/scripttemplates/otSDKStub.js" type="text/javascript" charset="UTF-8" data-domain-script="118927c2-d514-48d0-97c9-2f781cf574b4"></script> ◀
<script type="text/javascript">function OptanonWrapper(){window.dataLayer.push({event:"OneTrustGroupsUpdated"})};</script>




<button id="ot-sdk-btn" class="ot-sdk-show-settings"> Cookie Settings</button>


 --amp;>
<div id="ot-sdk-cookie-policy"></div>




</div><script type="text/javascript" id="">window.interdeal={sitekey:"6778819a23b0fb2f108756fce4c689ce",Position:"Left",Menulang:google_tag_manager["GTM-K6PPTPK"].macro(4),domains:{js:"https://aacdn.nagich.com/",acc:"https://access.nagich.com/"},btnStyle:{vPosition:["70%","30%"],scale:["0.5","0.4"],icon:{type:11,shape:"circle",outline:!0},color:{main:"#000000",second:"#ffffff"}}};</script> ◀
<script type="text/javascript" id="">(function(a,c,b){a=a.createElement("script");a.src="https://aacdn.nagich.com/core/3.0.0/accessibility.js";a.defer=!0;a.integrity="sha512-Z/Fp/le5EMHTTNnZV1RBa6aoSrTkOjXLSdL6nHqG8c70vKlDKjeuUCs7l5Nxt66oSkEnlcZAX6gUUNqjhN9Hewx3dx3d";a.crossOrigin="anonymous";a.setAttribute("data-cfasync",!0);b?b.appendChild(a):c.appendChild(a)})(document,document.head,document.body);</script><script src="https://aacdn.nagich.com/core/3.0.0/accessibility.js" defer="" integrity="sha512-Z/Fp/le5EMHTTNnZV1RBa6aoSrTkOjXLSdL6nHqG8c70vKlDKjeuUCs7l5Nxt66oSkEnlcZAX6gUUNqjhN9Hew==" crossorigin="anonymous" data-cfasync="true"></script><iframe title="IND - Default Props" style="display: none;"></iframe> ◀
"""