#python #html #beautifulsoup
Вопрос:
У меня есть следующий HTML-код, который нужно разобрать. Как получить доступ ко всем элементам window.WIZ_global_data
в виде обычного словаря? И является ли window.WIZ_global_data
также атрибутом тега script?
Я могу получить доступ к тегу script
через soup.head.script
lt;!DOCTYPE htmlgt; lt;html dir="ltr" lang="no"gt; lt;headgt; lt;base href="https://consent.google.com/"/gt; lt;meta content="origin" name="referrer"/gt; lt;link href="https://consent.google.com/m" rel="canonical"/gt; lt;meta content="initial-scale=1,minimum-scale=1,maximum-scale=5,width=device-width" name="viewport"/gt; lt;link href="//www.google.com/favicon.ico" rel="shortcut icon"/gt; lt;script data-id="_gd" nonce="QI9 XeJ9TwHcCspiFyqIIQ"gt; window.WIZ_global_data = {"DndLYb":"","DpimGf":false,"EP1ykd":["/_/*"],"FdrFJe":"2318287307032857584","GVlsxf":"www.google.com","Im6cmf":"/_/ConsentUi","LVIXXb":1,"LoQv7e":false,"MT7f9b":[],"Mypbod":"https://www.googleapis.com/reauth","PYFuDc":"DUMMY_X_CLIENT_DATA_WIZ_GLOBAL_KEY_DO_NOT_USE","QrtxK":"","R6pIad":"%.@.]","S06Grb":"","TTHqvb":"https://kidsmanagement-pa.googleapis.com","Yllh3e":"%.@.1637316536530302,170695567,503968613]","cfb2h":"boq_identityfrontenduiserver_20211111.08_p0","eNnkwf":"1637316536","ejMLCd":"DUMMY_X_GEO_WIZ_GLOBAL_KEY_DO_NOT_USE","eptZe":"/_/ConsentUi/","fPDxwd":[1763433,1772879,1782333,45814370],"gGcLoe":false,"ksKYzf":"%.@.false,true,false,false,null,false,null,"",null,[["RelayState","SAMLRequest","SigAlg","Signature","TL","af","alwf","btmpl","c","cbflow","cd","checkConnection","checkedDomains","client_id","continue","cpbps","dsh","emr","faa","flowEntry","flowName","followup","forceOsidOriginForTest","gae","go","hd","hide_status_bar","hl","idvToken","ifkv","ifr","ignoreShadow","kdi","kid_continue","ltmpl","marl","migrate","multilogin","next","oauth","osid","pageId","passwdsession","platform_variant","pstMsg","rart","rip","rm","sarp","scc","scope","secure","sendvemail","service","session","skipShadow","skipvpage","source","ss","ss_mode","sspa","t","target","theme","ul"]],null,null,[],[[null,null,"https://accounts.google.com/AccountChooser?continue\u003dhttps://www.google.com/maps/place/Hauges+gate+66,+3019+Drammen/@59.7476727,10.1924839,17z/data=!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa39
46!8m2!3d59.7476727!4d10.1946726\u0026hl\u003dno"],null,[null,null,"https://accounts.google.com/signin/recovery?continue\u003dhttps://www.google.com/maps/place/Hauges+gate+66,+3019+Drammen/@59.7476727,10.1924839,17z/data=!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\u0026hl\u003dno"],[null,null,"https://accounts.google.com/restart?continue\u003dhttps://www.google.com/maps/place/Hauges+gate+66,+3019+Drammen/@59.7476727,10.1924839,17z/data=!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\u0026hl\u003dno"]],null,false,[]]","nQyAE":{"vEMF5e":"false","LlkYkf":"false","WBBR0d":"true","ViN5Xd":"false","p6p11":"true","EoymAc":"false","FbHgvb":"false","P1ceCf":"false","tBSlob":"false","XqMd3":"false","Ee3RQb":"false","a76Enc":"false","G25Msb":"false","VL5wad":"false","XSgnJf":"false","APYQvd":"false","rONKDd":"false","Dpi3Gf":"false","lttELb":"false","YDrknb":"false","kQrwQd":"false","ihOA2":"false","Ozjmee":"false"},"qwAQke":"ConsentUi","qymVe":"YNnh99mAz0-TXbG5yo-NTKQ-0l4","rtQCxc":-60,"thykhd":"AKH95euIF6P3sAglrRifxgEcfcB9vtI_XXJFaZ5Oty53KFviM2VkkPyRfmncVF-cgLGCK0LYtfquTSFWlA3cjJWXXNjjUICQ5v3brxMomscQEgs9760XyDgPgf0u003d","unNRMb":"AKJVzcpKsW1gHA6-l3WGdp1MQsU_eTjL-Ufc5L6-PjxIndj8D2t6PFy3SRwl7yc0EXpFbqSSCh50","vAyiz":"ChUI/o793Lf0vtdFEK6ek/ubmfGdvgEu003d","w2btAe":"%.@.null,null,"",false,null,null,true,false]","zChJod":"%.@.]"}; lt;/scriptgt;
Правка: я также могу использовать soup.head.script.text,
чтобы получить содержимое в виде строки, но хотел бы получить его в виде словаря и более «питоническим» способом.
Ответ №1:
Вы можете воспользоваться модулем re,
чтобы найти переменную и извлечь её значение, а затем передать его в json.loads()
и обращаться к результату как к словарю (dict)
:
json.loads(re.search(r'window.WIZ_global_data = ({.*})', html).group(1))
Пример
html=r''' lt;!DOCTYPE htmlgt; lt;html dir="ltr" lang="no"gt; lt;headgt; lt;base href="https://consent.google.com/"/gt; lt;meta content="origin" name="referrer"/gt; lt;link href="https://consent.google.com/m" rel="canonical"/gt; lt;meta content="initial-scale=1,minimum-scale=1,maximum-scale=5,width=device-width" name="viewport"/gt; lt;link href="//www.google.com/favicon.ico" rel="shortcut icon"/gt; lt;script data-id="_gd" nonce="QI9 XeJ9TwHcCspiFyqIIQ"gt; window.WIZ_global_data = {"DndLYb":"","DpimGf":false,"EP1ykd":["/_/*"],"FdrFJe":"2318287307032857584","GVlsxf":"www.google.com","Im6cmf":"/_/ConsentUi","LVIXXb":1,"LoQv7e":false,"MT7f9b":[],"Mypbod":"https://www.googleapis.com/reauth","PYFuDc":"DUMMY_X_CLIENT_DATA_WIZ_GLOBAL_KEY_DO_NOT_USE","QrtxK":"","R6pIad":"%.@.]","S06Grb":"","TTHqvb":"https://kidsmanagement-pa.googleapis.com","Yllh3e":"%.@.1637316536530302,170695567,503968613]","cfb2h":"boq_identityfrontenduiserver_20211111.08_p0","eNnkwf":"1637316536","ejMLCd":"DUMMY_X_GEO_WIZ_GLOBAL_KEY_DO_NOT_USE","eptZe":"/_/ConsentUi/","fPDxwd":[1763433,1772879,1782333,45814370],"gGcLoe":false,"ksKYzf":"%.@.false,true,false,false,null,false,null,"",null,[["RelayState","SAMLRequest","SigAlg","Signature","TL","af","alwf","btmpl","c","cbflow","cd","checkConnection","checkedDomains","client_id","continue","cpbps","dsh","emr","faa","flowEntry","flowName","followup","forceOsidOriginForTest","gae","go","hd","hide_status_bar","hl","idvToken","ifkv","ifr","ignoreShadow","kdi","kid_continue","ltmpl","marl","migrate","multilogin","next","oauth","osid","pageId","passwdsession","platform_variant","pstMsg","rart","rip","rm","sarp","scc","scope","secure","sendvemail","service","session","skipShadow","skipvpage","source","ss","ss_mode","sspa","t","target","theme","ul"]],null,null,[],[[null,null,"https://accounts.google.com/AccountChooser?continue\u003dhttps://www.google.com/maps/place/Hauges+gate+66,+3019+Drammen/@59.7476727,10.1924839,17z/data=!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c
99fb4daa3946!8m2!3d59.7476727!4d10.1946726\u0026hl\u003dno"],null,[null,null,"https://accounts.google.com/signin/recovery?continue\u003dhttps://www.google.com/maps/place/Hauges+gate+66,+3019+Drammen/@59.7476727,10.1924839,17z/data=!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\u0026hl\u003dno"],[null,null,"https://accounts.google.com/restart?continue\u003dhttps://www.google.com/maps/place/Hauges+gate+66,+3019+Drammen/@59.7476727,10.1924839,17z/data=!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\u0026hl\u003dno"]],null,false,[]]","nQyAE":{"vEMF5e":"false","LlkYkf":"false","WBBR0d":"true","ViN5Xd":"false","p6p11":"true","EoymAc":"false","FbHgvb":"false","P1ceCf":"false","tBSlob":"false","XqMd3":"false","Ee3RQb":"false","a76Enc":"false","G25Msb":"false","VL5wad":"false","XSgnJf":"false","APYQvd":"false","rONKDd":"false","Dpi3Gf":"false","lttELb":"false","YDrknb":"false","kQrwQd":"false","ihOA2":"false","Ozjmee":"false"},"qwAQke":"ConsentUi","qymVe":"YNnh99mAz0-TXbG5yo-NTKQ-0l4","rtQCxc":-60,"thykhd":"AKH95euIF6P3sAglrRifxgEcfcB9vtI_XXJFaZ5Oty53KFviM2VkkPyRfmncVF-cgLGCK0LYtfquTSFWlA3cjJWXXNjjUICQ5v3brxMomscQEgs9760XyDgPgf0u003d","unNRMb":"AKJVzcpKsW1gHA6-l3WGdp1MQsU_eTjL-Ufc5L6-PjxIndj8D2t6PFy3SRwl7yc0EXpFbqSSCh50","vAyiz":"ChUI/o793Lf0vtdFEK6ek/ubmfGdvgEu003d","w2btAe":"%.@.null,null,"",false,null,null,true,false]","zChJod":"%.@.]"}; lt;/scriptgt; ''' import json,re json.loads(re.search(r'window.WIZ_global_data = ({.*})', html).group(1))