#python #web-scraping #beautifulsoup #python-requests
Вопрос:
Я выполняю следующий код, чтобы войти в систему по URL-адресу, заданному в loginUrl.
После аутентификации я хочу перейти на другую веб-страницу, URL-адрес которой хранится в portfolioUrl.
Однако, когда я выполняю print(portfolioPage.content),
печатается веб-страница, которая открывается сразу после входа в систему, а не страница portfolioPage,
которая мне нужна. Что не так с моим кодом?
from bs4 import BeautifulSoup
import requests
# Create a session so that cookies received while logging in are sent
# with every later request made through the same object.
session = requests.Session()
# OpenID Connect authorization URL of the login page; the query
# parameters (client_id, redirect_uri, state, ...) are separated by '&'.
loginUrl='https://www.investopedia.com/auth/realms/investopedia/protocol/openid-connect/auth?client_id=inv-simulator&redirect_uri=https://www.investopedia.com/auth/realms/investopedia/shopify-auth/inv-simulator/login?&redirectUrl=https%3A%2F%2Fwww.investopedia.com%2Fauth%2Frealms%2Finvestopedia%2Fprotocol%2Fopenid-connect%2Fauth%3Fresponse_type%3Dcode%26approval_prompt%3Dauto%26redirect_uri%3Dhttps%253A%252F%252Fwww.investopedia.com%252Fsimulator%252Fhome.aspx%26client_id%3Dinv-simulator-conf&state=7edda3b2-eb6a-441f-8589-b42b8b78accf&response_mode=fragment&response_type=code&scope=openid&nonce=cd558670-7ae3-4c14-8281-bc149d4987b3'
# Page that should be fetched once the session is authenticated.
portfolioUrl = 'https://www.investopedia.com/simulator/trade/tradestock.aspx'
# Credentials submitted as the form fields of the login form.
payload = {
    'username': 'my email',
    'password': 'my password'
}
# Download the login page and read the submit URL from the 'action'
# attribute of the first <form> element found in it.
authPage = session.get(loginUrl)
soup = BeautifulSoup(authPage.content, 'html.parser')
form = soup.find('form')
postUrl = form['action']
# Submit the credentials to the form's action URL.
auth = session.post(postUrl, data=payload)
# Request the target page with the same session and print the raw body.
portfolioPage = session.get(portfolioUrl)
soup = BeautifulSoup(portfolioPage.content, 'html.parser')
print(portfolioPage.content)
Правка: ответ t4kq работает отлично; однако, когда я выполняю print(page.text),
выводится не HTML-код страницы, как ожидалось, а вот такой код:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" version="XHTML RDFa 1.0" dir="ltr">
<head profile="http://www.w3.org/1999/xhtml/vocab">
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="application-name" content="Investopedia"/>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="X-UA-Compatible" content="IE=9">
<!-- Page Taxonomy -->
<script type="text/javascript">
//<![CDATA[
var _pageTaxonomy = {
"Hashkey": "$simulator$trade$tradestock",
"Channel": "Simulator",
"SubChannel": "",
"Advertising": "Investing",
"SubAdvertising": "Simulator",
"AdTarget": "investopedia.com/simulator",
"DfpTarget": "Investing/Investing",
"Tags": null,
"Type": "Simulator",
"Lucrativeness": null,
"Timelessness": "Timeless",
"Feature": "",
"Design": "",
"InterestLevel": null,
"Path" : "/simulator/trade/tradestock.aspx",
};
//]]>
</script>
<!-- End Page Taxonomy -->
<script language="javascript" type="text/javascript">var idc_slots = {};
idc_slots.slots = ["AdSlot_AF-Top-Leaderboard","AdSlot_AF-Left-Multi","AdSlot_BF-Right-Button1","AdSlot_BF-Right-Button2","AdSlot_BF-Right-Button3","AdSlot_BF-Right-Button4"];
idc_slots.build = function(slot) {
return "/479/INV-NA/Investing/Investing/position/Simulator".replace("position", slot.position);
};</script><script type="text/javascript">
idc_slots.slots.push({
"AdSlot_AF-Top-Leaderboard" : {
sizeMappings: [
{
viewportSize: [1000, 1],
slotSizes: [[728, 90], [970, 90], [950, 90], [960, 90], [970, 66], [980, 90],"fluid"]
},
{
viewportSize: [700, 1],
slotSizes: [[728, 90], [468, 60]]
},
{
viewportSize: [400, 1],
slotSizes: [468, 60]
},
{
viewportSize: [0, 0],
slotSizes: [[320, 50], [320, 100]]
}
],
amzSizes : {
desktop: [[728 ,90]],
tablet: [[728 ,90]],
phone: [[728 ,90], [320,50]]
}
}});
</script>
<title>Investopedia Stock Simulator - Investopedia Stock Simulator - Trade a Stock</title><meta name="Description" content="Fantasyamp;#x20;stockamp;#x20;marketamp;#x20;gameamp;#x20;thatamp;#x20;simulatesamp;#x20;tradingamp;#x20;stocksamp;#x20;andamp;#x20;options.">
<meta name="viewport" content="widthamp;#x3D;device-width,amp;#x20;initial-scaleamp;#x3D;1">
<meta name="Description" content="Fantasyamp;#x20;stockamp;#x20;marketamp;#x20;gameamp;#x20;thatamp;#x20;simulatesamp;#x20;tradingamp;#x20;stocksamp;#x20;andamp;#x20;options.">
<meta name="viewport" content="widthamp;#x3D;device-width,amp;#x20;initial-scaleamp;#x3D;1"> <link rel="canonical" href="https://www.investopedia.com/simulator/trade/tradestock.aspx" />
<link href="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;publicamp;#x2F;imgamp;#x2F;favicon.ico" rel="shortcutamp;#x20;icon" type="imageamp;#x2F;vnd.microsoft.icon">
<link href="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;destamp;#x2F;cssamp;#x2F;simulator.cssamp;#x3F;vamp;#x3D;202102030915" media="screen" rel="stylesheet" type="textamp;#x2F;css">
<link href="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;publicamp;#x2F;imgamp;#x2F;favicon.ico" rel="shortcutamp;#x20;icon" type="imageamp;#x2F;vnd.microsoft.icon"><script language="javascript" type="text/javascript">
var googletag = googletag || {};
googletag.cmd = googletag.cmd || [];
</script><script language="javascript" type="text/javascript">
var sem_pageview = false;
var sem_ocode = '9999';
var sem_ldid = '';
var sem_sh = '';
function updateSemVariable(query) {
if (query[1] === undefined) {
return;
}
switch(query[0]) {
case 'o':
sem_ocode = query[1];
break;
case 'ldid':
sem_ldid = query[1];
break;
case 'sh':
sem_sh = query[1];
break;
}
}
function getCookie(cname) {
var name = cname "=";
var ca = document.cookie.split(';');
for (var i = 0; i < ca.length; i ) {
var c = ca[i];
while (c.charAt(0) == ' ') c = c.substring(1);
if (c.indexOf(name) == 0) return c.substring(name.length,c.length);
}
return "";
}
function getSemCookie() {
var queryStr = getCookie('semuser');
if (queryStr == "") {
return;
}
sem_pageview = true;
var queries = queryStr.split("amp;");
for (var i = 0, l = queries.length; i < l; i ) {
var query = queries[i].split('=');
updateSemVariable(query);
}
}
getSemCookie();
var updateAup = function(aUp) {
aUp = aUp.replace("INV-NA", "invsem-serp-ds");
var utms = null;
if (typeof getUrlParam === "function") {
try {
utms = getUrlParam("utm_source");
} catch (e) {}
}
var aUp_arr = aUp.split("/");
var last = aUp_arr.pop();
aUp_arr.push((utms !== null ? utms : "dir")
"_" (typeof sem_ocode !== "undefined" ?
sem_ocode : 0));
if (aUp_arr.length > 3) {
aUp_arr[3] = last;
}
return aUp_arr.join("/");
};
if (typeof googletag !== "undefined") {
googletag.cmd.push( function() {
if ((typeof sem_pageview !== 'undefined') amp;amp; (sem_pageview == true)) {
var processArgs = function(arguments) {
if (typeof arguments === "object") {
for (var i = 0; i < arguments.length; i ) {
if (arguments[i].indexOf("479") > -1) {
arguments[i] = updateAup(arguments[i]);
break;
}
}
}
return arguments;
};
googletag.defineSlot = (function() {
var orig_func = googletag.defineSlot;
return function() {
return orig_func.apply(this, processArgs(arguments));
};
})();
googletag.defineOutOfPageSlot = (function() {
var orig_func = googletag.defineOutOfPageSlot;
return function() {
return orig_func.apply(this, processArgs(arguments));
};
})();
}
});
}
</script><script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;publicamp;#x2F;simulatoramp;#x2F;jsamp;#x2F;jquery.min.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;jsamp;#x2F;jquery.mcs.min.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;publicamp;#x2F;simulatoramp;#x2F;jsamp;#x2F;cookie.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;publicamp;#x2F;simulatoramp;#x2F;jsamp;#x2F;cookiemix.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;publicamp;#x2F;simulatoramp;#x2F;jsamp;#x2F;g.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;publicamp;#x2F;simulatoramp;#x2F;jsamp;#x2F;microsoftAjax.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;publicamp;#x2F;simulatoramp;#x2F;jsamp;#x2F;microsoftAjaxWebForms.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;simulator_uiamp;#x2F;jsamp;#x2F;ScrollingTicker.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;cdn.jsdelivr.netamp;#x2F;npmamp;#x2F;promise-polyfillamp;#x40;7amp;#x2F;distamp;#x2F;polyfill.min.js"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;destamp;#x2F;jsamp;#x2F;inv.min.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;distamp;#x2F;simulator.min.js"></script>
<script type="textamp;#x2F;javascript" src="httpsamp;#x3A;amp;#x2F;amp;#x2F;i.investopedia.comamp;#x2F;distamp;#x2F;gdpr.min.jsamp;#x3F;vamp;#x3D;202102030915"></script>
<script type="text/javascript">
eval(function(p,a,c,k,e,d){e=function(c){return c.toString(36)};if(!''.replace(/^/,String)){while(c--){d[c.toString(a)]=k[c]||c.toString(a)}k=[function(e){return d[e]}];e=function(){return'\w '};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\b' e(c) '\b','g'),k[c])}}return p}('7 2(9){o d(9)}a 0={4:'',3:'e',6:'',5:''};a 8=f.c({h:2('i='),1:{g:2('j='),k:2('m')}});8.n(7(1){0.4=1['4']||0.4;0.3=1['3']||0.3;0.6=1['b']||0.6;0.5=1['l']||0.5});',25,25,'geoData|data|decode|country_code|city|FIN_zip|FIN_state|function|jqXHR|encoded|var|region_code|ajax|atob|FR|jQuery|access_key|url|aHR0cHM6Ly9hcGkuaXBzdGFjay5jb20vY2hlY2s|MTBlZjJlYjI2NzFhNjQ5MTQ5NDk1ODZjMzExMDdiYWQ|fields|zip|Y2l0eSxjb3VudHJ5X2NvZGUscmVnaW9uX2NvZGUsemlw|done|return'.split('|'),0,{}))
</script>
<script type="text/javascript">
(function(d) {
var e = d.createElement('script');
e.src = d.location.protocol '//tag.bounceexchange.com/2320/i.js';
e.async = true;
d.getElementsByTagName("head")[0].appendChild(e);
}(document));
</script>
</head>
<!--shift_source: 4824cfbe9ef0-->
<body class="simulator-page" onunload="SaveTickerPos();">
<div style="display: none;">
<!-- Start of DoubleClick Spotlight Tag: Please do not remove -->
<!-- Activity Name for this tag is:IP Simulator -->
<!-- Web site URL where tag should be placed: http://www.investopedia.com/simulator -->
<!-- This tag must be placed within the opening <body> tag, as close to the beginning of it as possible -->
<!-- Creation Date: Thu Jul 02 17:02:35 EDT 2009 -->
<script language="JavaScript">
function SaveTickerPos()
{
try
{
for (var obj in allTickers){
allTickers[obj].paused = true;
jQuery.cookie(allTickers[obj].cookieName, allTickers[obj].x, {path: '/'});
}
}
catch(e){}
}
var axel = Math.random() "";
var a = axel * 10000000000000;
document.write('<img src="https://ad.doubleclick.net/activity;src=2359949;type=ips;cat=ips;ord=1;num=' a '?" width=1 height=1 border=0>');
</script>
<noscript>
<img src="https://ad.doubleclick.net/activity;src=2359949;type=ips;cat=ips;ord=1;num=1?" width=1 height=1 border=0>
</noscript>
<!-- End of DoubleClick Spotlight Tag: Please do not remove -->
<!-- Begin comScore Tag -->
<script type="text/javascript" language="javascript">
var _comscore = _comscore || [];
_comscore.push({ c1: "2", c2: "18280457", c4: "https://www.investopedia.com/simulator/trade/tradestock.aspx" });
(function() {
var s = document.createElement("script"), el = document.getElementsByTagName("script")[0]; s.async = true;
s.src = (document.location.protocol == "https:" ? "https://sb" : "http://b") ".scorecardresearch.com/beacon.js";
el.parentNode.insertBefore(s, el);
})();
</script>
<noscript>
<img src="https://sb.scorecardresearch.com/p?c1=2amp;c2=18280457amp;c4=https://www.investopedia.com/simulator/trade/tradestock.aspxamp;cv=2.0amp;cj=1" />
</noscript>
<!-- End comScore Tag -->
</div>
<script type='text/javascript' language="JavaScript">
//<![CDATA[
if (getCookie('freenewsletterreg') == null) {
setCookie("freenewsletterreg", "ad", 30);
}
var user_info = $.parseJSON(decodeURIComponent(getCookie('user_info')).replace(/ /g, ' '));
//]]>
</script>
<!--<script type='text/javascript' src="https://www.investopedia.com/simulator/Common/VcidScript.ashx?u=e3bfd87f21d741578241089c9aa5f4c8"></script>-->
<!-- Google Tag Manager -->
<noscript>
<iframe src="//www.googletagmanager.com/ns.html?id=GTM-5V3WHJ"
height="0" width="0"
style="display:none;visibility:hidden"></iframe>
</noscript>
<script>(function (w, d, s, l, i) {
w[l] = w[l] || [];
w[l].push({'gtm.start': new Date().getTime(), event: 'gtm.js'});
var f = d.getElementsByTagName(s)[0],
j = d.createElement(s), dl = l != 'dataLayer' ? 'amp;l=' l : '';
j.async = true;
j.src =
'//www.googletagmanager.com/gtm.js?id=' i dl;
f.parentNode.insertBefore(j, f);
})(window, document, 'script', 'dataLayer', 'GTM-5V3WHJ');</script>
<!-- End Google Tag Manager -->
<script type="text/javascript">
dataLayer.push(_pageTaxonomy);
var pageviewID = genPageviewId();
dataLayer.push({'pageviewID' : pageviewID});
</script>
<!-- ================================= Header ================================= -->
<div id="Header">
<div class="mid">
<div class="brand clear layout-size">
<a href="//index.investopedia.com/"><div class="m-search-icon"><i></i></div></a>
<div class="logo-container">
<a href="/" class="logo"></a>
<div class="button-container">
<a class="button view-markets-btn inv-ga-link-tracking" href="/markets/" target="_blank" data-ga-label="blue-markets-cta">
View Markets
</a>
</div>
</div>
<div id="ctl00_AdLeaderBoard1_cgiAdTopLeaderboard" class="leader">
<div id='AdSlot_AF-Top-Leaderboard' adonis-marker></div>
</div>
</div>
</div>
</div>
<!-- ================================= Header //End ================================= -->
<!-- ================================= Content ================================= -->
<div id="Content" class="full">
<!-- ================================= Left Navigation ================================= -->
<div class="left-nav">
<div class="label">
Trade </div>
<ul>
<li class="">
<span></span>
<a href="https://www.i
Ответ №1:
Мне кажется, вы неправильно отправляете свои данные (POST) и не держите сеанс открытым после входа в систему. Попробуйте так…
# Use requests.Session() as a context manager so the session is closed
# automatically once the block is done.
# loginUrl and portfolioUrl are not defined in this snippet; they must
# already exist when it runs.
with requests.Session() as login_request:
    payload = {
        'username': 'my email',
        'password': 'my password'
    }
    login_request.post(loginUrl, data=payload)
    # While logged in, get the content of the portfolioUrl variable.
    source_code = login_request.get(portfolioUrl).content
# After this you can use soup to parse the source_code.
soup = BeautifulSoup(source_code, 'html.parser')
print(soup)  # to check if it's printing the logged in data
Комментарии:
1. Это просто выводит страницу входа в систему.
Ответ №2:
Вы можете попробовать это
import requests
from bs4 import BeautifulSoup
# Create a session so the login cookies are reused by later requests.
session = requests.Session()
url = 'https://investopedia.com/simulator/portfolio/'
payload = {
    'username': 'your_email',
    'password': 'your_password'
}
# Get the log in page.
auth_page = session.get(url)
soup = BeautifulSoup(auth_page.content, 'html.parser')
# Get the first <form> element of that page.
form = soup.find('form')
# The form's 'action' attribute is the URL the credentials are posted to.
post_url = form['action']
# Authenticate.
session.post(post_url, data=payload)
# Fetch and parse the content page with the same session.
content_url = 'https://investopedia.com/simulator/trade/tradestock.aspx'
page = session.get(content_url)
page_soup = BeautifulSoup(page.content, 'html.parser')
# Locate the simulator container and take the second 'table2' table in it.
sim_page = page_soup.find('div', {'class': 'sim-page'})
table = sim_page.find_all('table', {'class': 'table2'})[1]
rows = table.find_all('tr')
# Print the header cell and the value cell of every row, followed by a
# separator line.
for row in rows:
    print(row.find('th').text)
    print(row.find('td').text)
    print('----')
Value (USD)
$10,000.00
----
Buying Power
$10,000.00
----
Cash
$10,000.00
----
Комментарии:
1. Я знаю, что эта тема немного устарела, но у меня есть вопрос, если можно. Когда я выполняю print(page.text), выводится не ожидаемый HTML-код страницы, а другой странный код, который вы найдете в отредактированном сообщении. Почему это происходит?
2. @bassel2777 я не могу понять, что ты имеешь в виду
3. Я имею в виду, что хочу напечатать HTML-код страницы, используя print(page.text):
этот вызов должен печатать HTML-код страницы, но это не так!
4. Он дает странный результат, который я привел в посте выше.
5. @bassel2777 значит, мой ответ тебя не устраивает?