For example, suppose we want to use Baidu search to look up the current price and market value of a stock.
Here we query the current price and market value of the stock with code "600754".
The implementation steps are as follows:
1. Import the requests library, and also the lxml library (used later to parse the result)
import requests  # Import the requests library (if it is missing: pip install requests)
from lxml import etree  # Import the lxml library (if it is missing: pip install lxml)
2. Construct the request data. A Baidu search request carries quite a lot of data (cookies, headers and query parameters):
# Cookies copied from a browser session on Baidu.
# NOTE(security): BDUSS is a login credential. Replace these values with the
# ones from your own browser session and never publish a live token.
cookies = {
    'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
    'PSTM': '1578233930',
    'BD_UPN': '12314753',
    'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpcWtabHRlSVFBACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2',
    # NOTE(review): the key for this value was lost when the article was
    # published; the value has the shape of Baidu's COOKIE_SESSION cookie,
    # so that name is assumed here -- confirm against your own browser.
    'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
    'cflag': '13%3A3',
    'BD_HOME': '1',
    'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
    'delPer': '0',
    'BD_CK_SAM': '1',
    'PSINO': '3',
    'H_PS_PSSID': '1438_21104_26350',
    'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9Iex%2FPZmThFknleY0LwzNQZ8j8',
    'BDSVRTM': '121',
    'WWW_ST': '1580466352318',
}

# Request headers copied from the browser's developer tools.
headers = {
    'is_xhr': '1',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'is_pbs': '600754',
    'Accept': '*/*',
    # NOTE(review): the leading part of this URL (scheme, host and the first
    # query parameters) was stripped when the article was published; only the
    # surviving tail is reproduced here.
    'Referer': '%2FjeON9Iex%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'is_referer': '',
}

# Query-string parameters of the search request; 'wd' is the search keyword
# (here the stock code 600754).
params = (
    ('ie', ['utf-8', 'utf-8']),
    ('newi', '1'),
    ('mod', '1'),
    ('isbd', '1'),
    ('isid', 'b379448d00013935'),
    ('wd', '600754'),
    ('rsv_spt', '1'),
    ('rsv_iqid', '0xa5a17c8700013159'),
    ('issp', '1'),
    ('f', '8'),
    ('rsv_bp', '1'),
    ('rsv_idx', '2'),
    ('rqlang', 'cn'),
    ('tn', 'baiduhome_pg'),
    ('rsv_enter', '0'),
    ('rsv_dl', 'tb'),
    ('oq', '600754'),
    ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9Iex/PZmThFknleY0LwzNQZ8j8'),
    ('rsv_pq', 'b379448d00013935'),
    ('bs', '600754'),
    ('rsv_sid', '1438_21104_26350'),
    ('_ss', '1'),
    ('clist', ''),
    ('hsug', ''),
    ('f4s', '1'),
    ('csor', '6'),
    ('_cr1', '29647'),
)
3. Send the request and convert the result to text (the returned data is analyzed in the next step, so we need its text content)
import requests  # third-party HTTP client; the standard-library `re` module has no get()

# Send the search request and keep the decoded body text for parsing.
# NOTE(review): the URL literal is empty in the published article (the link
# was stripped); the Baidu web-search endpoint is assumed here -- confirm.
response = requests.get(
    'https://www.baidu.com/s',
    headers=headers,
    params=params,
    cookies=cookies,
    timeout=10,  # do not hang forever if the server never answers
).text
4. Parse the result (XPath is used here to extract the data from the page; other articles on this blog explain its usage in detail):
# Parse the returned HTML text into an element tree that can be queried with XPath.
html = etree.HTML(response)

# Current price
a = html.xpath(
    '//span[@class="op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()'
)
# xpath() returns a list of matches; guard against an empty list (for example
# when the page layout differs) instead of failing with IndexError.
print('Current price:', a[0] if a else 'not found')

# Current market value
b = html.xpath(
    '//ul[@class="op-stockdynamic-moretab-info"]/li[8]/span[2]/text()'
)
print('Current market value:', b[0] if b else 'not found')
For a quick way to capture the request headers, cookies and parameters, please refer to this article:
Complete code:
# This script queries Baidu for a given stock code and prints the stock's
# current price and market value.
import requests  # Import the requests library (if it is missing: pip install requests)
from lxml import etree  # Import the lxml library (if it is missing: pip install lxml)

# Cookies copied from a browser session on Baidu.
# NOTE(security): BDUSS is a login credential. Replace these values with the
# ones from your own browser session and never publish a live token.
cookies = {
    'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
    'PSTM': '1578233930',
    'BD_UPN': '12314753',
    'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpcWtabHRlSVFBACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2',
    # NOTE(review): the key for this value was lost when the article was
    # published; the value has the shape of Baidu's COOKIE_SESSION cookie,
    # so that name is assumed here -- confirm against your own browser.
    'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
    'cflag': '13%3A3',
    'BD_HOME': '1',
    'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
    'delPer': '0',
    'BD_CK_SAM': '1',
    'PSINO': '3',
    'H_PS_PSSID': '1438_21104_26350',
    'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9Iex%2FPZmThFknleY0LwzNQZ8j8',
    'BDSVRTM': '121',
    'WWW_ST': '1580466352318',
}

# Request headers copied from the browser's developer tools.
headers = {
    'is_xhr': '1',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'is_pbs': '600754',
    'Accept': '*/*',
    # NOTE(review): the leading part of this URL (scheme, host and the first
    # query parameters) was stripped when the article was published; only the
    # surviving tail is reproduced here.
    'Referer': '%2FjeON9Iex%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'is_referer': '',
}

# Query-string parameters of the search request; 'wd' is the search keyword
# (here the stock code 600754).
params = (
    ('ie', ['utf-8', 'utf-8']),
    ('newi', '1'),
    ('mod', '1'),
    ('isbd', '1'),
    ('isid', 'b379448d00013935'),
    ('wd', '600754'),
    ('rsv_spt', '1'),
    ('rsv_iqid', '0xa5a17c8700013159'),
    ('issp', '1'),
    ('f', '8'),
    ('rsv_bp', '1'),
    ('rsv_idx', '2'),
    ('rqlang', 'cn'),
    ('tn', 'baiduhome_pg'),
    ('rsv_enter', '0'),
    ('rsv_dl', 'tb'),
    ('oq', '600754'),
    ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9Iex/PZmThFknleY0LwzNQZ8j8'),
    ('rsv_pq', 'b379448d00013935'),
    ('bs', '600754'),
    ('rsv_sid', '1438_21104_26350'),
    ('_ss', '1'),
    ('clist', ''),
    ('hsug', ''),
    ('f4s', '1'),
    ('csor', '6'),
    ('_cr1', '29647'),
)

# Send the search request and keep the decoded body text for parsing.
# NOTE(review): the URL literal is empty in the published article (the link
# was stripped); the Baidu web-search endpoint is assumed here -- confirm.
response = requests.get(
    'https://www.baidu.com/s',
    headers=headers,
    params=params,
    cookies=cookies,
    timeout=10,  # do not hang forever if the server never answers
).text

# Parse the returned HTML text into an element tree that can be queried with XPath.
html = etree.HTML(response)

# Current price
a = html.xpath(
    '//span[@class="op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()'
)
# xpath() returns a list of matches; guard against an empty list (for example
# when the page layout differs) instead of failing with IndexError.
print('Current price:', a[0] if a else 'not found')

# Current market value
b = html.xpath(
    '//ul[@class="op-stockdynamic-moretab-info"]/li[8]/span[2]/text()'
)
print('Current market value:', b[0] if b else 'not found')

# Article URL: https://www.liaochihuo.com/info/597248.html
# Labels: [python] [python crawler] [xpath] [lxml]