嘘~ 正在从服务器偷取页面 . . .

urllib补充

python

发布日期: 2020-07-08

文章字数: 254

阅读时长: 1 分

阅读次数:

引入urllib包

import urllib.request
import urllib.parse

例子

# Open the page; urlopen returns an object representing the whole HTTP response.
# The with-block closes the underlying connection when we are done with it.
with urllib.request.urlopen("https://www.baidu.com/") as response:
    print(response)  # <http.client.HTTPResponse object at 0x02E23BB0>
    # The body is a stream that can be consumed only once: a second read()
    # returns b"", so read it a single time and reuse the bytes below.
    body = response.read()
# Raw bytes of the response body.
print(body)
# Decode the bytes into text.
print(body.decode('utf-8'))

发送一个 POST 请求

# The data argument must be bytes: URL-encode the form fields, then encode the
# resulting text as UTF-8.
data = urllib.parse.urlencode({"hello": "world"}).encode("utf-8")
# Supplying data turns the request into a POST; the with-block closes the
# connection once the body has been read.
with urllib.request.urlopen("http://httpbin.org/post", data=data) as response:
    print(response.read().decode('utf-8'))

发送一个 GET 请求

# Without a data argument urlopen issues a GET request; the with-block closes
# the connection once the body has been read.
with urllib.request.urlopen("http://httpbin.org/get") as response:
    print(response.read().decode('utf-8'))

超时处理

import socket
import urllib.error

# A deliberately tiny timeout (0.01 s) so the request is expected to time out.
try:
    with urllib.request.urlopen("http://httpbin.org/get", timeout=0.01) as response:
        print(response.read().decode('utf-8'))
# A timeout while connecting/sending is wrapped in URLError, but a timeout
# while waiting for or reading the response surfaces as socket.timeout
# directly, so both must be caught.
except (urllib.error.URLError, socket.timeout):
    print("Time out!")

import urllib.request
import urllib.parse

url = "https://www.douban.com"
# Extra request headers: send a browser-like User-Agent instead of urllib's
# default one.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
}
# Build the request object carrying the URL and the custom headers.
req = urllib.request.Request(url=url, headers=headers)
# Send the request and read the response; the with-block closes the connection
# once the body has been read.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))