HTTP 请求库

requests 是 Python 中最流行的 HTTP 请求库,简洁易用。

0x01. 安装

pip install requests

0x02. 基本请求

GET 请求

import requests

# Plain GET request; inspect the status and the body as text and as JSON.
url = 'https://httpbin.org/get'
response = requests.get(url)
print(response.status_code)  # 200
print(response.text)         # body decoded to str
print(response.json())       # body parsed as JSON

# GET with query parameters passed as a dict.
params = dict(key1='value1', key2='value2')
response = requests.get(url, params=params)
print(response.url)  # full URL, query string included

# GET with custom request headers.
headers = {'User-Agent': 'MyApp/1.0'}
response = requests.get(url, headers=headers)

POST 请求

import requests

# Send form-encoded data.
data = {'username': 'user', 'password': 'pass'}
response = requests.post('https://httpbin.org/post', data=data)

# Send a JSON body: json= serialises the dict for us.
json_data = {'name': 'Alice', 'age': 25}
response = requests.post('https://httpbin.org/post', json=json_data)

# Upload a file. The file is opened in binary mode inside a with-block so the
# handle is closed as soon as the upload finishes instead of leaking.
with open('report.csv', 'rb') as report:
    files = {'file': report}
    response = requests.post('https://httpbin.org/post', files=files)

其他请求方法

import requests

# Form payload shared by the two calls that send a body (PUT and PATCH).
payload = {'key': 'value'}

# PUT request
response = requests.put('https://httpbin.org/put', data=payload)

# DELETE request
response = requests.delete('https://httpbin.org/delete')

# HEAD request
response = requests.head('https://httpbin.org/get')

# OPTIONS request
response = requests.options('https://httpbin.org/get')

# PATCH request
response = requests.patch('https://httpbin.org/patch', data=payload)

0x03. 响应处理

响应属性

import requests

response = requests.get('https://httpbin.org/get')

# Status
print(response.status_code)  # 200
print(response.ok)           # True when status_code is below 400 (not only 2xx)
print(response.reason)       # OK

# Response headers
print(response.headers)
print(response.headers['Content-Type'])

# Response body
print(response.text)         # body decoded to str
print(response.content)      # raw body bytes
print(response.json())       # body parsed as JSON

# Encoding
print(response.encoding)     # encoding used to decode response.text
response.encoding = 'utf-8'  # override it before reading response.text

# URL
print(response.url)          # URL of the response
print(response.history)      # redirect history

# Cookies
print(response.cookies)

异常处理

import requests
from requests.exceptions import (
    HTTPError,
    ConnectionError,
    Timeout,
    RequestException
)

try:
    # Pass a timeout so the Timeout branch below is reachable; without one
    # requests waits for the server indefinitely.
    response = requests.get('https://httpbin.org/status/404', timeout=10)
    response.raise_for_status()  # raises HTTPError for 4xx/5xx responses
except HTTPError as e:
    print(f'HTTP 错误: {e}')
except Timeout:
    # Checked before ConnectionError: requests' ConnectTimeout derives from
    # both classes, so the other order would report it as a connection error.
    print('请求超时')
except ConnectionError:
    print('连接错误')
except RequestException as e:
    # Base class of every requests exception: catch-all for the rest.
    print(f'请求错误: {e}')

0x04. 高级功能

会话

import requests

# A Session keeps the connection and the cookies across requests.
session = requests.Session()

# Session-level settings, applied to every request sent through it.
session.headers['User-Agent'] = 'MyApp/1.0'
session.auth = 'user', 'pass'

# The first request sets a cookie; the second one reads the cookies back.
set_cookie_url = 'https://httpbin.org/cookies/set/session_id/12345'
response1 = session.get(set_cookie_url)
response2 = session.get('https://httpbin.org/cookies')
print(response2.json())  # the cookie is still there

# Close the session explicitly ...
session.close()

# ... or let a with-block close it on exit.
with requests.Session() as session:
    response = session.get('https://httpbin.org/get')

认证

import requests
from requests.auth import HTTPBasicAuth, HTTPDigestAuth

# Basic auth with an explicit HTTPBasicAuth object.
basic_url = 'https://httpbin.org/basic-auth/user/pass'
response = requests.get(basic_url, auth=HTTPBasicAuth('user', 'pass'))

# Shorthand: a plain (username, password) tuple.
response = requests.get(basic_url, auth=('user', 'pass'))

# Digest auth.
digest = HTTPDigestAuth('user', 'pass')
response = requests.get(
    'https://httpbin.org/digest-auth/auth/user/pass',
    auth=digest,
)

# 自定义认证
from requests.auth import AuthBase

class TokenAuth(AuthBase):
    """Custom auth object that adds a bearer token to a request.

    Args:
        token: The token placed after "Bearer " in the Authorization header.
    """

    def __init__(self, token):
        self.token = token

    def __call__(self, r):
        """Set the Authorization header on request *r* and return *r*."""
        bearer = f'Bearer {self.token}'
        r.headers.update({'Authorization': bearer})
        return r

# Pass an instance of the custom auth class through the usual auth= argument.
token_auth = TokenAuth('my_token')
response = requests.get('https://api.example.com/data', auth=token_auth)

超时和重试

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Set a timeout. The endpoint delays its answer by 5 seconds, so the 3-second
# timeout is expected to fire; catch it so the rest of the script still runs.
try:
    response = requests.get('https://httpbin.org/delay/5', timeout=3)  # 3-second timeout
except requests.exceptions.Timeout:
    print('请求超时')

# Separate connect and read timeouts.
response = requests.get(
    'https://httpbin.org/get',
    timeout=(3.05, 27)  # 3.05 s to connect, 27 s to read
)

# Automatic retries
session = requests.Session()
retry = Retry(
    total=3,              # total number of retries
    backoff_factor=1,     # back-off factor applied between attempts
    status_forcelist=[500, 502, 503, 504]  # status codes that trigger a retry
)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)

# This endpoint always answers 500, so every retry fails and requests raises
# RetryError once the retry budget is exhausted.
try:
    response = session.get('https://httpbin.org/status/500')
except requests.exceptions.RetryError as e:
    print(f'请求错误: {e}')

代理

import requests

# Configure one proxy per URL scheme.
proxies = dict(
    http='http://10.10.1.10:3128',
    https='http://10.10.1.10:1080',
)

response = requests.get('https://httpbin.org/ip', proxies=proxies)

# Proxy that requires authentication: credentials go into the proxy URL.
proxies = dict(http='http://user:pass@10.10.1.10:3128')

# SOCKS proxy (requires: pip install requests[socks]).
socks_proxy = 'socks5://user:pass@host:port'
proxies = dict(http=socks_proxy, https=socks_proxy)

SSL 证书验证

import requests

target = 'https://httpbin.org/get'

# Disable certificate verification (not recommended).
response = requests.get(target, verify=False)

# Use a custom CA certificate file.
ca_bundle = '/path/to/certfile'
response = requests.get(target, verify=ca_bundle)

# Client certificate, given as a (certificate, key) pair of paths.
client_cert = ('/path/client.cert', '/path/client.key')
response = requests.get(target, cert=client_cert)

0x05. 实际应用

封装请求工具

import requests
from typing import Optional, Dict, Any
from dataclasses import dataclass

@dataclass
class APIResponse:
    """Uniform result returned by every APIClient call.

    Attributes:
        success: True when the request finished without an error status and
            its body (if any) was parsed as JSON.
        status_code: HTTP status code, or 0 when no response was received.
        data: Parsed JSON body; None for an empty body or on failure.
        error: Description of the failure; None on success.
    """

    success: bool
    status_code: int
    data: Any = None
    error: Optional[str] = None


class APIClient:
    """Small JSON API client built on one shared requests.Session.

    Request failures are never raised to the caller; they are reported
    through the returned APIResponse. The client can be used as a context
    manager so the session is closed on exit.

    Args:
        base_url: Root URL of the API; a trailing slash is stripped.
        timeout: Timeout in seconds passed to every request.
    """

    def __init__(self, base_url: str, timeout: int = 30):
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        })

    def __enter__(self) -> 'APIClient':
        return self

    def __exit__(self, *exc_info) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying session."""
        self.session.close()

    def _request(self, method: str, endpoint: str, **kwargs: Any) -> APIResponse:
        """Send one request and convert the outcome into an APIResponse."""
        url = f'{self.base_url}/{endpoint.lstrip("/")}'
        try:
            response = self.session.request(
                method, url, timeout=self.timeout, **kwargs
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # e.response is None when no response arrived (connection error,
            # timeout); getattr then falls back to 0.
            return APIResponse(
                success=False,
                status_code=getattr(e.response, 'status_code', 0),
                error=str(e)
            )

        # An empty body (e.g. 204 No Content) is a success with no data;
        # calling json() on it would fail.
        if not response.content:
            return APIResponse(success=True, status_code=response.status_code)

        try:
            payload = response.json()
        except ValueError as e:
            # JSON decode errors are ValueError subclasses; keep the real
            # status code so the caller can still see what the server sent.
            return APIResponse(
                success=False,
                status_code=response.status_code,
                error=f'Invalid JSON in response body: {e}'
            )
        return APIResponse(
            success=True,
            status_code=response.status_code,
            data=payload
        )

    def get(self, endpoint: str, params: Optional[Dict] = None) -> APIResponse:
        """Send a GET request to *endpoint* with optional query *params*."""
        return self._request('GET', endpoint, params=params)

    def post(self, endpoint: str, data: Optional[Dict] = None) -> APIResponse:
        """Send a POST request to *endpoint* with *data* as the JSON body."""
        return self._request('POST', endpoint, json=data)

    def set_auth_token(self, token: str):
        """Send *token* as a bearer token on all subsequent requests."""
        self.session.headers['Authorization'] = f'Bearer {token}'

# Usage: print the payload on success, otherwise the error message.
client = APIClient('https://api.example.com')
response = client.get('/users')
print(response.data if response.success else f'错误: {response.error}')

文件下载

import requests
from pathlib import Path

def download_file(url: str, save_path: str, chunk_size: int = 8192, timeout=30):
    """Stream *url* to *save_path*, printing progress while downloading.

    Args:
        url: Address of the file to download.
        save_path: Destination path; missing parent directories are created.
        chunk_size: Number of bytes requested per chunk.
        timeout: Timeout in seconds passed to requests (None disables it).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server answers with an error status.
    """
    # Using the response as a context manager closes the streamed response
    # even when the body is not read to the end.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()

        # A missing Content-Length header gives 0, which disables the
        # percentage display below.
        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0

        target = Path(save_path)
        target.parent.mkdir(parents=True, exist_ok=True)

        with open(target, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if not chunk:
                    continue
                f.write(chunk)
                downloaded += len(chunk)
                if total_size:
                    progress = (downloaded / total_size) * 100
                    # \r rewrites the same console line on every update.
                    print(f'\r下载进度: {progress:.1f}%', end='')

    print(f'\n文件已保存到: {save_path}')

# Usage: download an archive to a path below downloads/.
download_file(url='https://example.com/file.zip', save_path='downloads/file.zip')

并发请求

requests 本身是同步库;下面的示例改用 aiohttp 配合 asyncio 实现并发请求,需要先执行 pip install aiohttp。

import asyncio
import aiohttp
from typing import List

async def fetch_url(session: aiohttp.ClientSession, url: str) -> str:
    """Fetch *url* through *session* and return the response body as text."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body

async def fetch_all(urls: List[str]) -> List[str]:
    """Fetch every URL in *urls* concurrently over one shared client session."""
    async with aiohttp.ClientSession() as client:
        pending = (fetch_url(client, target) for target in urls)
        bodies = await asyncio.gather(*pending)
    return bodies

# Usage: fetch three httpbin /delay endpoints in a single event-loop run.
urls = [
    'https://httpbin.org/delay/1',
    'https://httpbin.org/delay/2',
    'https://httpbin.org/delay/3',
]

fetch_job = fetch_all(urls)
results = asyncio.run(fetch_job)
print(f'获取了 {len(results)} 个页面')

参考

  1. Requests 官方文档:https://requests.readthedocs.io/
  2. Requests GitHub:https://github.com/psf/requests
  3. aiohttp 文档:https://docs.aiohttp.org/