Python HTTP & APIs: requests, httpx, and Real-World Patterns
Consume REST APIs with the requests library, handle auth, retries, and errors, then graduate to httpx for async HTTP — with real patterns from production integration code.
Why This Matters
Pipelines fetch data from third-party APIs. Automation scripts hit internal services. ETL tools pull from REST endpoints. requests is how Python talks to the world — it's one of the most-downloaded packages in all of PyPI.
1. Installing requests and httpx
pip install requests httpx

2. The Five HTTP Verbs You Use Daily
import requests
BASE = "https://jsonplaceholder.typicode.com"
# GET — retrieve resource
resp = requests.get(f"{BASE}/posts/1")
# POST — create resource
resp = requests.post(f"{BASE}/posts", json={"title": "New Post", "body": "Content", "userId": 1})
# PUT — replace resource entirely
resp = requests.put(f"{BASE}/posts/1", json={"title": "Updated", "body": "New body", "userId": 1})
# PATCH — partial update
resp = requests.patch(f"{BASE}/posts/1", json={"title": "Just the title changed"})
# DELETE — remove resource
resp = requests.delete(f"{BASE}/posts/1")

3. Inspecting the Response
resp = requests.get("https://api.github.com/users/octocat")
# status
resp.status_code # 200
resp.ok # True if 2xx
# body
resp.json() # parse JSON -> dict / list
resp.text # raw string
resp.content # raw bytes
# headers
resp.headers["content-type"] # "application/json; charset=utf-8"
resp.headers.get("x-ratelimit-remaining")
# request info
resp.url # final URL after redirects
resp.elapsed # timedelta of how long it took
resp.history # list of redirect responses

Always check for errors
# raises requests.HTTPError for 4xx/5xx
resp.raise_for_status()
# or handle manually
if resp.status_code == 404:
return None
if resp.status_code == 429:
raise RateLimitError("Too many requests")
resp.raise_for_status()

4. Query Parameters
# these are equivalent:
resp = requests.get(
"https://api.example.com/users",
params={"page": 2, "per_page": 50, "status": "active"},
)
# actual URL: https://api.example.com/users?page=2&per_page=50&status=active
# multi-value params
resp = requests.get(url, params={"tag": ["python", "api", "tutorial"]})

5. Headers and Authentication
Static headers
headers = {
"Accept": "application/json",
"X-Client-Version": "1.0",
}
resp = requests.get(url, headers=headers)

Bearer token auth (most common)
import os
token = os.environ["API_TOKEN"]
resp = requests.get(
url,
headers={"Authorization": f"Bearer {token}"},
)

Basic auth
resp = requests.get(url, auth=("username", "password"))
# or
from requests.auth import HTTPBasicAuth
resp = requests.get(url, auth=HTTPBasicAuth("user", "pass"))

API key in header
resp = requests.get(
url,
headers={"X-API-Key": os.environ["MY_API_KEY"]},
)

6. Sessions — Reuse Connections and Headers
A Session persists settings across requests and reuses TCP connections (much faster for multiple calls).
import requests
import os
session = requests.Session()
session.headers.update({
"Authorization": f"Bearer {os.environ['API_TOKEN']}",
"Accept": "application/json",
"User-Agent": "my-pipeline/1.0",
})
session.timeout = 10 # seconds, applies to all requests
resp = session.get("https://api.example.com/users")
resp2 = session.post("https://api.example.com/users", json={"name": "Alice"})7. Timeouts
Always set a timeout. Without one, a slow server will hang your script forever.
# (connect_timeout, read_timeout) in seconds
resp = requests.get(url, timeout=(5, 30))
# single value applies to both
resp = requests.get(url, timeout=10)

8. Handling Errors Properly
import requests
from requests.exceptions import (
ConnectionError,
Timeout,
HTTPError,
RequestException,
)
def get_user(user_id: int) -> dict | None:
try:
resp = requests.get(
f"https://api.example.com/users/{user_id}",
timeout=10,
)
resp.raise_for_status()
return resp.json()
except Timeout:
print(f"Request timed out for user {user_id}")
return None
except ConnectionError:
print("Could not connect to API")
return None
except HTTPError as e:
if e.response.status_code == 404:
return None
print(f"HTTP error: {e.response.status_code}")
raise
except RequestException as e:
print(f"Unexpected error: {e}")
raise9. Retry Logic with tenacity
pip install tenacity

from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import requests
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=1, max=10),
retry=retry_if_exception_type(requests.RequestException),
)
def fetch_with_retry(url: str) -> dict:
resp = requests.get(url, timeout=10)
resp.raise_for_status()
return resp.json()Manual retry with backoff
import time
def fetch_with_backoff(url: str, max_retries: int = 3) -> dict:
for attempt in range(max_retries):
try:
resp = requests.get(url, timeout=10)
resp.raise_for_status()
return resp.json()
except requests.RequestException as e:
if attempt == max_retries - 1:
raise
wait = 2 ** attempt # 1, 2, 4 seconds
print(f"Retry {attempt + 1} after {wait}s: {e}")
time.sleep(wait)10. Uploading Files and Multipart
# upload a file
with open("report.pdf", "rb") as f:
resp = requests.post(
"https://api.example.com/upload",
files={"file": f},
)
# with metadata
with open("photo.jpg", "rb") as f:
resp = requests.post(
url,
files={"image": ("photo.jpg", f, "image/jpeg")},
data={"description": "Profile photo"},
)

11. Pagination Pattern
Most APIs paginate. Handle it with a generator:
from typing import Iterator
def get_all_users(base_url: str, token: str) -> Iterator[dict]:
    """Yield every user from the paginated ``GET {base_url}/users`` endpoint.

    Handles both an envelope payload ({"users": [...], "next_page": ...})
    and a bare JSON list. Raises requests.HTTPError on non-2xx responses.
    The session is closed when the generator finishes or is discarded.
    """
    # Bug fix: use a context manager so the Session (and its pooled
    # connections) is released instead of leaking.
    with requests.Session() as session:
        session.headers["Authorization"] = f"Bearer {token}"
        page = 1
        while True:
            resp = session.get(
                f"{base_url}/users",
                params={"page": page, "per_page": 100},
                timeout=15,
            )
            resp.raise_for_status()
            data = resp.json()
            # Bug fix: a bare-list payload has no .get() — the original
            # called data.get(...) unconditionally and crashed with
            # AttributeError on list responses. Branch on type first.
            if isinstance(data, list):
                users, next_page = data, None
            else:
                users = data.get("users", [])
                next_page = data.get("next_page")
            if not users:
                break
            yield from users
            if not next_page:
                break
            page += 1
for user in get_all_users("https://api.example.com", token):
print(user["email"])

12. API Client Class Pattern
For reusable integrations, wrap the session in a class:
from __future__ import annotations
import os
from typing import Any
import requests
from requests import Response
class GitHubClient:
    """Thin GitHub REST API wrapper sharing one authenticated Session."""

    BASE_URL = "https://api.github.com"

    def __init__(self, token: str | None = None) -> None:
        """Build the session; auth comes from *token* or $GITHUB_TOKEN if set."""
        session = requests.Session()
        session.headers.update({
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        })
        token = token or os.environ.get("GITHUB_TOKEN")
        if token:
            session.headers["Authorization"] = f"Bearer {token}"
        self._session = session

    def _get(self, path: str, **params: Any) -> Any:
        """GET ``BASE_URL + path``; raise on HTTP errors, return parsed JSON."""
        url = f"{self.BASE_URL}{path}"
        resp = self._session.get(url, params=params, timeout=15)
        resp.raise_for_status()
        return resp.json()

    def get_user(self, username: str) -> dict:
        """Return the public profile for *username*."""
        return self._get(f"/users/{username}")

    def get_repos(self, username: str, per_page: int = 30) -> list[dict]:
        """Return up to *per_page* repositories owned by *username*."""
        return self._get(f"/users/{username}/repos", per_page=per_page)

    def search_repos(self, query: str, sort: str = "stars") -> list[dict]:
        """Search repositories matching *query*, ordered by *sort*."""
        result = self._get("/search/repositories", q=query, sort=sort)
        return result["items"]
# Usage
gh = GitHubClient()
user = gh.get_user("octocat")
print(user["public_repos"])
repos = gh.search_repos("python fastapi", sort="stars")
for repo in repos[:5]:
print(repo["full_name"], repo["stargazers_count"])

13. httpx — Modern Sync + Async HTTP
httpx is a near-drop-in replacement for requests with async support and HTTP/2.
Synchronous (same API as requests)
import httpx
with httpx.Client(timeout=10) as client:
resp = client.get("https://httpbin.org/json")
resp.raise_for_status()
print(resp.json())

Async (the main reason to use httpx)
import asyncio
import httpx
async def fetch_many(urls: list[str]) -> list[dict]:
    """Fetch all *urls* concurrently with one AsyncClient and return the
    parsed JSON bodies in request order."""
    async with httpx.AsyncClient(timeout=15) as client:
        # gather() preserves input order regardless of completion order
        responses = await asyncio.gather(*(client.get(u) for u in urls))
        return [r.json() for r in responses]
urls = [
"https://jsonplaceholder.typicode.com/posts/1",
"https://jsonplaceholder.typicode.com/posts/2",
"https://jsonplaceholder.typicode.com/posts/3",
]
results = asyncio.run(fetch_many(urls))
print(len(results)) # 3, fetched concurrently

14. Mocking HTTP in Tests
pip install responses # for requests
pip install pytest-httpx # for httpx

import responses
import requests
@responses.activate
def test_get_user_returns_name():
    """get_user(1) should surface the 'name' field of the mocked payload."""
    # Register the stubbed endpoint before the code under test runs.
    responses.add(
        responses.GET,
        "https://api.example.com/users/1",
        status=200,
        json={"id": 1, "name": "Alice"},
    )

    result = get_user(1)

    assert result["name"] == "Alice"

Exercises
Exercise 1: Write a fetch_weather(city: str) -> dict function using the Open-Meteo API (no auth required). Handle timeouts and HTTP errors.
Exercise 2: Build a GitLabClient class with get_project, list_issues, and create_issue methods using a session with a private token.
Exercise 3: Write a paginate(url, params, token) generator that yields all items from a paginated GitHub API endpoint using the Link header for pagination.
Summary
| Scenario | Tool |
|----------|------|
| Simple one-off requests | requests.get/post/... |
| Multiple requests to same host | requests.Session |
| Retry on failure | tenacity + requests |
| Async concurrent requests | httpx.AsyncClient |
| Reusable API wrapper | Client class with Session |
| Unit testing HTTP | responses / pytest-httpx |
Next: Pandas for data pipelines and in-memory data transformation.
Enjoyed this article?
Explore the Backend Systems learning path for more.
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.