Python & FastAPI · Lesson 4 of 10
File Handling & Scripting: pathlib, argparse, and Typer
Files and Scripts Are the Foundation of Automation
Pipeline tools, data ingestion scripts, DevOps automation, report generators — all of them read files, write files, and need a command-line interface. This lesson covers the complete toolkit.
1. File I/O Basics
Reading a file
# always use `with` — it guarantees the file is closed
with open("data.txt", "r", encoding="utf-8") as f:
content = f.read() # read entire file as string
with open("data.txt", "r") as f:
lines = f.readlines() # list of lines including \n
with open("data.txt", "r") as f:
for line in f: # iterate line by line (memory-efficient)
print(line.strip())Writing a file
with open("output.txt", "w", encoding="utf-8") as f:
f.write("Hello\n")
f.write("World\n")
# append mode — doesn't truncate
with open("log.txt", "a") as f:
f.write("New log entry\n")
# write multiple lines at once
lines = ["line one\n", "line two\n", "line three\n"]
with open("output.txt", "w") as f:
f.writelines(lines)File modes
| Mode | Meaning |
|------|---------|
| "r" | Read (default) |
| "w" | Write — creates or truncates |
| "a" | Append — creates or appends |
| "x" | Exclusive create — fails if exists |
| "b" | Binary mode (add to above: "rb", "wb") |
| "+" | Read + write: "r+", "w+" |
2. pathlib — The Modern Way to Work with Paths
pathlib.Path replaces os.path with an object-oriented API that offers the same interface on Windows, macOS, and Linux.
from pathlib import Path
# creating paths
base = Path("/home/user/projects")
config = base / "config" / "settings.json" # join with /
relative = Path("data/input.csv")
# properties
config.name # "settings.json"
config.stem # "settings"
config.suffix # ".json"
config.suffixes # [".json"]
config.parent # Path('/home/user/projects/config')
config.parts # ('/', 'home', 'user', 'projects', 'config', 'settings.json')
# checking existence
config.exists()
config.is_file()
config.is_dir()
# resolve to absolute path
relative.resolve()  # Path('/current/working/dir/data/input.csv')

Creating directories and files
output_dir = Path("output/reports")
output_dir.mkdir(parents=True, exist_ok=True) # create all intermediate dirs
# write / read
config.write_text('{"key": "value"}', encoding="utf-8")
content = config.read_text(encoding="utf-8")
config.read_bytes()
config.write_bytes(b"\x00\x01\x02")

Globbing and listing
# find all Python files recursively
for path in Path("src").rglob("*.py"):
print(path)
# non-recursive
for path in Path("data").glob("*.csv"):
print(path)
# list directory contents
for item in Path(".").iterdir():
print(item, "dir" if item.is_dir() else "file")File operations
from pathlib import Path
import shutil
src = Path("old_name.txt")
dst = Path("new_name.txt")
src.rename(dst) # rename/move within filesystem
shutil.copy(src, dst) # copy a file
shutil.copytree("src_dir", "dst") # copy a directory tree
src.unlink() # delete a file
shutil.rmtree("old_dir") # delete a directory tree (careful!)
# temp files
import tempfile
with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
    f.write(b'{"temp": true}')
    temp_path = Path(f.name)

3. Working with CSV
import csv
from pathlib import Path
# reading
with open(Path("data/users.csv"), newline="", encoding="utf-8") as f:
reader = csv.DictReader(f) # rows as dicts keyed by header
users = list(reader)
# each row is: {"name": "Alice", "age": "30", "email": "alice@example.com"}
# NOTE: all values are strings — convert types manually
users_typed = [
    {"name": r["name"], "age": int(r["age"]), "email": r["email"]}
    for r in users
]
# writing
fieldnames = ["name", "age", "email"]
with open(Path("output/users.csv"), "w", newline="", encoding="utf-8") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(users_typed)4. Working with JSON
import json
from pathlib import Path
data = {"users": [{"name": "Alice", "active": True}], "count": 1}
# write
Path("output.json").write_text(json.dumps(data, indent=2), encoding="utf-8")
# read
loaded = json.loads(Path("output.json").read_text(encoding="utf-8"))
# load from / dump to an open file object (note: json.load still reads the whole file into memory)
with open("large.json") as f:
    obj = json.load(f)

with open("large.json", "w") as f:
    json.dump(obj, f, indent=2, ensure_ascii=False)

Custom JSON serialization
from datetime import datetime
class DateTimeEncoder(json.JSONEncoder):
    def default(self, obj: object) -> object:
        if isinstance(obj, datetime):
            return obj.isoformat()
        return super().default(obj)

json.dumps({"created_at": datetime.now()}, cls=DateTimeEncoder)

5. Working with YAML
pip install pyyaml

import yaml
from pathlib import Path
# reading
config = yaml.safe_load(Path("config.yaml").read_text())
# writing
with open("config.yaml", "w") as f:
yaml.dump(config, f, default_flow_style=False, indent=2)Always use yaml.safe_load, never yaml.load — the unsafe version can execute arbitrary Python.
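A quick illustration of the difference (a minimal sketch; the payload is a toy example):
import yaml

payload = "!!python/object/apply:os.system ['echo pwned']"
yaml.safe_load(payload)   # raises yaml.constructor.ConstructorError
# yaml.load(payload, Loader=yaml.UnsafeLoader) would actually run the shell command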
6. Environment Variables and os Module
import os
from pathlib import Path
# read env var
db_url = os.environ["DATABASE_URL"] # raises KeyError if missing
debug = os.getenv("DEBUG", "false") # returns default if missing
# working directory
cwd = Path.cwd() # current directory
home = Path.home() # user home directory
os.chdir("/tmp") # change directory (avoid in scripts)
# path operations (prefer pathlib, but sometimes you need these)
os.path.join("a", "b", "c") # "a/b/c" or "a\\b\\c"
os.path.basename("/home/user/file.txt") # "file.txt"
os.path.dirname("/home/user/file.txt") # "/home/user"
os.path.exists("/etc/hosts") # True
os.makedirs("a/b/c", exist_ok=True)

7. argparse — Standard Library CLI
argparse is built into Python — no install needed.
import argparse
from pathlib import Path
def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Process and clean a CSV file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Input CSV file path",
    )
    parser.add_argument(
        "-o", "--output",
        type=Path,
        default=Path("output.csv"),
        help="Output path (default: output.csv)",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Max rows to process",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose output",
    )
    parser.add_argument(
        "--format",
        choices=["csv", "json", "parquet"],
        default="csv",
        help="Output format",
    )
    return parser
def main() -> None:
    args = build_parser().parse_args()
    if not args.input.exists():
        raise SystemExit(f"Error: {args.input} does not exist")
    if args.verbose:
        print(f"Processing {args.input} -> {args.output}")
    # ... process data ...

if __name__ == "__main__":
    main()

python clean.py data/users.csv -o output/cleaned.csv --limit 1000 --verbose
python clean.py --help

8. Typer — Modern CLI with Type Hints
Typer derives your CLI from Python type hints. No repetitive argument definitions.
pip install typer[all]

Basic Typer app
import typer
from pathlib import Path
from typing import Optional
from typing_extensions import Annotated
app = typer.Typer(help="Data processing CLI")
@app.command()
def process(
    input: Annotated[Path, typer.Argument(help="Input CSV file")],
    output: Annotated[Path, typer.Option(help="Output path")] = Path("output.csv"),
    limit: Annotated[Optional[int], typer.Option(help="Max rows")] = None,
    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
    format: Annotated[str, typer.Option(help="Output format")] = "csv",
) -> None:
    """Process and clean a CSV file."""
    if not input.exists():
        typer.echo(f"Error: {input} not found", err=True)
        raise typer.Exit(code=1)
    if verbose:
        typer.echo(f"Processing {input} -> {output}")
    typer.echo(typer.style("Done!", fg=typer.colors.GREEN))

if __name__ == "__main__":
    app()

Multiple commands
@app.command()
def ingest(
    source: Annotated[str, typer.Argument()],
    dry_run: bool = False,
) -> None:
    """Ingest data from a source."""
    typer.echo(f"Ingesting {source}" + (" (dry run)" if dry_run else ""))
@app.command()
def export(
    destination: Annotated[str, typer.Argument()],
) -> None:
    """Export data to a destination."""
    typer.echo(f"Exporting to {destination}")

python cli.py process data.csv --verbose
python cli.py ingest s3://bucket/data --dry-run
python cli.py export output/
python cli.py --help

Progress bars with Typer
import time
@app.command()
def long_task(rows: int = 1000) -> None:
    with typer.progressbar(range(rows), label="Processing") as progress:
        for _ in progress:
            time.sleep(0.001)  # simulate work

9. sys Module for Scripts
import sys
from pathlib import Path
# command-line arguments (raw, before argparse)
script_name = sys.argv[0]
args = sys.argv[1:]
# exit with a code
sys.exit(0) # success
sys.exit(1) # error
# standard streams
sys.stdout.write("output\n")
sys.stderr.write("error message\n") # errors go to stderr
# Python version check
if sys.version_info < (3, 11):
sys.exit("Python 3.11+ required")
# module search path
sys.path.insert(0, str(Path(__file__).parent.parent))  # add parent to path

10. Complete Script Template
Use this as your starting point for any automation script:
#!/usr/bin/env python3
"""
process_data.py — Clean and transform a CSV file.
Usage:
    python process_data.py input.csv -o output.csv --verbose
"""
from __future__ import annotations
import csv
import json
import sys
from pathlib import Path
from typing import Iterator
import typer
from typing_extensions import Annotated
app = typer.Typer()
def read_csv(path: Path) -> Iterator[dict]:
    with open(path, newline="", encoding="utf-8") as f:
        yield from csv.DictReader(f)

def write_json(data: list[dict], path: Path) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(data, indent=2), encoding="utf-8")

def clean_row(row: dict) -> dict | None:
    row = {k.strip().lower(): v.strip() for k, v in row.items()}
    if not row.get("email"):
        return None
    row["email"] = row["email"].lower()
    return row
@app.command()
def main(
    input_file: Annotated[Path, typer.Argument(help="Input CSV")],
    output_file: Annotated[Path, typer.Option("-o", help="Output JSON")] = Path("output.json"),
    limit: Annotated[int | None, typer.Option(help="Row limit")] = None,
    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
) -> None:
    """Clean CSV and export as JSON."""
    if not input_file.exists():
        typer.echo(f"File not found: {input_file}", err=True)
        raise typer.Exit(1)
    rows: list[dict] = []
    for i, row in enumerate(read_csv(input_file)):
        if limit and i >= limit:
            break
        cleaned = clean_row(row)
        if cleaned is not None:
            rows.append(cleaned)
    write_json(rows, output_file)
    if verbose:
        typer.echo(f"Wrote {len(rows)} rows to {output_file}")
    typer.echo(typer.style(f"Done: {output_file}", fg=typer.colors.GREEN))

if __name__ == "__main__":
    app()

Exercises
Exercise 1: Write a script that takes a directory path and recursively lists all .json files, printing their size in KB.
Exercise 2: Build a Typer CLI with two commands: split (splits a CSV into N equal chunks) and merge (merges multiple CSVs into one).
Exercise 3: Write a safe_write_json(data, path) function that writes to a temp file first and then renames it — preventing corrupt output if the process is killed mid-write.
Summary
| Tool | When to Use |
|------|------------|
| open() + with | Simple file reads/writes |
| pathlib.Path | All path operations — always prefer over os.path |
| csv.DictReader/Writer | Structured CSV without pandas |
| json.loads/dumps | JSON serialization |
| yaml.safe_load | Config files |
| argparse | Standard library CLIs, no extra deps |
| typer | Modern CLIs with type hints, rich output |
| sys.argv | Raw argument access before parsing |
Next: consuming HTTP APIs with the requests library.