Python File Handling & Scripting: pathlib, argparse, and Typer
Master file I/O with pathlib, read and write CSV/JSON/YAML, build professional CLI tools with argparse and Typer, and write automation scripts that run everywhere.
Files and Scripts Are the Foundation of Automation
Pipeline tools, data ingestion scripts, DevOps automation, report generators — all of them read files, write files, and need a command-line interface. This lesson covers the complete toolkit.
1. File I/O Basics
Reading a file
# always use `with` — it guarantees the file is closed
with open("data.txt", "r", encoding="utf-8") as f:
content = f.read() # read entire file as string
with open("data.txt", "r") as f:
lines = f.readlines() # list of lines including \n
with open("data.txt", "r") as f:
for line in f: # iterate line by line (memory-efficient)
print(line.strip())

Writing a file
with open("output.txt", "w", encoding="utf-8") as f:
f.write("Hello\n")
f.write("World\n")
# append mode — doesn't truncate
with open("log.txt", "a") as f:
f.write("New log entry\n")
# write multiple lines at once
lines = ["line one\n", "line two\n", "line three\n"]
with open("output.txt", "w") as f:
f.writelines(lines)

File modes
| Mode | Meaning |
|------|---------|
| "r" | Read (default) |
| "w" | Write — creates or truncates |
| "a" | Append — creates or appends |
| "x" | Exclusive create — fails if exists |
| "b" | Binary mode (add to above: "rb", "wb") |
| "+" | Read + write: "r+", "w+" |
2. pathlib — The Modern Way to Work with Paths
pathlib.Path replaces os.path with an object-oriented API that works identically on Windows, macOS, and Linux.
from pathlib import Path
# creating paths
base = Path("/home/user/projects")
config = base / "config" / "settings.json" # join with /
relative = Path("data/input.csv")
# properties
config.name # "settings.json"
config.stem # "settings"
config.suffix # ".json"
config.suffixes # [".json"]
config.parent # Path('/home/user/projects/config')
config.parts # ('/', 'home', 'user', 'projects', 'config', 'settings.json')
# checking existence
config.exists()
config.is_file()
config.is_dir()
# resolve to absolute path
relative.resolve() # Path('/current/working/dir/data/input.csv')

Creating directories and files
output_dir = Path("output/reports")
output_dir.mkdir(parents=True, exist_ok=True) # create all intermediate dirs
# write / read
config.write_text('{"key": "value"}', encoding="utf-8")
content = config.read_text(encoding="utf-8")
config.read_bytes()
config.write_bytes(b"\x00\x01\x02")

Globbing and listing
# find all Python files recursively
for path in Path("src").rglob("*.py"):
print(path)
# non-recursive
for path in Path("data").glob("*.csv"):
print(path)
# list directory contents
for item in Path(".").iterdir():
print(item, "dir" if item.is_dir() else "file")

File operations
from pathlib import Path
import shutil
src = Path("old_name.txt")
dst = Path("new_name.txt")
src.rename(dst) # rename/move within filesystem
shutil.copy(src, dst) # copy a file
shutil.copytree("src_dir", "dst") # copy a directory tree
src.unlink() # delete a file
shutil.rmtree("old_dir") # delete a directory tree (careful!)
# temp files
import tempfile
with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
f.write(b'{"temp": true}')
temp_path = Path(f.name)

3. Working with CSV
import csv
from pathlib import Path
# reading
with open(Path("data/users.csv"), newline="", encoding="utf-8") as f:
reader = csv.DictReader(f) # rows as dicts keyed by header
users = list(reader)
# each row is: {"name": "Alice", "age": "30", "email": "alice@example.com"}
# NOTE: all values are strings — convert types manually
users_typed = [
{"name": r["name"], "age": int(r["age"]), "email": r["email"]}
for r in users
]
# writing
fieldnames = ["name", "age", "email"]
with open(Path("output/users.csv"), "w", newline="", encoding="utf-8") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(users_typed)

4. Working with JSON
import json
from pathlib import Path
data = {"users": [{"name": "Alice", "active": True}], "count": 1}
# write
Path("output.json").write_text(json.dumps(data, indent=2), encoding="utf-8")
# read
loaded = json.loads(Path("output.json").read_text(encoding="utf-8"))
# loading a large JSON file from an open handle — note json.load reads the
# whole document into memory; the stdlib json module does not stream
with open("large.json") as f:
obj = json.load(f)
with open("large.json", "w") as f:
json.dump(obj, f, indent=2, ensure_ascii=False)

Custom JSON serialization
from datetime import datetime
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes datetime objects as ISO-8601 strings.

    Any non-datetime object falls back to the base class, which raises
    TypeError for unserializable types as usual.
    """

    def default(self, obj: object) -> object:
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()
json.dumps({"created_at": datetime.now()}, cls=DateTimeEncoder)

5. Working with YAML
pip install pyyaml

import yaml
from pathlib import Path
# reading
config = yaml.safe_load(Path("config.yaml").read_text())
# writing
with open("config.yaml", "w") as f:
yaml.dump(config, f, default_flow_style=False, indent=2)

Always use yaml.safe_load, never yaml.load — the unsafe version can execute arbitrary Python.
6. Environment Variables and os Module
import os
from pathlib import Path
# read env var
db_url = os.environ["DATABASE_URL"] # raises KeyError if missing
debug = os.getenv("DEBUG", "false") # returns default if missing
# working directory
cwd = Path.cwd() # current directory
home = Path.home() # user home directory
os.chdir("/tmp") # change directory (avoid in scripts)
# path operations (prefer pathlib, but sometimes you need these)
os.path.join("a", "b", "c") # "a/b/c" or "a\\b\\c"
os.path.basename("/home/user/file.txt") # "file.txt"
os.path.dirname("/home/user/file.txt") # "/home/user"
os.path.exists("/etc/hosts") # True
os.makedirs("a/b/c", exist_ok=True)

7. argparse — Standard Library CLI
argparse is built into Python — no install needed.
import argparse
from pathlib import Path
def build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the CSV-cleaning CLI.

    Returns a fully configured ArgumentParser; callers invoke
    ``parse_args()`` on the result.
    """
    parser = argparse.ArgumentParser(
        description="Process and clean a CSV file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Positional: the file to process. argparse converts it to a Path for us.
    parser.add_argument("input", type=Path, help="Input CSV file path")
    # Options, in the order they should appear in --help.
    parser.add_argument(
        "-o", "--output",
        type=Path,
        default=Path("output.csv"),
        help="Output path (default: output.csv)",
    )
    parser.add_argument("--limit", type=int, default=None, help="Max rows to process")
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
    parser.add_argument(
        "--format",
        choices=["csv", "json", "parquet"],
        default="csv",
        help="Output format",
    )
    return parser
def main() -> None:
    """Parse CLI arguments, validate the input file, and run the pipeline."""
    parser = build_parser()
    args = parser.parse_args()
    # Fail fast with a non-zero exit code if the input file is missing.
    if not args.input.exists():
        raise SystemExit(f"Error: {args.input} does not exist")
    if args.verbose:
        print(f"Processing {args.input} -> {args.output}")
    # ... process data ...
if __name__ == "__main__":
main()

python clean.py data/users.csv -o output/cleaned.csv --limit 1000 --verbose
python clean.py --help

8. Typer — Modern CLI with Type Hints
Typer derives your CLI from Python type hints. No repetitive argument definitions.
pip install typer[all]

Basic Typer app
import typer
from pathlib import Path
from typing import Optional
from typing_extensions import Annotated
app = typer.Typer(help="Data processing CLI")
@app.command()
def process(
    input: Annotated[Path, typer.Argument(help="Input CSV file")],
    output: Annotated[Path, typer.Option(help="Output path")] = Path("output.csv"),
    limit: Annotated[Optional[int], typer.Option(help="Max rows")] = None,
    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
    format: Annotated[str, typer.Option(help="Output format")] = "csv",
) -> None:
    """Process and clean a CSV file."""
    # NOTE(review): `input` and `format` shadow builtins, but Typer derives
    # the CLI names from these parameters, so renaming would change the CLI.
    # A missing input file is a user error: report on stderr, exit non-zero.
    if not input.exists():
        typer.echo(f"Error: {input} not found", err=True)
        raise typer.Exit(code=1)
    if verbose:
        typer.echo(f"Processing {input} -> {output}")
    typer.echo(typer.style("Done!", fg=typer.colors.GREEN))
if __name__ == "__main__":
app()

Multiple commands
@app.command()
def ingest(
    source: Annotated[str, typer.Argument()],
    dry_run: bool = False,
) -> None:
    """Ingest data from a source."""
    # Plain bool parameter: Typer exposes it as --dry-run / --no-dry-run.
    suffix = " (dry run)" if dry_run else ""
    typer.echo(f"Ingesting {source}" + suffix)
@app.command()
def export(
destination: Annotated[str, typer.Argument()],
) -> None:
"""Export data to a destination."""
typer.echo(f"Exporting to {destination}")

python cli.py process data.csv --verbose
python cli.py ingest s3://bucket/data --dry-run
python cli.py export output/
python cli.py --help

Progress bars with Typer
import time
@app.command()
def long_task(rows: int = 1000) -> None:
with typer.progressbar(range(rows), label="Processing") as progress:
for _ in progress:
time.sleep(0.001) # simulate work

9. sys Module for Scripts
import sys
from pathlib import Path
# command-line arguments (raw, before argparse)
script_name = sys.argv[0]
args = sys.argv[1:]
# exit with a code
sys.exit(0) # success
sys.exit(1) # error
# standard streams
sys.stdout.write("output\n")
sys.stderr.write("error message\n") # errors go to stderr
# Python version check
if sys.version_info < (3, 11):
sys.exit("Python 3.11+ required")
# module search path
sys.path.insert(0, str(Path(__file__).parent.parent)) # add parent to path

10. Complete Script Template
Use this as your starting point for any automation script:
#!/usr/bin/env python3
"""
process_data.py — Clean and transform a CSV file.
Usage:
python process_data.py input.csv -o output.csv --verbose
"""
from __future__ import annotations
import csv
import json
import sys
from pathlib import Path
from typing import Iterator
import typer
from typing_extensions import Annotated
app = typer.Typer()
def read_csv(path: Path) -> Iterator[dict]:
    """Yield each row of the CSV at *path* as a dict keyed by the header row.

    Values are left as strings, exactly as csv.DictReader produces them.
    """
    with path.open(newline="", encoding="utf-8") as handle:
        for row in csv.DictReader(handle):
            yield row
def write_json(data: list[dict], path: Path) -> None:
    """Serialize *data* to *path* as 2-space-indented JSON.

    Creates any missing parent directories first, so callers can pass a
    path into a directory tree that does not exist yet.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2)
    path.write_text(serialized, encoding="utf-8")
def clean_row(row: dict) -> dict | None:
    """Normalize one raw CSV row; return None if it has no usable email.

    Keys are stripped and lower-cased; string values are stripped. Rows from
    csv.DictReader may contain None values (short rows fill missing fields
    with restval=None) and a None key (extra fields land under restkey=None);
    the original `v.strip()` crashed with AttributeError on those, so both
    are tolerated here: None values pass through, the None key is dropped.
    """
    row = {
        k.strip().lower(): (v.strip() if isinstance(v, str) else v)
        for k, v in row.items()
        if k is not None  # csv.DictReader puts overflow fields under key None
    }
    # Empty string or None both mean "no email" — drop the row.
    if not row.get("email"):
        return None
    row["email"] = row["email"].lower()
    return row
@app.command()
def main(
    input_file: Annotated[Path, typer.Argument(help="Input CSV")],
    output_file: Annotated[Path, typer.Option("-o", help="Output JSON")] = Path("output.json"),
    limit: Annotated[int | None, typer.Option(help="Row limit")] = None,
    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
) -> None:
    """Clean CSV and export as JSON.

    Reads rows from *input_file*, drops rows without an email via
    clean_row(), and writes the survivors to *output_file* as JSON.
    Exits with code 1 if the input file does not exist.
    """
    if not input_file.exists():
        typer.echo(f"File not found: {input_file}", err=True)
        raise typer.Exit(1)
    rows: list[dict] = []
    for i, row in enumerate(read_csv(input_file)):
        # Compare against None explicitly: the original `if limit and ...`
        # treated `--limit 0` as "no limit" because 0 is falsy, silently
        # processing every row instead of none.
        if limit is not None and i >= limit:
            break
        cleaned = clean_row(row)
        if cleaned is not None:
            rows.append(cleaned)
    write_json(rows, output_file)
    if verbose:
        typer.echo(f"Wrote {len(rows)} rows to {output_file}")
    typer.echo(typer.style(f"Done: {output_file}", fg=typer.colors.GREEN))
if __name__ == "__main__":
app()

Exercises
Exercise 1: Write a script that takes a directory path and recursively lists all .json files, printing their size in KB.
Exercise 2: Build a Typer CLI with two commands: split (splits a CSV into N equal chunks) and merge (merges multiple CSVs into one).
Exercise 3: Write a safe_write_json(data, path) function that writes to a temp file first and then renames it — preventing corrupt output if the process is killed mid-write.
Summary
| Tool | When to Use |
|------|------------|
| open() + with | Simple file reads/writes |
| pathlib.Path | All path operations — always prefer over os.path |
| csv.DictReader/Writer | Structured CSV without pandas |
| json.loads/dumps | JSON serialization |
| yaml.safe_load | Config files |
| argparse | Standard library CLIs, no extra deps |
| typer | Modern CLIs with type hints, rich output |
| sys.argv | Raw argument access before parsing |
Next: consuming HTTP APIs with the requests library.
Enjoyed this article?
Explore the Backend Systems learning path for more.
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.