Back to blog
Backend Systems · Beginner

Python File Handling & Scripting: pathlib, argparse, and Typer

Master file I/O with pathlib, read and write CSV/JSON/YAML, build professional CLI tools with argparse and Typer, and write automation scripts that run everywhere.

Learnixo · May 7, 2026 · 8 min read
Python · file handling · pathlib · argparse · Typer · scripting · CLI
Share:𝕏

Files and Scripts Are the Foundation of Automation

Pipeline tools, data ingestion scripts, DevOps automation, report generators — all of them read files, write files, and need a command-line interface. This lesson covers the complete toolkit.


1. File I/O Basics

Reading a file

Python
# always use `with` — it guarantees the file is closed, even if an exception occurs
with open("data.txt", "r", encoding="utf-8") as f:
    content = f.read()          # read entire file as string

with open("data.txt", "r") as f:
    lines = f.readlines()       # list of lines including \n

with open("data.txt", "r") as f:
    for line in f:              # iterate line by line (memory-efficient)
        print(line.strip())

Writing a file

Python
with open("output.txt", "w", encoding="utf-8") as f:
    f.write("Hello\n")          # write() does NOT add a newline for you
    f.write("World\n")

# append mode — doesn't truncate
with open("log.txt", "a") as f:
    f.write("New log entry\n")

# write multiple lines at once (writelines does NOT add newlines either)
lines = ["line one\n", "line two\n", "line three\n"]
with open("output.txt", "w") as f:
    f.writelines(lines)

File modes

| Mode | Meaning |
|------|---------|
| "r" | Read (default) |
| "w" | Write — creates or truncates |
| "a" | Append — creates or appends |
| "x" | Exclusive create — fails if exists |
| "b" | Binary mode (add to above: "rb", "wb") |
| "+" | Read + write: "r+", "w+" |


2. pathlib — The Modern Way to Work with Paths

pathlib.Path replaces os.path with an object-oriented API that works identically on Windows, macOS, and Linux.

Python
from pathlib import Path

# creating paths
base = Path("/home/user/projects")
config = base / "config" / "settings.json"  # join with /
relative = Path("data/input.csv")

# properties — pure string operations; the file does not need to exist
config.name          # "settings.json"
config.stem          # "settings"
config.suffix        # ".json"
config.suffixes      # [".json"] — all suffixes, e.g. [".tar", ".gz"]
config.parent        # Path('/home/user/projects/config')
config.parts         # ('/', 'home', 'user', 'projects', 'config', 'settings.json')

# checking existence — these DO touch the filesystem
config.exists()
config.is_file()
config.is_dir()

# resolve to absolute path
relative.resolve()   # Path('/current/working/dir/data/input.csv')

Creating directories and files

Python
output_dir = Path("output/reports")
output_dir.mkdir(parents=True, exist_ok=True)   # create all intermediate dirs

# write / read whole files in one call — opens and closes the file internally
config.write_text('{"key": "value"}', encoding="utf-8")
content = config.read_text(encoding="utf-8")
config.read_bytes()
config.write_bytes(b"\x00\x01\x02")

Globbing and listing

Python
# find all Python files recursively
for path in Path("src").rglob("*.py"):
    print(path)

# non-recursive
for path in Path("data").glob("*.csv"):
    print(path)

# list directory contents (non-recursive; order is not guaranteed)
for item in Path(".").iterdir():
    print(item, "dir" if item.is_dir() else "file")

File operations

Python
from pathlib import Path
import shutil

src = Path("old_name.txt")
dst = Path("new_name.txt")

# NOTE: these are independent examples, not a sequence to run top to bottom —
# after rename() below, `src` no longer exists on disk, so the copy/unlink
# lines would fail if executed as written.
src.rename(dst)                     # rename/move within filesystem
shutil.copy(src, dst)               # copy a file
shutil.copytree("src_dir", "dst")   # copy a directory tree
src.unlink()                        # delete a file
shutil.rmtree("old_dir")            # delete a directory tree (careful!)

# temp files
import tempfile
with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
    f.write(b'{"temp": true}')
    temp_path = Path(f.name)        # delete=False: the caller must unlink it later

3. Working with CSV

Python
import csv
from pathlib import Path

# reading
# newline="" is required by the csv module so it can handle line endings itself
with open(Path("data/users.csv"), newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)   # rows as dicts keyed by header
    users = list(reader)

# each row is: {"name": "Alice", "age": "30", "email": "alice@example.com"}
# NOTE: all values are strings — convert types manually
users_typed = [
    {"name": r["name"], "age": int(r["age"]), "email": r["email"]}
    for r in users
]

# writing
fieldnames = ["name", "age", "email"]
with open(Path("output/users.csv"), "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(users_typed)

4. Working with JSON

Python
import json
from pathlib import Path

data = {"users": [{"name": "Alice", "active": True}], "count": 1}

# write
Path("output.json").write_text(json.dumps(data, indent=2), encoding="utf-8")

# read
loaded = json.loads(Path("output.json").read_text(encoding="utf-8"))

# loading from / dumping to an open file object
# NOTE: json.load is NOT streaming — it still reads the whole file into memory;
# for truly huge JSON you need an incremental parser (e.g. the ijson package)
with open("large.json") as f:
    obj = json.load(f)

with open("large.json", "w") as f:
    json.dump(obj, f, indent=2, ensure_ascii=False)

Custom JSON serialization

Python
from datetime import datetime

class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that renders datetime values as ISO-8601 strings."""

    def default(self, obj: object) -> object:
        # Anything that isn't a datetime defers to the base class,
        # which raises TypeError for unserializable objects.
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()

json.dumps({"created_at": datetime.now()}, cls=DateTimeEncoder)

5. Working with YAML

Bash
pip install pyyaml
Python
import yaml
from pathlib import Path

# reading — safe_load parses plain data types only (no arbitrary Python objects)
config = yaml.safe_load(Path("config.yaml").read_text())

# writing
with open("config.yaml", "w") as f:
    yaml.dump(config, f, default_flow_style=False, indent=2)

Always use yaml.safe_load, never yaml.load — the unsafe version can execute arbitrary Python.


6. Environment Variables and os Module

Python
import os
from pathlib import Path

# read env var
db_url = os.environ["DATABASE_URL"]          # raises KeyError if missing
debug = os.getenv("DEBUG", "false")          # returns default if missing
# NOTE: env values are always strings — compare explicitly (debug == "true")

# working directory
cwd = Path.cwd()                              # current directory
home = Path.home()                            # user home directory
os.chdir("/tmp")                             # change directory (avoid in scripts)

# path operations (prefer pathlib, but sometimes you need these)
os.path.join("a", "b", "c")                  # "a/b/c" or "a\\b\\c"
os.path.basename("/home/user/file.txt")       # "file.txt"
os.path.dirname("/home/user/file.txt")        # "/home/user"
os.path.exists("/etc/hosts")                  # True
os.makedirs("a/b/c", exist_ok=True)

7. argparse — Standard Library CLI

argparse is built into Python — no install needed.

Python
import argparse
from pathlib import Path

def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the CSV-cleaning CLI.

    Returns a fully-configured ArgumentParser; callers invoke
    `.parse_args()` on the result.
    """
    parser = argparse.ArgumentParser(
        description="Process and clean a CSV file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = parser.add_argument  # shorthand keeps the definitions compact

    # positional: the file to process
    add("input", type=Path, help="Input CSV file path")
    # options
    add("-o", "--output", type=Path, default=Path("output.csv"),
        help="Output path (default: output.csv)")
    add("--limit", type=int, default=None,
        help="Max rows to process")
    add("--verbose", "-v", action="store_true",
        help="Enable verbose output")
    add("--format", choices=["csv", "json", "parquet"], default="csv",
        help="Output format")
    return parser

def main() -> None:
    """Entry point: parse arguments, validate the input path, run processing."""
    parser = build_parser()
    args = parser.parse_args()

    # fail fast with a clean message instead of a traceback
    if not args.input.exists():
        raise SystemExit(f"Error: {args.input} does not exist")

    if args.verbose:
        print(f"Processing {args.input} -> {args.output}")

    # ... process data ...

if __name__ == "__main__":
    main()
Bash
python clean.py data/users.csv -o output/cleaned.csv --limit 1000 --verbose
python clean.py --help

8. Typer — Modern CLI with Type Hints

Typer derives your CLI from Python type hints. No repetitive argument definitions.

Bash
pip install "typer[all]"   # quotes needed in zsh, which treats [] as a glob

Basic Typer app

Python
import typer
from pathlib import Path
from typing import Optional
from typing_extensions import Annotated

app = typer.Typer(help="Data processing CLI")

# NOTE: parameter names become the CLI argument/option names, so `input` and
# `format` deliberately shadow Python builtins here — renaming them would
# change the generated CLI.
@app.command()
def process(
    input: Annotated[Path, typer.Argument(help="Input CSV file")],
    output: Annotated[Path, typer.Option(help="Output path")] = Path("output.csv"),
    limit: Annotated[Optional[int], typer.Option(help="Max rows")] = None,
    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
    format: Annotated[str, typer.Option(help="Output format")] = "csv",
) -> None:
    """Process and clean a CSV file."""
    if not input.exists():
        typer.echo(f"Error: {input} not found", err=True)   # errors go to stderr
        raise typer.Exit(code=1)                            # non-zero exit = failure

    if verbose:
        typer.echo(f"Processing {input} -> {output}")

    typer.echo(typer.style("Done!", fg=typer.colors.GREEN))


if __name__ == "__main__":
    app()

Multiple commands

Python
@app.command()
def ingest(
    source: Annotated[str, typer.Argument()],
    dry_run: bool = False,  # a plain type hint also works: exposed as --dry-run
) -> None:
    """Ingest data from a source."""
    typer.echo(f"Ingesting {source}" + (" (dry run)" if dry_run else ""))

@app.command()
def export(
    destination: Annotated[str, typer.Argument()],
) -> None:
    """Export data to a destination."""
    typer.echo(f"Exporting to {destination}")
Bash
python cli.py process data.csv --verbose
python cli.py ingest s3://bucket/data --dry-run
python cli.py export output/
python cli.py --help

Progress bars with Typer

Python
import time

@app.command()
def long_task(rows: int = 1000) -> None:
    """Demonstrate a terminal progress bar over `rows` iterations."""
    with typer.progressbar(range(rows), label="Processing") as progress:
        for _ in progress:
            time.sleep(0.001)   # simulate work

9. sys Module for Scripts

Python
import sys
from pathlib import Path

# command-line arguments (raw, before argparse)
script_name = sys.argv[0]
args = sys.argv[1:]

# exit with a code (illustrative — in a real script nothing runs after exit)
sys.exit(0)    # success
sys.exit(1)    # error

# standard streams
sys.stdout.write("output\n")
sys.stderr.write("error message\n")   # errors go to stderr

# Python version check
if sys.version_info < (3, 11):
    sys.exit("Python 3.11+ required")   # exiting with a string prints it to stderr

# module search path
sys.path.insert(0, str(Path(__file__).parent.parent))   # add parent to path

10. Complete Script Template

Use this as your starting point for any automation script:

Python
#!/usr/bin/env python3
"""
process_data.py — Clean and transform a CSV file.

Usage:
    python process_data.py input.csv -o output.csv --verbose
"""
from __future__ import annotations

import csv
import json
import sys
from pathlib import Path
from typing import Iterator

import typer
from typing_extensions import Annotated

app = typer.Typer()


def read_csv(path: Path) -> Iterator[dict]:
    """Lazily yield each row of the CSV at *path* as a dict keyed by the header.

    The file stays open only while the generator is being consumed.
    """
    with path.open(newline="", encoding="utf-8") as fh:
        reader = csv.DictReader(fh)
        for row in reader:
            yield row


def write_json(data: list[dict], path: Path) -> None:
    """Serialize *data* as pretty-printed JSON at *path*, creating parent dirs."""
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, indent=2)
    path.write_text(payload, encoding="utf-8")


def clean_row(row: dict) -> dict | None:
    """Normalize one CSV row; return None for rows with no email.

    Keys are stripped and lowercased, values are stripped, and the email
    value is lowercased.
    """
    normalized = {key.strip().lower(): value.strip() for key, value in row.items()}
    email = normalized.get("email")
    if not email:
        return None
    normalized["email"] = email.lower()
    return normalized


@app.command()
def main(
    input_file: Annotated[Path, typer.Argument(help="Input CSV")],
    output_file: Annotated[Path, typer.Option("-o", help="Output JSON")] = Path("output.json"),
    limit: Annotated[int | None, typer.Option(help="Row limit")] = None,
    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
) -> None:
    """Clean CSV and export as JSON.

    Reads `input_file` row by row, drops rows without an email, normalizes
    the rest, and writes the result to `output_file`. Exits with code 1 if
    the input file does not exist.
    """
    if not input_file.exists():
        typer.echo(f"File not found: {input_file}", err=True)
        raise typer.Exit(1)

    rows: list[dict] = []
    for i, row in enumerate(read_csv(input_file)):
        # Check `is not None`, not truthiness: with the old `if limit and ...`
        # form, `--limit 0` was falsy and silently disabled the limit instead
        # of processing zero rows.
        if limit is not None and i >= limit:
            break
        cleaned = clean_row(row)
        if cleaned is not None:
            rows.append(cleaned)

    write_json(rows, output_file)

    if verbose:
        typer.echo(f"Wrote {len(rows)} rows to {output_file}")

    typer.echo(typer.style(f"Done: {output_file}", fg=typer.colors.GREEN))


if __name__ == "__main__":
    app()

Exercises

Exercise 1: Write a script that takes a directory path and recursively lists all .json files, printing their size in KB.

Exercise 2: Build a Typer CLI with two commands: split (splits a CSV into N equal chunks) and merge (merges multiple CSVs into one).

Exercise 3: Write a safe_write_json(data, path) function that writes to a temp file first and then renames it — preventing corrupt output if the process is killed mid-write.


Summary

| Tool | When to Use |
|------|-------------|
| open() + with | Simple file reads/writes |
| pathlib.Path | All path operations — always prefer over os.path |
| csv.DictReader/Writer | Structured CSV without pandas |
| json.loads/dumps | JSON serialization |
| yaml.safe_load | Config files |
| argparse | Standard library CLIs, no extra deps |
| typer | Modern CLIs with type hints, rich output |
| sys.argv | Raw argument access before parsing |

Next: consuming HTTP APIs with the requests library.

Enjoyed this article?

Explore the Backend Systems learning path for more.

Found this helpful?

Share:𝕏

Leave a comment

Have a question, correction, or just found this helpful? Leave a note below.