Python Data Structures

Python has four built-in collection types that cover most programming needs. Knowing which to use — and all their methods — makes you significantly more productive.

Lists

Ordered, mutable, allows duplicates. The workhorse of Python collections.

Python

# Create
fruits = ["apple", "banana", "cherry"]
numbers = list(range(1, 6))  # [1, 2, 3, 4, 5]
mixed = [1, "hello", True, None, [1, 2]]

# Access
fruits[0]    # "apple"
fruits[-1]   # "cherry" (last)
fruits[1:3]  # ["banana", "cherry"] (slicing)
fruits[::-1] # reversed copy

# Common methods
fruits.append("date")           # add to end
fruits.insert(1, "avocado")     # insert at index
fruits.extend(["fig", "grape"]) # add multiple
fruits.remove("banana")         # remove by value (first occurrence)
popped = fruits.pop()           # remove and return last -> "grape"
popped = fruits.pop(0)          # remove and return at index -> "apple"
fruits.sort()                   # sort in place
fruits.sort(reverse=True)       # reverse sort
fruits.reverse()                # reverse in place
# "apple" was popped above, so look up a value that is still in the list:
# list.index raises ValueError when the value is missing.
fruits.index("cherry")          # find index of value -> 1
fruits.count("apple")           # count occurrences -> 0 (never raises)
fruits.clear()                  # remove all

# Non-mutating (fruits is empty at this point because of clear() above)
sorted_copy = sorted(fruits)           # returns new sorted list
reversed_gen = reversed(fruits)        # returns iterator
length = len(fruits)
"apple" in fruits                      # membership check

List Comprehensions

Python

numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# [expression for item in iterable if condition]
squares = [x ** 2 for x in numbers]  # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
evens = [x for x in numbers if x % 2 == 0]  # [2, 4, 6, 8, 10]
even_squares = [x ** 2 for x in numbers if x % 2 == 0]  # [4, 16, 36, 64, 100]

# Nested — the `for` clauses read left to right, outer loop first
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [x for row in matrix for x in row]
# [1, 2, 3, 4, 5, 6, 7, 8, 9]

# With conditional expression — the if/else sits before `for` because it
# chooses the value; a trailing `if` (as in evens) filters items instead
labels = ["even" if x % 2 == 0 else "odd" for x in numbers]
# ["odd", "even", "odd", "even", ...]

Tuples

Ordered, immutable, allows duplicates. Like lists, but can't be changed after creation.

Python

# Create
point = (3, 7)
single = (42,)      # note the comma — (42) is just 42 in parentheses
empty = ()
rgb = tuple([255, 128, 0])  # (255, 128, 0) — built from a list

# Access (same as list)
point[0]   # 3
point[-1]  # 7

# Unpacking
x, y = point                 # x=3, y=7
lat, lon = 59.9139, 10.7522  # the right-hand side is a tuple even without parentheses
a, *rest = (1, 2, 3, 4, 5)   # a=1, rest=[2,3,4,5]
first, *middle, last = (1,2,3,4,5)  # first=1, middle=[2,3,4], last=5
# Note: the starred name collects the leftovers into a list, not a tuple

# Named tuples — tuples with field names
from collections import namedtuple

Point = namedtuple("Point", ["x", "y"])
p = Point(x=10, y=20)
p.x   # 10
p.y   # 20

When to use tuples vs lists:

Tuple: heterogeneous data with fixed structure (x, y coords; (name, age, email) records)
List: homogeneous data of variable length (list of names, list of orders)
Tuples are slightly faster and can be used as dict keys, because an immutable tuple is hashable — provided every element it contains is hashable too

Dictionaries

Key-value pairs, insertion-ordered (Python 3.7+), mutable, keys must be hashable.

Python

# Create
user = {"name": "Asma", "age": 30, "city": "Oslo"}
empty = {}  # {} is an empty dict, not an empty set
from_keys = dict.fromkeys(["a", "b", "c"], 0)  # {"a":0, "b":0, "c":0}

# Access
user["name"]              # "Asma" — KeyError if not found
user.get("name")          # "Asma" — None if not found
user.get("phone", "N/A")  # "N/A" — default if not found

# Modify
user["email"] = "asma@example.com"  # add
user["age"] = 31                    # update
del user["city"]                    # delete (KeyError if the key is missing)
popped = user.pop("age")            # remove and return -> 31
user.pop("missing", None)           # safe pop with default

# Check (membership tests look at keys, not values)
"name" in user          # True
"phone" in user         # False
"name" not in user      # False

# Iterate — looping over a dict yields its keys
for key in user:
    print(key, user[key])

# .items() yields (key, value) pairs, avoiding the extra lookup above
for key, value in user.items():
    print(f"{key}: {value}")

# These return view objects rather than lists
user.keys()    # dict_keys(["name", "email"])
user.values()  # dict_values(["Asma", "asma@..."])
user.items()   # dict_items([("name","Asma"), ...])

# Merge (Python 3.9+) — on duplicate keys the right-hand operand wins
defaults = {"theme": "light", "lang": "en"}
prefs = {"theme": "dark"}
merged = defaults | prefs   # {"theme": "dark", "lang": "en"}

Dict Comprehensions

Python

# {key_expr: value_expr for item in iterable if condition}
words = ["apple", "banana", "cherry"]
word_lengths = {word: len(word) for word in words}
# {"apple": 5, "banana": 6, "cherry": 6}

# Filter while building — "apple" (length 5) fails len(w) > 5 and is dropped
long_words = {w: len(w) for w in words if len(w) > 5}
# {"banana": 6, "cherry": 6}

defaultdict and Counter

Python

from collections import defaultdict, Counter

# defaultdict — indexing a missing key calls the factory (here `list`) to
# create the entry instead of raising KeyError
groups = defaultdict(list)
for item in [("fruit", "apple"), ("veg", "carrot"), ("fruit", "banana")]:
    groups[item[0]].append(item[1])
# {"fruit": ["apple", "banana"], "veg": ["carrot"]}

# Counter — count occurrences
text = "hello world"
char_count = Counter(text)
# Counter({"l": 3, "o": 2, "h": 1, ...})
char_count.most_common(3)  # [("l", 3), ("o", 2), ("h", 1)] — ties keep first-seen order

words = ["apple", "banana", "apple", "cherry", "apple"]
word_count = Counter(words)
word_count["apple"]       # 3
word_count["missing"]     # 0 (no KeyError)

Sets

Unordered, mutable, no duplicates, elements must be hashable.

Python

# Create
unique = {1, 2, 3, 4}
from_list = set([1, 1, 2, 2, 3])  # {1, 2, 3} — duplicates removed
empty_set = set()                  # NOT {} — that's an empty dict!

# Add and remove
unique.add(5)       # {1, 2, 3, 4, 5}
unique.add(1)       # no-op, 1 already exists
unique.remove(3)    # KeyError if not found
unique.discard(99)  # no error if not found
popped = unique.pop()  # remove and return arbitrary element

# Set operations — each returns a new set; a and b are left unchanged
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7}

a | b    # union: {1,2,3,4,5,6,7}
a & b    # intersection: {3,4,5}
a - b    # difference: {1,2} (in a but not b)
b - a    # difference: {6,7} (in b but not a)
a ^ b    # symmetric difference: {1,2,6,7} (in one but not both)

# Method spellings of the operators above
a.union(b)         # same result as a | b
a.intersection(b)  # same result as a & b
a.difference(b)    # same result as a - b
a.issubset({1,2,3,4,5,6})  # True — all of a is in the other
a.issuperset({1,2})         # True — a contains all of {1,2}
a.isdisjoint({6,7,8})       # True — no common elements

Practical Example: Student Grade Book

Python

from collections import defaultdict

class GradeBook:
    """Store numeric grades per student and answer simple queries about them.

    Grades live in a ``defaultdict(list)`` keyed by student name, so a student
    is created implicitly by their first ``add_grade`` call.
    """

    # (inclusive lower bound, bucket label), checked from the highest bound
    # down; a grade below every bound falls into the fail bucket.
    _LETTER_BOUNDS = (
        (90, "A (90-100)"),
        (80, "B (80-89)"),
        (70, "C (70-79)"),
        (60, "D (60-69)"),
    )
    _FAIL_LABEL = "F (<60)"

    def __init__(self):
        self._grades = defaultdict(list)  # {student: [grade, grade, ...]}

    def add_grade(self, student: str, grade: float):
        """Record one grade for ``student``.

        Raises:
            ValueError: if ``grade`` is outside the inclusive range 0-100.
        """
        if not 0 <= grade <= 100:
            raise ValueError(f"Grade must be 0-100, got {grade}")
        self._grades[student].append(grade)

    def average(self, student: str) -> float:
        """Return the mean of the student's grades, or 0.0 if none are recorded."""
        # .get() rather than indexing: indexing the defaultdict would insert an
        # empty entry for every unknown student that is merely queried.
        grades = self._grades.get(student, [])
        return sum(grades) / len(grades) if grades else 0.0

    def top_students(self, n: int = 3) -> list[tuple[str, float]]:
        """Return up to ``n`` ``(student, average)`` pairs, best average first.

        Students with equal averages keep the order in which they were first
        added. A non-positive ``n`` yields an empty list.
        """
        if n <= 0:
            # A bare [:n] with a negative n would drop students from the end
            # of the ranking instead of returning none.
            return []
        averages = [(s, self.average(s)) for s in self._grades]
        return sorted(averages, key=lambda x: x[1], reverse=True)[:n]

    def passing_students(self, threshold: float = 50.0) -> set[str]:
        """Return the students whose average is at least ``threshold``."""
        return {s for s in self._grades if self.average(s) >= threshold}

    def grade_distribution(self) -> dict[str, int]:
        """Count individual grades (not students) per letter bucket.

        Every bucket is present in the result, with 0 when it is empty, in
        the order A, B, C, D, F.
        """
        buckets = {label: 0 for _, label in self._LETTER_BOUNDS}
        buckets[self._FAIL_LABEL] = 0
        for grades in self._grades.values():
            for g in grades:
                # First bound the grade reaches wins; none reached means F.
                label = next(
                    (lbl for bound, lbl in self._LETTER_BOUNDS if g >= bound),
                    self._FAIL_LABEL,
                )
                buckets[label] += 1
        return buckets

# Usage
book = GradeBook()
book.add_grade("Alice", 92)
book.add_grade("Alice", 88)
book.add_grade("Bob", 75)
book.add_grade("Bob", 68)
book.add_grade("Carol", 55)

print(book.top_students())
# [("Alice", 90.0), ("Bob", 71.5), ("Carol", 55.0)]

print(book.passing_students())
# {"Alice", "Bob", "Carol"}

print(book.grade_distribution())
# {"A (90-100)": 1, "B (80-89)": 1, ...}

Key Takeaways

| Structure | Ordered | Mutable | Duplicates | Use for |
|-----------|---------|---------|------------|---------|
| list | Yes | Yes | Yes | Most collections |
| tuple | Yes | No | Yes | Fixed-structure records |
| dict | Yes (3.7+) | Yes | No (keys) | Key-value lookup |
| set | No | Yes | No | Unique items, fast membership |

List comprehensions beat loops for transformations — use them
dict.get(key, default) for safe access without try/except
defaultdict eliminates "key not found" checks
Counter for frequency analysis — far cleaner than manual counting
Sets have O(1) average-case membership checks — use x in my_set instead of x in my_list for large collections

Python Data Structures — Lists, Tuples, Dictionaries, and Sets

Python Data Structures

Lists

List Comprehensions

Tuples

Dictionaries

Dict Comprehensions

defaultdict and Counter

Sets

Practical Example: Student Grade Book

Key Takeaways

Enjoyed this article?

Leave a comment