Python Data Structures — Lists, Tuples, Dictionaries, and Sets
Master Python's built-in data structures: lists, tuples, dictionaries, and sets. Learn when to use each, all key methods, and practical examples.
Python Data Structures
Python has four built-in collection types that cover most programming needs. Knowing which to use — and all their methods — makes you significantly more productive.
Lists
Ordered, mutable, allows duplicates. The workhorse of Python collections.
# Create
fruits = ["apple", "banana", "cherry"]
numbers = list(range(1, 6)) # [1, 2, 3, 4, 5]
mixed = [1, "hello", True, None, [1, 2]]
# Access
fruits[0] # "apple"
fruits[-1] # "cherry" (last)
fruits[1:3] # ["banana", "cherry"] (slicing)
fruits[::-1] # reversed copy
# Common methods
fruits.append("date") # add to end
fruits.insert(1, "avocado") # insert at index
fruits.extend(["fig", "grape"]) # add multiple
fruits.remove("banana") # remove by value (first occurrence)
popped = fruits.pop() # remove and return last
popped = fruits.pop(0) # remove and return at index
fruits.sort() # sort in place
fruits.sort(reverse=True) # reverse sort
fruits.reverse() # reverse in place
fruits.index("apple") # find index of value (ValueError if not found)
fruits.count("apple") # count occurrences
fruits.clear() # remove all
# Non-mutating
sorted_copy = sorted(fruits) # returns new sorted list
reversed_gen = reversed(fruits) # returns iterator
length = len(fruits)
"apple" in fruits # membership check
List Comprehensions
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# [expression for item in iterable if condition]
squares = [x ** 2 for x in numbers]
evens = [x for x in numbers if x % 2 == 0]
even_squares = [x ** 2 for x in numbers if x % 2 == 0]
# Nested
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [x for row in matrix for x in row]
# [1, 2, 3, 4, 5, 6, 7, 8, 9]
# With conditional expression
labels = ["even" if x % 2 == 0 else "odd" for x in numbers]
Tuples
Ordered, immutable, allows duplicates. Like lists, but can't be changed after creation.
# Create
point = (3, 7)
single = (42,) # note the comma — (42) is just 42 in parentheses
empty = ()
rgb = tuple([255, 128, 0])
# Access (same as list)
point[0] # 3
point[-1] # 7
# Unpacking
x, y = point
lat, lon = 59.9139, 10.7522
a, *rest = (1, 2, 3, 4, 5) # a=1, rest=[2,3,4,5]
first, *middle, last = (1,2,3,4,5) # first=1, middle=[2,3,4], last=5
# Named tuples — tuples with field names
from collections import namedtuple
Point = namedtuple("Point", ["x", "y"])
p = Point(x=10, y=20)
p.x # 10
p.y # 20
When to use tuples vs lists:
- Tuple: heterogeneous data with fixed structure (x, y coords; (name, age, email) records)
- List: homogeneous data of variable length (list of names, list of orders)
- Tuples are slightly faster and, as long as all their elements are hashable, can be used as dict keys
Dictionaries
Key-value pairs, ordered (Python 3.7+), mutable, keys must be hashable.
# Create
user = {"name": "Asma", "age": 30, "city": "Oslo"}
empty = {}
from_keys = dict.fromkeys(["a", "b", "c"], 0) # {"a":0, "b":0, "c":0}
# Access
user["name"] # "Asma" — KeyError if not found
user.get("name") # "Asma" — None if not found
user.get("phone", "N/A") # "N/A" — default if not found
# Modify
user["email"] = "asma@example.com" # add
user["age"] = 31 # update
del user["city"] # delete
popped = user.pop("age") # remove and return
user.pop("missing", None) # safe pop with default
# Check
"name" in user # True
"phone" in user # False
"name" not in user # False
# Iterate
# Iterating a dict directly yields its keys; look each value up by key.
for key in user:
    print(key, user[key])
# .items() yields (key, value) pairs, avoiding the extra lookup.
for key, value in user.items():
    print(f"{key}: {value}")
user.keys() # dict_keys(["name", "email"])
user.values() # dict_values(["Asma", "asma@..."])
user.items() # dict_items([("name","Asma"), ...])
# Merge (Python 3.9+)
defaults = {"theme": "light", "lang": "en"}
prefs = {"theme": "dark"}
merged = defaults | prefs # {"theme": "dark", "lang": "en"}
Dict Comprehensions
words = ["apple", "banana", "cherry"]
word_lengths = {word: len(word) for word in words}
# {"apple": 5, "banana": 6, "cherry": 6}
# Filter while building
long_words = {w: len(w) for w in words if len(w) > 5}
# {"banana": 6, "cherry": 6}
defaultdict and Counter
from collections import defaultdict, Counter
# defaultdict — never raises KeyError, auto-creates missing keys
# Group (category, name) pairs by category. Indexing a missing category
# creates an empty list for it, so no "key exists?" check is needed.
groups = defaultdict(list)
for category, name in [("fruit", "apple"), ("veg", "carrot"), ("fruit", "banana")]:
    groups[category].append(name)
# {"fruit": ["apple", "banana"], "veg": ["carrot"]}
# Counter — count occurrences
text = "hello world"
char_count = Counter(text)
# Counter({"l": 3, "o": 2, "h": 1, ...})
char_count.most_common(3) # [("l", 3), ("o", 2), ("h", 1)] — ties keep first-seen order
words = ["apple", "banana", "apple", "cherry", "apple"]
word_count = Counter(words)
word_count["apple"] # 3
word_count["missing"] # 0 (no KeyError)
Sets
Unordered, mutable, no duplicates, elements must be hashable.
# Create
unique = {1, 2, 3, 4}
from_list = set([1, 1, 2, 2, 3]) # {1, 2, 3} — duplicates removed
empty_set = set() # NOT {} — that's an empty dict!
# Add and remove
unique.add(5)
unique.add(1) # no-op, 1 already exists
unique.remove(3) # KeyError if not found
unique.discard(99) # no error if not found
popped = unique.pop() # remove and return arbitrary element
# Set operations
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7}
a | b # union: {1,2,3,4,5,6,7}
a & b # intersection: {3,4,5}
a - b # difference: {1,2} (in a but not b)
b - a # difference: {6,7} (in b but not a)
a ^ b # symmetric difference: {1,2,6,7} (in one but not both)
a.union(b)
a.intersection(b)
a.difference(b)
a.issubset({1,2,3,4,5,6}) # True — all of a is in the other
a.issuperset({1,2}) # True — a contains all of {1,2}
a.isdisjoint({6,7,8}) # True — no common elements
Practical Example: Student Grade Book
from collections import defaultdict
class GradeBook:
    """Record numeric grades per student and report simple statistics.

    Grades are kept in a ``defaultdict(list)`` that maps each student name
    to the list of grades recorded for that student, in insertion order.
    """

    def __init__(self) -> None:
        self._grades = defaultdict(list)  # {student: [grade, grade, ...]}

    def add_grade(self, student: str, grade: float) -> None:
        """Append ``grade`` to ``student``'s record.

        Raises:
            ValueError: If ``grade`` is outside the inclusive range 0-100.
        """
        if not 0 <= grade <= 100:
            raise ValueError(f"Grade must be 0-100, got {grade}")
        self._grades[student].append(grade)

    def average(self, student: str) -> float:
        """Return the mean of ``student``'s grades, or 0.0 if none exist."""
        # .get() instead of indexing: indexing the defaultdict would insert
        # an empty entry for a student who has never been graded.
        grades = self._grades.get(student, [])
        return sum(grades) / len(grades) if grades else 0.0

    def top_students(self, n: int = 3) -> list[tuple[str, float]]:
        """Return up to ``n`` ``(student, average)`` pairs, best average first.

        Students with equal averages keep the order in which they were first
        added. A non-positive ``n`` yields an empty list.
        """
        if n <= 0:
            # A negative slice bound would drop students from the end of the
            # ranking instead of limiting the result, so handle it up front.
            return []
        averages = [(s, self.average(s)) for s in self._grades]
        return sorted(averages, key=lambda x: x[1], reverse=True)[:n]

    def passing_students(self, threshold: float = 50.0) -> set[str]:
        """Return the students whose average is at least ``threshold``."""
        return {s for s in self._grades if self.average(s) >= threshold}

    def grade_distribution(self) -> dict[str, int]:
        """Count every recorded grade into letter-grade buckets.

        Each individual grade is counted (not each student's average), so a
        student with several grades contributes several counts.
        """
        buckets = {
            "A (90-100)": 0,
            "B (80-89)": 0,
            "C (70-79)": 0,
            "D (60-69)": 0,
            "F (<60)": 0,
        }
        for grades in self._grades.values():
            for g in grades:
                # Checked from the highest band down, so the first match wins.
                if g >= 90:
                    buckets["A (90-100)"] += 1
                elif g >= 80:
                    buckets["B (80-89)"] += 1
                elif g >= 70:
                    buckets["C (70-79)"] += 1
                elif g >= 60:
                    buckets["D (60-69)"] += 1
                else:
                    buckets["F (<60)"] += 1
        return buckets
# Usage
book = GradeBook()
book.add_grade("Alice", 92)
book.add_grade("Alice", 88)
book.add_grade("Bob", 75)
book.add_grade("Bob", 68)
book.add_grade("Carol", 55)
print(book.top_students())
# [("Alice", 90.0), ("Bob", 71.5), ("Carol", 55.0)]
print(book.passing_students())
# {"Alice", "Bob", "Carol"}
print(book.grade_distribution())
# {"A (90-100)": 1, "B (80-89)": 1, ...}
Key Takeaways
| Structure | Ordered | Mutable | Duplicates | Use for |
|-----------|---------|---------|------------|---------|
| list | Yes | Yes | Yes | Most collections |
| tuple | Yes | No | Yes | Fixed-structure records |
| dict | Yes (3.7+) | Yes | No (keys) | Key-value lookup |
| set | No | Yes | No | Unique items, fast membership |
- List comprehensions beat loops for transformations — use them
- `dict.get(key, default)` for safe access without try/except
- `defaultdict` eliminates "key not found" checks
- `Counter` for frequency analysis — far cleaner than manual counting
- Sets have O(1) membership check — use `x in my_set` instead of `x in my_list` for large collections
Enjoyed this article?
Explore the Backend Systems learning path for more.
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.