Learnixo

Python Essentials for AI Engineers · Lesson 16 of 36

Essential List Methods for AI Engineers

Adding Elements

Python
# Demonstrates four ways of growing a list: append, extend, insert, and +.
docs = ["warfarin study", "aspirin meta-analysis"]

# append: add one item to the end — O(1) amortized
docs.append("metformin review")
print(docs)   # ["warfarin study", "aspirin meta-analysis", "metformin review"]

# extend: add all items from an iterable — O(k), where k is the length of the iterable
new_docs = ["lisinopril trial", "atorvastatin rct"]
docs.extend(new_docs)
print(len(docs))   # 5 (3 existing items + 2 new ones)

# insert: add at a specific index — O(n), because later elements are shifted
docs.insert(0, "introduction")   # Insert at position 0 (front)
docs.insert(2, "methods")        # Insert at position 2 (counted after the insert above)

# + operator: creates a NEW list (does not modify docs in place)
combined = docs + ["appendix"]
print(docs is combined)   # False — different objects

Removing Elements

Python
# Demonstrates the removal operations: pop, remove, clear, and del.
scores = [0.9, 0.4, 0.85, 0.4, 0.7]

# pop: remove and return by index — O(n) for non-last elements
last = scores.pop()      # Remove last — O(1)
print(last)              # 0.7
first = scores.pop(0)    # Remove first — O(n)
print(first)             # 0.9
# scores is now [0.4, 0.85, 0.4]

# remove: remove first occurrence of a value — O(n)
scores.remove(0.4)       # Removes the first 0.4; the second one stays
print(scores)            # [0.85, 0.4]

# clear: remove all elements — O(n)
temp = [1, 2, 3]
temp.clear()
print(temp)   # []

# del: remove by index or slice
data = [10, 20, 30, 40, 50]
del data[1]       # Remove index 1 (20) -> [10, 30, 40, 50]
del data[1:3]     # Remove a slice: indices 1 and 2 (30 and 40)
print(data)       # [10, 50]

Searching and Counting

Python
# Sample sentiment labels used by the search and count examples.
labels = ["positive", "negative", "positive", "neutral", "positive"]

# index: find first occurrence — O(n); raises ValueError if the value is not found
idx = labels.index("negative")
print(idx)   # 1

# Safe search with a default
def find_first(lst: list, value, default: int = -1) -> int:
    """Return the index of the first occurrence of value in lst.

    list.index raises ValueError when the value is missing; this wrapper
    returns default instead so callers do not need their own try/except.
    """
    try:
        position = lst.index(value)
    except ValueError:
        # Value is not present: fall back to the caller-supplied default.
        position = default
    return position

# find_first returns the index when the value exists, otherwise the default (-1)
print(find_first(labels, "neutral"))   # 3
print(find_first(labels, "unknown"))   # -1

# count: count occurrences — O(n)
print(labels.count("positive"))   # 3
print(labels.count("negative"))   # 1

# 'in' operator: O(n) check — use a set for repeated lookups
print("neutral" in labels)   # True

Sorting

Python
# Demonstrates in-place sort(), the sorted() builtin, and key functions.
scores = [0.45, 0.92, 0.78, 0.61, 0.89]

# sort: sorts in place — O(n log n) — modifies the original list
scores.sort()
print(scores)   # [0.45, 0.61, 0.78, 0.89, 0.92]

scores.sort(reverse=True)
print(scores)   # [0.92, 0.89, 0.78, 0.61, 0.45]

# sorted: returns a NEW list — original unchanged
original = [0.45, 0.92, 0.78]
ranked = sorted(original, reverse=True)
print(original)   # [0.45, 0.92, 0.78] — unchanged
print(ranked)     # [0.92, 0.78, 0.45]

# Sort with key function: the key extracts the value to compare on
results = [
    {"drug": "aspirin", "score": 0.78},
    {"drug": "warfarin", "score": 0.92},
    {"drug": "metformin", "score": 0.61},
]

results.sort(key=lambda r: r["score"], reverse=True)
print(results[0]["drug"])   # "warfarin" — highest score

# Sort by multiple keys: primary then secondary.
# The key returns a tuple; tuples compare element by element, so the class
# (d[1]) is compared first and the name (d[0]) only breaks ties.
drugs = [("warfarin", "anticoagulant"), ("aspirin", "anticoagulant"), ("metformin", "antidiabetic")]
drugs.sort(key=lambda d: (d[1], d[0]))   # By class, then name
print(drugs)
# [("aspirin", "anticoagulant"), ("warfarin", "anticoagulant"), ("metformin", "antidiabetic")]

Reversing

Python
# Demonstrates three ways to reverse: list.reverse(), reversed(), and [::-1].
tokens = ["the", "patient", "takes", "warfarin"]

# reverse: reverses in place — O(n)
tokens.reverse()
print(tokens)   # ["warfarin", "takes", "patient", "the"]

# reversed(): returns an iterator (does not modify original)
original = [1, 2, 3, 4, 5]
rev_iter = reversed(original)
print(list(rev_iter))   # [5, 4, 3, 2, 1] — list() consumes the iterator
print(original)         # [1, 2, 3, 4, 5] — unchanged

# Slice reversal: [::-1] creates a new list
reversed_copy = original[::-1]
print(reversed_copy)   # [5, 4, 3, 2, 1]

Copying

Python
# copy: shallow copy  new list, same element references
# Demonstrates the difference between a shallow copy and a deep copy.
original = [{"drug": "warfarin", "dose": 5}, {"drug": "aspirin", "dose": 81}]
shallow = original.copy()

# Appending changes only the copy's own list structure.
shallow.append({"drug": "metformin"})
print(len(original))   # 2 — not affected by append

# BUT: nested objects are shared between the two lists
shallow[0]["dose"] = 10
print(original[0]["dose"])   # 10 — mutation propagates! Both point to the same dict

# Deep copy: copies the nested dicts as well
import copy
deep = copy.deepcopy(original)
deep[0]["dose"] = 999
print(original[0]["dose"])   # 10 — NOT affected by the change to deep

Stack and Queue Patterns

Python
from collections import deque

# Stack (LIFO): use append/pop  both O(1)
# Stack (LIFO): a plain list with append/pop at the end.
# pop() from the end is O(1); append is O(1) amortized.
call_stack = []
call_stack.append("retrieve_documents")
call_stack.append("generate_response")
call_stack.append("format_output")

step = call_stack.pop()   # "format_output" — last in, first out
print(step)

# Queue (FIFO): use deque for O(1) popleft — list.pop(0) is O(n)
request_queue = deque()
request_queue.append("user_1_query")
request_queue.append("user_2_query")
request_queue.append("user_3_query")

next_request = request_queue.popleft()   # "user_1_query" — first in, first out
print(next_request)

AI Pipeline Patterns

Collecting Retrieval Results

Python
from dataclasses import dataclass, field

@dataclass
class RetrievalBatch:
    """Collect retrieved documents and their relevance scores for one query.

    ``results`` and ``scores`` are parallel lists: ``scores[i]`` is the score
    of ``results[i]``. Use ``add`` so the two lists stay aligned.
    """

    query: str
    results: list[dict] = field(default_factory=list)
    scores: list[float] = field(default_factory=list)

    def add(self, doc: dict, score: float) -> None:
        """Append one document together with its score."""
        self.results.append(doc)
        self.scores.append(score)

    def top_k(self, k: int) -> list[dict]:
        """Return up to k results, highest score first.

        Documents with equal scores keep their insertion order.
        """
        # Sort on the score alone. Sorting the (score, doc) tuples directly
        # falls back to comparing the dicts whenever two scores tie, and
        # dicts do not support ordering, so that raises TypeError.
        paired = sorted(
            zip(self.scores, self.results),
            key=lambda pair: pair[0],
            reverse=True,
        )
        return [doc for _, doc in paired[:k]]

    def filter_by_score(self, threshold: float) -> list[dict]:
        """Return the documents whose score is at least threshold, in insertion order."""
        return [doc for doc, score in zip(self.results, self.scores) if score >= threshold]


# Build a batch for one query and add three documents with their scores.
batch = RetrievalBatch(query="anticoagulant dosing")
batch.add({"id": "doc1", "text": "Warfarin dosing..."}, score=0.92)
batch.add({"id": "doc2", "text": "Heparin protocol..."}, score=0.78)
batch.add({"id": "doc3", "text": "Aspirin and NSAIDs..."}, score=0.45)

# The two highest-scoring documents, best first (0.92, then 0.78).
print(batch.top_k(2))
# [{"id": "doc1", ...}, {"id": "doc2", ...}]

Chunking Documents for Batch Processing

Python
def chunk_list(items: list, chunk_size: int) -> list[list]:
    """Split a list into consecutive chunks of at most chunk_size items.

    Every chunk holds exactly chunk_size items except possibly the last,
    which holds the remainder. An empty input yields an empty list.

    Raises:
        ValueError: If chunk_size is not a positive integer.
    """
    # Validate up front: a negative step makes range() yield nothing, which
    # would silently drop every item, and a zero step raises an error that
    # does not mention chunk_size.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")
    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]


# Split 250 stand-in documents into batches of 32.
documents = list(range(250))   # 250 documents
batches = chunk_list(documents, chunk_size=32)
print(len(batches))           # 8 batches (7 full + 1 partial)
print(len(batches[-1]))       # 26 — last batch (250 - 7 * 32)

List Methods Quick Reference

| Method | Description | Complexity | In-Place? |
|---|---|---|---|
| append(x) | Add x to end | O(1) amortized | Yes |
| extend(iterable) | Add all items from iterable | O(k) | Yes |
| insert(i, x) | Insert x at index i | O(n) | Yes |
| remove(x) | Remove first occurrence of x | O(n) | Yes |
| pop(i=-1) | Remove and return item at i | O(1) for last, O(n) otherwise | Yes |
| clear() | Remove all items | O(n) | Yes |
| index(x) | Index of first x | O(n) | No |
| count(x) | Count occurrences of x | O(n) | No |
| sort(key, reverse) | Sort in place | O(n log n) | Yes |
| reverse() | Reverse in place | O(n) | Yes |
| copy() | Shallow copy | O(n) | No — new list |

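Key distinction: methods that mutate the list in place (append, extend, insert, remove, clear, sort, reverse) return None. The one exception is pop(), which mutates the list and returns the removed item. If you write result = my_list.sort(), result is None. Use sorted() when you need the sorted list as a return value.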