Python Essentials for AI Engineers · Lesson 4 of 36
What is the difference between int and float?
int: Arbitrary Precision
Python's int has no fixed size — it can hold any whole number, limited only by available memory.
# No overflow — Python ints grow automatically
big = 2 ** 128
print(big) # 340282366920938463463374607431768211456
print(type(big)) # <class 'int'>
# Common int operations
a, b = 17, 5
print(a + b) # 22
print(a - b) # 12
print(a * b) # 85
print(a // b) # 3 — floor division (integer result)
print(a % b) # 2 — modulo (remainder)
print(a ** b) # 1419857 — exponentiation
print(a / b) # 3.4 — always returns float, even if evenly divisible
print(10 / 2) # 5.0 — float!
float: 64-bit IEEE 754
Python's float is a 64-bit double-precision number. It has ~15–17 significant decimal digits of precision.
x = 3.14
print(type(x)) # <class 'float'>
# Scientific notation
y = 1.5e-3 # 0.0015
z = 2.5e10 # 25000000000.0
# Float operations
print(0.1 + 0.2) # 0.30000000000000004 — NOT 0.3!
print(0.1 + 0.2 == 0.3) # False — floating-point representation error
# Correct comparison: use math.isclose
import math
print(math.isclose(0.1 + 0.2, 0.3)) # True
print(math.isclose(0.1 + 0.2, 0.3, rel_tol=1e-9)) # True
Why Floating-Point is Imprecise
Floats are stored in binary (base 2). Most decimal fractions cannot be represented exactly in binary, just as 1/3 cannot be represented exactly in base 10.
# 0.1 in binary is a repeating fraction — it gets rounded
from decimal import Decimal
print(Decimal(0.1))
# 0.1000000000000000055511151231257827021181583404541015625
# The stored value is the closest representable binary fraction to 0.1
# More examples
print(1.1 + 2.2) # 3.3000000000000003
print(0.1 * 3) # 0.30000000000000004
print(round(0.1 * 3, 1)) # 0.3 — round() helps for display
Converting Between int and float
# int → float
x = float(5)
print(x) # 5.0
print(type(x)) # <class 'float'>
# float → int: truncates (does NOT round)
print(int(3.9)) # 3 — truncated, not rounded
print(int(-3.9)) # -3 — truncated toward zero
print(int(3.0)) # 3
# Rounding
print(round(3.9)) # 4
print(round(3.14159, 2)) # 3.14
# Banker's rounding (rounds to nearest even)
print(round(2.5)) # 2 — rounds to even
print(round(3.5)) # 4 — rounds to even
Special float Values
import math
# Infinity
pos_inf = float("inf")
neg_inf = float("-inf")
print(pos_inf > 1e308) # True
print(neg_inf < -1e308) # True
# Not a Number
nan = float("nan")
print(math.isnan(nan)) # True
print(nan == nan) # False — NaN is not equal to itself!
print(math.isnan(float("nan"))) # True — correct way to check
# math constants
print(math.pi) # 3.141592653589793
print(math.e) # 2.718281828459045
print(math.inf) # inf
print(math.nan) # nan
AI and ML: Where This Matters
Loss Values and Comparisons
import math
def is_loss_improving(old_loss: float, new_loss: float, min_delta: float = 1e-6) -> bool:
    """Report whether the loss dropped by strictly more than ``min_delta``.

    Args:
        old_loss: Loss value from the earlier step.
        new_loss: Loss value from the later step.
        min_delta: Smallest decrease that counts as an improvement.

    Returns:
        True when ``old_loss - new_loss`` exceeds ``min_delta``; False
        otherwise (the loss rose, stayed level, or fell by too little).
    """
    # Thresholding the difference avoids relying on exact float equality.
    decrease = old_loss - new_loss
    return decrease > min_delta
def is_valid_loss(loss: float) -> bool:
    """Return True only for a finite loss value.

    NaN and positive/negative infinity are rejected; a loss in either
    state signals a training crash.
    """
    is_nan_or_inf = math.isnan(loss) or math.isinf(loss)
    return not is_nan_or_inf
# Training loop guard
loss = compute_loss(model, batch)
if not is_valid_loss(loss):
raise RuntimeError(f"Training diverged: loss = {loss}")
Probability Scores
def clamp_probability(p: float) -> float:
    """Clamp ``p`` to the valid probability range [0, 1].

    Args:
        p: Candidate probability; may lie outside [0, 1].

    Returns:
        ``p`` limited to [0, 1]. NaN is returned unchanged so that an
        invalid score stays visible to the caller.
    """
    import math

    # Every comparison with NaN is False, so min()/max() alone would quietly
    # turn NaN into 1.0 and hide the invalid input.
    if math.isnan(p):
        return p
    return max(0.0, min(1.0, p))


def log_loss(y_true: int, y_pred: float, epsilon: float = 1e-7) -> float:
    """Binary cross-entropy loss for a single prediction.

    Args:
        y_true: Ground-truth label, expected to be 0 or 1.
        y_pred: Predicted probability of the positive class.
        epsilon: Margin that keeps the prediction away from exactly 0 and 1,
            where ``math.log(0)`` would raise ``ValueError``.

    Returns:
        The cross-entropy loss, or NaN when ``y_pred`` is NaN.
    """
    import math

    # Propagate NaN explicitly: the clipping below would otherwise replace it
    # with 1 - epsilon and report a near-perfect loss for an invalid prediction.
    if math.isnan(y_pred):
        return math.nan
    y_pred = clamp_probability(y_pred)
    y_pred = max(epsilon, min(1 - epsilon, y_pred))  # Clip away from 0 and 1
    return -(y_true * math.log(y_pred) + (1 - y_true) * math.log(1 - y_pred))
print(log_loss(1, 0.9)) # 0.10536... — low loss, correct prediction
print(log_loss(1, 0.1)) # 2.30258... — high loss, wrong prediction
Integer vs Float Indexing and Slicing
import numpy as np
embeddings = np.random.randn(100, 1536)
# int: fine for indexing
idx = 5
print(embeddings[idx].shape) # (1536,)
# float: NOT valid as an index
ratio = 0.8
split = int(len(embeddings) * ratio) # Convert to int first
train = embeddings[:split]
test = embeddings[split:]
print(train.shape) # (80, 1536)
print(test.shape) # (20, 1536)
Token Count Budgets
def estimate_tokens(text: str, *, chars_per_token: int = 4) -> int:
    """Roughly estimate the token count of ``text`` from its length.

    Args:
        text: Text to measure.
        chars_per_token: Assumed average number of characters per token.
            Defaults to 4, the rough rule of thumb used for this estimate.

    Returns:
        ``len(text) // chars_per_token``. Floor division keeps the result
        an int and rounds down, so text shorter than one token's worth of
        characters counts as 0.

    Raises:
        ValueError: If ``chars_per_token`` is less than 1.
    """
    if chars_per_token < 1:
        raise ValueError(f"chars_per_token must be >= 1, got {chars_per_token}")
    return len(text) // chars_per_token  # // ensures int result
def within_budget(text: str, max_tokens: int = 4096) -> bool:
    """Tell whether the estimated token count of ``text`` fits the budget.

    Args:
        text: Text to check.
        max_tokens: Inclusive upper limit on the estimated token count.

    Returns:
        True when ``estimate_tokens(text)`` is at most ``max_tokens``.
    """
    estimated = estimate_tokens(text)
    return estimated <= max_tokens
# Integer division is exact and safe for this
budget_ratio = 0.8
max_context = 8192
soft_limit = int(max_context * budget_ratio) # 6553 — always convert
print(soft_limit) # 6553
Common Pitfalls
# Pitfall 1: using == to compare floats
total = 0.1 + 0.1 + 0.1
if total == 0.3: # False! Don't do this
print("equal")
if math.isclose(total, 0.3): # True — do this instead
print("approximately equal")
# Pitfall 2: int division gives float in Python 3
result = 10 / 3
print(result, type(result)) # 3.3333... <class 'float'>
# Use // for integer (floor) division
result = 10 // 3
print(result, type(result)) # 3 <class 'int'>
# Pitfall 3: NaN silently propagates
import numpy as np
scores = np.array([0.9, float("nan"), 0.8])
print(scores.mean()) # nan — the whole mean is poisoned
print(np.nanmean(scores)) # ≈ 0.85 (prints 0.8500000000000001) — ignores NaN
# Pitfall 4: loss of precision in long float chains
total = 0.0
for _ in range(10_000):
total += 0.1
print(total) # 1000.0000000001588 — slight drift
print(math.isclose(total, 1000.0, rel_tol=1e-9)) # True — the relative drift (~1.6e-13) is far below rel_tol, even though total == 1000.0 is False
int vs float Quick Reference
| Property | int | float |
|---|---|---|
| Precision | Exact, arbitrary size | ~15–17 decimal digits |
| Memory | Grows with value | Fixed 64 bits |
| Division (/) | Returns float | Returns float |
| Floor division (//) | Returns int | Returns float |
| Overflow | Never | Becomes inf (some operations, e.g. `**` and `math` functions, raise OverflowError instead) |
| Comparison | Exact with == | Use math.isclose() |
| Index arrays | Yes | No — convert first |
| Common AI use | token counts, indices, dims | probabilities, losses, embeddings |