Skip to content

Data Structures Reference (Page 1 of 2)

Python Built-in Types: Operations & Big-O

list (dynamic array)

Operation Avg Worst Notes
a[i] O(1) O(1) index
a[i] = x O(1) O(1) assign
a.append(x) O(1) O(n) amortized O(1)
a.pop() O(1) O(1) pop from end
a.pop(i) O(n) O(n) shift elements
a.insert(i,x) O(n) O(n) shift elements
del a[i] O(n) O(n) shift elements
x in a O(n) O(n) linear scan
a.sort() O(n log n) O(n log n) Timsort
a[i:j] O(j-i) O(j-i) slice copy
a.extend(b) O(k) O(k) k = len(b)
len(a) O(1) O(1)
a.reverse() O(n) O(n) in-place
a + b O(n+m) O(n+m) creates new list
a * k O(nk) O(nk) creates new list

dict (hash map)

Operation Avg Worst Notes
d[k] O(1) O(n) get
d[k] = v O(1) O(n) set
del d[k] O(1) O(n) delete
k in d O(1) O(n) membership
d.get(k, default) O(1) O(n) safe get
d.keys/values/items O(1) O(1) view objects
len(d) O(1) O(1)
d.pop(k) O(1) O(n)
d.setdefault(k,v) O(1) O(n) get or set
d1 \| d2 O(n+m) merge (3.9+)

Note: dict preserves insertion order (Python 3.7+). Worst case O(n) from hash collisions (rare).

set (hash set)

Operation Avg Worst
s.add(x) O(1) O(n)
s.remove(x) O(1) O(n)
s.discard(x) O(1) O(n)
x in s O(1) O(n)
s \| t (union) O(n+m)
s & t (intersect) O(min(n,m))
s - t (difference) O(n)
s ^ t (sym diff) O(n+m)
s <= t (subset) O(n)
s = {1, 2, 3}
s.add(4); s.remove(4)       # remove raises KeyError if missing
s.discard(4)                 # no error if missing
s.pop()                      # remove/return arbitrary element
frozenset({1,2,3})           # immutable, hashable (usable as dict key)

tuple

  • Immutable, hashable (usable as dict keys/set elements if contents are hashable)
  • O(1) index, O(n) search, O(n) iteration
  • Unpacking: a, b, *rest = (1, 2, 3, 4, 5)

Array/List Patterns

Two Pointers (sorted array or opposite ends)

# Pair sum in sorted array
def two_sum_sorted(a, target):
    lo, hi = 0, len(a) - 1
    while lo < hi:
        s = a[lo] + a[hi]
        if s == target: return [lo, hi]
        elif s < target: lo += 1
        else: hi -= 1
    return []

# Remove duplicates in-place (sorted)
def remove_dupes(a):
    if not a: return 0
    w = 1  # write pointer
    for r in range(1, len(a)):
        if a[r] != a[r-1]:
            a[w] = a[r]; w += 1
    return w

Sliding Window

# Fixed size window: max sum of subarray of size k
def max_sum_k(a, k):
    s = sum(a[:k])
    best = s
    for i in range(k, len(a)):
        s += a[i] - a[i-k]
        best = max(best, s)
    return best

# Variable size: smallest subarray with sum >= target
def min_subarray(a, target):
    lo = total = 0
    best = float('inf')
    for hi in range(len(a)):
        total += a[hi]
        while total >= target:
            best = min(best, hi - lo + 1)
            total -= a[lo]; lo += 1
    return best if best != float('inf') else 0

Prefix Sum

# Build prefix sum
pre = [0] * (len(a) + 1)
for i in range(len(a)):
    pre[i+1] = pre[i] + a[i]
# Sum of a[i..j] inclusive = pre[j+1] - pre[i]

# 2D Prefix Sum
m, n = len(grid), len(grid[0])
pre = [[0]*(n+1) for _ in range(m+1)]
for i in range(m):
    for j in range(n):
        pre[i+1][j+1] = grid[i][j] + pre[i][j+1] + pre[i+1][j] - pre[i][j]
# Sum of submatrix (r1,c1) to (r2,c2) inclusive:
# pre[r2+1][c2+1] - pre[r1][c2+1] - pre[r2+1][c1] + pre[r1][c1]

Stack / Queue

# Stack (LIFO) - use list
stack = []
stack.append(x)              # push O(1)
stack.pop()                  # pop  O(1)
stack[-1]                    # peek O(1)

# Queue (FIFO) - use deque (NOT list.pop(0) which is O(n))
from collections import deque
q = deque()
q.append(x)                 # enqueue O(1)
q.popleft()                 # dequeue O(1)
q[0]                         # peek    O(1)

# Monotonic stack: next greater element
def next_greater(nums):
    n = len(nums)
    res = [-1] * n
    stack = []               # stores indices
    for i in range(n):
        while stack and nums[i] > nums[stack[-1]]:
            res[stack.pop()] = nums[i]
        stack.append(i)
    return res

Data Structures Reference (Page 2 of 2)

Linked List

class ListNode:
    def __init__(self, val=0, next=None):
        self.val = val
        self.next = next

# Dummy head pattern (simplifies edge cases)
dummy = ListNode(0)
dummy.next = head
# ... manipulate list ...
return dummy.next

# Reverse linked list
def reverse(head):
    prev, curr = None, head
    while curr:
        nxt = curr.next
        curr.next = prev
        prev = curr
        curr = nxt
    return prev

# Find middle (slow/fast pointers)
def find_middle(head):
    slow = fast = head
    while fast and fast.next:
        slow = slow.next
        fast = fast.next.next
    return slow  # middle node (2nd middle if even length)

# Detect cycle
def has_cycle(head):
    slow = fast = head
    while fast and fast.next:
        slow = slow.next
        fast = fast.next.next
        if slow == fast: return True
    return False

# Find cycle start
def cycle_start(head):
    slow = fast = head
    while fast and fast.next:
        slow = slow.next
        fast = fast.next.next
        if slow == fast:
            slow = head
            while slow != fast:
                slow = slow.next
                fast = fast.next
            return slow
    return None

Tree

class TreeNode:
    def __init__(self, val=0, left=None, right=None):
        self.val = val
        self.left = left
        self.right = right

# Inorder (left, root, right) - BST gives sorted order
def inorder(root):
    if not root: return []
    return inorder(root.left) + [root.val] + inorder(root.right)

# Iterative inorder (useful pattern)
def inorder_iter(root):
    res, stack = [], []
    curr = root
    while curr or stack:
        while curr:
            stack.append(curr)
            curr = curr.left
        curr = stack.pop()
        res.append(curr.val)
        curr = curr.right
    return res

# Level-order (BFS)
def level_order(root):
    if not root: return []
    res, q = [], deque([root])
    while q:
        level = []
        for _ in range(len(q)):
            node = q.popleft()
            level.append(node.val)
            if node.left:  q.append(node.left)
            if node.right: q.append(node.right)
        res.append(level)
    return res

Trie

class Trie:
    def __init__(self):
        self.children = {}
        self.is_end = False

    def insert(self, word):
        node = self
        for c in word:
            if c not in node.children:
                node.children[c] = Trie()
            node = node.children[c]
        node.is_end = True

    def search(self, word):
        node = self
        for c in word:
            if c not in node.children: return False
            node = node.children[c]
        return node.is_end

    def starts_with(self, prefix):
        node = self
        for c in prefix:
            if c not in node.children: return False
            node = node.children[c]
        return True

Graph Representations

# Adjacency list (most common) - dict of lists
graph = defaultdict(list)
for u, v in edges:
    graph[u].append(v)
    graph[v].append(u)          # undirected

# Adjacency list from n nodes, 0-indexed
graph = [[] for _ in range(n)]
for u, v in edges:
    graph[u].append(v)

# Weighted adjacency list
graph = defaultdict(list)
for u, v, w in edges:
    graph[u].append((v, w))

# Adjacency matrix (dense graphs or need O(1) edge lookup)
adj = [[0]*n for _ in range(n)]
for u, v in edges:
    adj[u][v] = 1
    adj[v][u] = 1               # undirected

# Edge list (for Union-Find / Kruskal's)
edges = [(weight, u, v), ...]
edges.sort()                     # sort by weight

Hash Map / Set Patterns

# Two Sum (unsorted): value -> index
def two_sum(nums, target):
    seen = {}
    for i, x in enumerate(nums):
        comp = target - x
        if comp in seen: return [seen[comp], i]
        seen[x] = i

# Group anagrams: sorted string as key
groups = defaultdict(list)
for s in strs:
    groups[tuple(sorted(s))].append(s)

# Subarray sum equals k (prefix sum + hash map)
def subarray_sum(nums, k):
    count = 0
    prefix = 0
    seen = {0: 1}               # prefix_sum -> count of occurrences
    for x in nums:
        prefix += x
        count += seen.get(prefix - k, 0)
        seen[prefix] = seen.get(prefix, 0) + 1
    return count

# Longest substring without repeating chars
def length_of_longest(s):
    seen = {}
    lo = best = 0
    for hi, c in enumerate(s):
        if c in seen and seen[c] >= lo:
            lo = seen[c] + 1
        seen[c] = hi
        best = max(best, hi - lo + 1)
    return best