Skip to content

Kth Largest

Problem

Given an unsorted array and an integer k, return the kth largest element. Also implement a streaming version that supports adding new values.

Approach

Maintain a min-heap of size k. The heap top is always the kth largest. For each new element larger than the heap top, replace it.

When to Use

Streaming top-K — "kth largest in a stream", "maintain running top K". Min-heap of size k keeps only the K largest seen so far. Also: real-time leaderboard, top-K sensor readings in telemetry.

Implementation

kth_largest

Find the kth largest element.

Problem

Given an unsorted array and an integer k, return the kth largest element. Also implement a streaming version that supports adding new values.

Approach

Maintain a min-heap of size k. The heap top is always the kth largest. For each new element larger than the heap top, replace it.

When to use

Streaming top-K — "kth largest in a stream", "maintain running top K". Min-heap of size k keeps only the K largest seen so far. Also: real-time leaderboard, top-K sensor readings in telemetry.

Complexity

find_kth_largest: O(n log k) time, O(k) space KthLargest.add: O(log k) time per call, O(k) space total

KthLargest

Stream version: maintain the kth largest over a growing dataset.

kl = KthLargest(3, [4, 5, 8, 2]) kl.add(3) 4 kl.add(5) 5 kl.add(10) 5

Source code in src/algo/heaps/kth_largest.py
class KthLargest:
    """Stream version: maintain the kth largest over a growing dataset.

    >>> kl = KthLargest(3, [4, 5, 8, 2])
    >>> kl.add(3)
    4
    >>> kl.add(5)
    5
    >>> kl.add(10)
    5
    """

    def __init__(self, k: int, nums: Sequence[int]) -> None:
        self._k = k
        self._heap: list[int] = []
        for n in nums:
            self.add(n)

    def add(self, val: int) -> int:
        """Add a value and return the current kth largest element."""
        if len(self._heap) < self._k:
            heapq.heappush(self._heap, val)
        elif val > self._heap[0]:
            heapq.heapreplace(self._heap, val)
        return self._heap[0]

add

add(val: int) -> int

Add a value and return the current kth largest element.

Source code in src/algo/heaps/kth_largest.py
def add(self, val: int) -> int:
    """Add a value and return the current kth largest element."""
    if len(self._heap) < self._k:
        heapq.heappush(self._heap, val)
    elif val > self._heap[0]:
        heapq.heapreplace(self._heap, val)
    return self._heap[0]

find_kth_largest

find_kth_largest(nums: Sequence[int], k: int) -> int

Return the kth largest element in the array.

find_kth_largest([3, 2, 1, 5, 6, 4], 2) 5

Source code in src/algo/heaps/kth_largest.py
def find_kth_largest(nums: Sequence[int], k: int) -> int:
    """Return the kth largest element in the array.

    >>> find_kth_largest([3, 2, 1, 5, 6, 4], 2)
    5
    """
    if k <= 0 or k > len(nums):
        msg = f"k={k} out of range for length {len(nums)}"
        raise ValueError(msg)

    heap = list(nums[:k])
    heapq.heapify(heap)

    for n in nums[k:]:
        if n > heap[0]:
            heapq.heapreplace(heap, n)

    return heap[0]
tests/heaps/test_kth_largest.py
"""Tests for kth largest element."""

import pytest
from hypothesis import given
from hypothesis import strategies as st

from algo.heaps.kth_largest import KthLargest, find_kth_largest


class TestFindKthLargest:
    def test_basic(self) -> None:
        assert find_kth_largest([3, 2, 1, 5, 6, 4], 2) == 5

    def test_k_equals_one(self) -> None:
        assert find_kth_largest([3, 2, 3, 1, 2, 4, 5, 5, 6], 1) == 6

    def test_k_equals_length(self) -> None:
        assert find_kth_largest([3, 2, 1], 3) == 1

    def test_duplicates(self) -> None:
        assert find_kth_largest([1, 1, 1, 1], 2) == 1

    def test_single_element(self) -> None:
        assert find_kth_largest([7], 1) == 7

    def test_k_zero_raises(self) -> None:
        with pytest.raises(ValueError):
            find_kth_largest([1, 2, 3], 0)

    def test_k_too_large_raises(self) -> None:
        with pytest.raises(ValueError):
            find_kth_largest([1, 2], 3)

    @given(
        data=st.lists(
            st.integers(min_value=-1000, max_value=1000), min_size=1, max_size=100
        ),
    )
    def test_matches_sorted(self, data: list[int]) -> None:
        k = 1
        assert find_kth_largest(data, k) == sorted(data, reverse=True)[k - 1]


class TestKthLargestStream:
    def test_leetcode_example(self) -> None:
        kl = KthLargest(3, [4, 5, 8, 2])
        assert kl.add(3) == 4
        assert kl.add(5) == 5
        assert kl.add(10) == 5
        assert kl.add(9) == 8
        assert kl.add(4) == 8

    def test_empty_initial(self) -> None:
        kl = KthLargest(2, [])
        assert kl.add(1) == 1
        assert kl.add(3) == 1
        assert kl.add(5) == 3

    def test_k_one(self) -> None:
        kl = KthLargest(1, [])
        assert kl.add(10) == 10
        assert kl.add(20) == 20
        assert kl.add(5) == 20

    def test_all_same(self) -> None:
        kl = KthLargest(2, [5, 5, 5])
        assert kl.add(5) == 5

Implement it yourself

Run: just challenge heaps kth_largest

Then implement the functions to make all tests pass. Use just study heaps for watch mode.

Reveal Solution

kth_largest

Find the kth largest element.

Problem

Given an unsorted array and an integer k, return the kth largest element. Also implement a streaming version that supports adding new values.

Approach

Maintain a min-heap of size k. The heap top is always the kth largest. For each new element larger than the heap top, replace it.

When to use

Streaming top-K — "kth largest in a stream", "maintain running top K". Min-heap of size k keeps only the K largest seen so far. Also: real-time leaderboard, top-K sensor readings in telemetry.

Complexity

find_kth_largest: O(n log k) time, O(k) space KthLargest.add: O(log k) time per call, O(k) space total

KthLargest

Stream version: maintain the kth largest over a growing dataset.

kl = KthLargest(3, [4, 5, 8, 2]) kl.add(3) 4 kl.add(5) 5 kl.add(10) 5

Source code in src/algo/heaps/kth_largest.py
class KthLargest:
    """Stream version: maintain the kth largest over a growing dataset.

    >>> kl = KthLargest(3, [4, 5, 8, 2])
    >>> kl.add(3)
    4
    >>> kl.add(5)
    5
    >>> kl.add(10)
    5
    """

    def __init__(self, k: int, nums: Sequence[int]) -> None:
        self._k = k
        self._heap: list[int] = []
        for n in nums:
            self.add(n)

    def add(self, val: int) -> int:
        """Add a value and return the current kth largest element."""
        if len(self._heap) < self._k:
            heapq.heappush(self._heap, val)
        elif val > self._heap[0]:
            heapq.heapreplace(self._heap, val)
        return self._heap[0]

add

add(val: int) -> int

Add a value and return the current kth largest element.

Source code in src/algo/heaps/kth_largest.py
def add(self, val: int) -> int:
    """Add a value and return the current kth largest element."""
    if len(self._heap) < self._k:
        heapq.heappush(self._heap, val)
    elif val > self._heap[0]:
        heapq.heapreplace(self._heap, val)
    return self._heap[0]

find_kth_largest

find_kth_largest(nums: Sequence[int], k: int) -> int

Return the kth largest element in the array.

find_kth_largest([3, 2, 1, 5, 6, 4], 2) 5

Source code in src/algo/heaps/kth_largest.py
def find_kth_largest(nums: Sequence[int], k: int) -> int:
    """Return the kth largest element in the array.

    >>> find_kth_largest([3, 2, 1, 5, 6, 4], 2)
    5
    """
    if k <= 0 or k > len(nums):
        msg = f"k={k} out of range for length {len(nums)}"
        raise ValueError(msg)

    heap = list(nums[:k])
    heapq.heapify(heap)

    for n in nums[k:]:
        if n > heap[0]:
            heapq.heapreplace(heap, n)

    return heap[0]