taulu
Taulu - segment tables from images
Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).
To use this package, you first need to make an annotation of the headers in your table images. The idea is that these headers will be similar across your full set of images, and they will be used as a starting point for the search algorithm that finds the table grid.
Here is an example python script of how to use Taulu:
from taulu import Taulu, Split
import os
def setup():
# create an Annotation file of the headers in the image
# (one for the left header, one for the right)
# and store them in the examples directory
print("Annotating the LEFT header...")
Taulu.annotate("../data/table_00.png", "table_00_header_left.png")
print("Annotating the RIGHT header...")
Taulu.annotate("../data/table_00.png", "table_00_header_right.png")
def main():
taulu = Taulu(Split("table_00_header_left.png", "table_00_header_right.png"))
table = taulu.segment_table("../data/table_00.png", debug_view=True)
table.show_cells("../data/table_00.png")
if __name__ == "__main__":
if os.path.exists("table_00_header_left.png") and os.path.exists(
"table_00_header_right.png"
):
main()
else:
setup()
main()
If you want a high-level overview of how to use Taulu, see the `Taulu` class.
"""
Taulu - *segment tables from images*

Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).

To use this package, you first need to make an annotation of the headers in your table images.
The idea is that these headers will be similar across your full set of images, and they will be
used as a starting point for the search algorithm that finds the table grid.

Here is an example python script of how to use Taulu:
```python
from taulu import Taulu, Split
import os


def setup():
    # create an Annotation file of the headers in the image
    # (one for the left header, one for the right)
    # and store them in the examples directory
    print("Annotating the LEFT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_left.png")

    print("Annotating the RIGHT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_right.png")


def main():
    taulu = Taulu(Split("table_00_header_left.png", "table_00_header_right.png"))
    table = taulu.segment_table("../data/table_00.png", debug_view=True)

    table.show_cells("../data/table_00.png")


if __name__ == "__main__":
    if os.path.exists("table_00_header_left.png") and os.path.exists(
        "table_00_header_right.png"
    ):
        main()
    else:
        setup()
        main()

```

If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu)
"""

from .config import TauluConfig
from .grid import SegmentedTable, TableDetector
from .split import Split
from .table_indexer import TableIndexer
from .table_template import TableTemplate
from .taulu import Taulu
from .template_matcher import FeatureDetector, TemplateMatcher

# Internal modules hidden from the generated pdoc documentation.
__pdoc__ = {
    "constants": False,
    "main": False,
    "decorators": False,
    "error": False,
    "types": False,
    "img_util": False,
}

# Public API of the package.
__all__ = [
    "FeatureDetector",
    "SegmentedTable",
    "Split",
    "TableDetector",
    "TableIndexer",
    "TableTemplate",
    "Taulu",
    "TauluConfig",
    "TemplateMatcher",
]

# GPU support is optional: expose the submodule only when its dependencies
# are installed.
try:
    from . import gpu  # noqa: F401  # ty: ignore[unresolved-import]

    __all__.append("gpu")
except ImportError:
    pass
class SegmentedTable(TableIndexer):
    """
    Represents a detected table grid as a 2D array of intersection points.

    Returned by `Taulu.segment_table`. Provides methods for querying cell
    locations, cropping cells/regions from the source image, and interactive
    visualization. Can be saved to and restored from JSON.
    """

    # Column index of the seam between the two halves of a split table;
    # ``None`` for single-page tables.
    _right_offset: int | None = None

    def __init__(self, points: list[list[Point]], right_offset: int | None = None):
        """
        Args:
            points: 2D list of intersections between horizontal and vertical
                rules, in row-major order.
            right_offset: For tables built from a `Split`, the column index
                where the right half begins. ``None`` for single-page tables.
        """
        self._points = points
        self._right_offset = right_offset

    @property
    def points(self) -> list[list[Point]]:
        """The raw 2D grid of intersection points."""
        return self._points

    def row(self, i: int) -> list[Point]:
        """Return the ``i``-th row of intersection points."""
        assert 0 <= i and i < len(self._points)
        return self._points[i]

    @property
    def cols(self) -> int:
        """Number of cell columns (one fewer than vertical rules; two fewer
        for split tables, accounting for the seam between halves)."""
        if self._right_offset is not None:
            return len(self.row(0)) - 2
        else:
            return len(self.row(0)) - 1

    @property
    def rows(self) -> int:
        """Number of cell rows (one fewer than horizontal rules)."""
        return len(self._points) - 1

    @property
    def right_offset(self) -> int | None:
        """Column index where the right half begins, or ``None``."""
        return self._right_offset

    @staticmethod
    def from_split(
        split_grids: Split["SegmentedTable"], offsets: Split[Point]
    ) -> "SegmentedTable":
        """
        Convert two ``SegmentedTable`` objects into one that can segment the original (non-cropped) image.

        Args:
            split_grids (Split[SegmentedTable]): SegmentedTable objects for the left and right part of the table
            offsets (Split[tuple[int, int]]): the offsets in the original image where each crop started

        Returns:
            SegmentedTable: a merged grid spanning both halves.

        Raises:
            ValueError: if no row is fully populated in both halves.
        """

        # Translate a half's points back into original-image coordinates.
        def offset_points(points, offset):
            return [
                [
                    (p[0] + offset[0], p[1] + offset[1]) if p is not None else None
                    for p in row
                ]
                for row in points
            ]

        split_points = split_grids.apply(
            lambda grid, offset: offset_points(grid.points, offset), offsets
        )
        points = []
        # A cell row needs a rule above and below, so rows+1 point rows exist.
        rows = min(split_grids.left.rows, split_grids.right.rows)
        for row in range(rows + 1):
            left_row = split_points.left[row]
            right_row = split_points.right[row]

            # Skip rows that contain None values
            if any(p is None for p in left_row) or any(p is None for p in right_row):
                logger.warning(
                    f"Skipping row {row} in from_split due to incomplete grid data"
                )
                continue

            row_points = []
            row_points.extend(left_row)
            row_points.extend(right_row)
            points.append(row_points)
        if not points:
            raise ValueError(
                "Cannot create SegmentedTable from split: no complete rows found in both grids"
            )
        # The seam column sits right after the left half's last point column,
        # i.e. at index ``left.cols`` in the concatenated rows.
        table_grid = SegmentedTable(points, split_grids.left.cols)
        return table_grid

    def save(self, path: str | Path):
        """
        Persist the table grid to a JSON file.

        Saves the grid corner points and right_offset (for split tables) to disk,
        allowing the grid to be reloaded later without re-running detection.

        Args:
            path: Path to save the JSON file.

        Example:
            >>> grid = taulu.segment_table("table.png")
            >>> grid.save("grid.json")
        """
        with open(path, "w") as f:
            json.dump({"points": self.points, "right_offset": self._right_offset}, f)

    @staticmethod
    def from_saved(path: str | Path) -> "SegmentedTable":
        """
        Load a previously saved SegmentedTable from a JSON file.

        Args:
            path: Path to the JSON file created by `save()`.

        Returns:
            A SegmentedTable instance with the saved corner points.

        Example:
            >>> grid = SegmentedTable.from_saved("grid.json")
            >>> cell = grid.crop_cell(image, (0, 0))
        """
        with open(path) as f:
            points = json.load(f)
            right_offset = points.get("right_offset", None)
            # JSON stores points as lists; convert each back to an (x, y) tuple.
            points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]]
            return SegmentedTable(points, right_offset)

    def add_left_col(self, width: int):
        """
        Prepend a column to the grid by shifting the first column ``width``
        pixels to the left and inserting it as a new column.

        Args:
            width: Width of the new column in pixels.
        """
        for row in self._points:
            first = row[0]
            new_first = (first[0] - width, first[1])
            row.insert(0, new_first)

    def add_top_row(self, height: int):
        """
        Prepend a row to the grid by shifting the first row ``height`` pixels
        upward and inserting it as a new row.

        Args:
            height: Height of the new row in pixels.
        """
        new_row = []
        for point in self._points[0]:
            new_row.append((point[0], point[1] - height))

        self.points.insert(0, new_row)

    def _surrounds(self, rect: list[Point], point: tuple[float, float]) -> bool:
        """Check if ``point`` (x, y) lies inside the quadrilateral ``rect``
        (lt, rt, rb, lb)."""
        lt, rt, rb, lb = rect
        x, y = point

        # Point must be below the top edge, left of the right edge,
        # above the bottom edge, and right of the left edge.
        top = _Rule(*lt, *rt)
        if top._y_at_x(x) > y:
            return False

        right = _Rule(*rt, *rb)
        if right._x_at_y(y) < x:
            return False

        bottom = _Rule(*lb, *rb)
        if bottom._y_at_x(x) < y:
            return False

        left = _Rule(*lb, *lt)
        if left._x_at_y(y) > x:
            return False

        return True

    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Get the cell indices (row, col) containing a pixel coordinate.

        Searches through all cells to find which one contains the given point,
        accounting for the non-rectangular (perspective-warped) cell boundaries.

        Args:
            point: Pixel coordinates (x, y) in the original image.

        Returns:
            (row, col) indices of the containing cell, or (-1, -1) if the point
            is outside all cells.

        Example:
            >>> grid = taulu.segment_table("table.png")
            >>> row, col = grid.cell((150, 200))
            >>> if row >= 0:
            ...     print(f"Point is in cell ({row}, {col})")
        """
        for r in range(len(self._points) - 1):
            offset = 0
            for c in range(len(self.row(0)) - 1):
                # Skip the seam pseudo-cell of a split table; columns past it
                # are shifted back by one so cell indices stay contiguous.
                if self._right_offset is not None and c == self._right_offset:
                    offset = -1
                    continue

                if self._surrounds(
                    [
                        self._points[r][c],
                        self._points[r][c + 1],
                        self._points[r + 1][c + 1],
                        self._points[r + 1][c],
                    ],
                    point,
                ):
                    return (r, c + offset)

        return (-1, -1)

    def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
        """
        Get the four corner coordinates of a cell.

        Returns the corners in clockwise order starting from top-left,
        suitable for use with OpenCV drawing functions.

        Args:
            cell: Cell indices as (row, col).

        Returns:
            Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order:
            top-left, top-right, bottom-right, bottom-left.

        Raises:
            TauluException: If row or col indices are out of bounds.

        Example:
            >>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
            >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
            >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
        """
        r, c = cell

        self._check_row_idx(r)
        self._check_col_idx(c)

        # Map the logical column index past the seam of a split table.
        if self._right_offset is not None and c >= self._right_offset:
            c = c + 1

        return (
            self._points[r][c],
            self._points[r][c + 1],
            self._points[r + 1][c + 1],
            self._points[r + 1][c],
        )

    def region(
        self, start: tuple[int, int], end: tuple[int, int]
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding polygon for a rectangular region of cells.

        Returns the four corner coordinates that enclose all cells from
        start to end (inclusive).

        Args:
            start: Top-left cell as (row, col).
            end: Bottom-right cell as (row, col).

        Returns:
            Four corner points (lt, rt, rb, lb) enclosing the region,
            each as (x, y) pixel coordinates.

        Raises:
            TauluException: If any row or col indices are out of bounds.

        Example:
            >>> # Get bounding box for cells (0,0) through (2,3)
            >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
        """
        r0, c0 = start
        r1, c1 = end

        self._check_row_idx(r0)
        self._check_row_idx(r1)
        self._check_col_idx(c0)
        self._check_col_idx(c1)

        # Map logical column indices past the seam of a split table.
        if self._right_offset is not None and c0 >= self._right_offset:
            c0 = c0 + 1

        if self._right_offset is not None and c1 >= self._right_offset:
            c1 = c1 + 1

        lt = self._points[r0][c0]
        rt = self._points[r0][c1 + 1]
        rb = self._points[r1 + 1][c1 + 1]
        lb = self._points[r1 + 1][c0]

        return lt, rt, rb, lb

    def visualize_points(self, img: MatLike):
        """
        Draw the detected table points on the image for visual verification
        """
        import colorsys

        # One hue per grid row, cycling through the HSV color wheel.
        def clr(index, total_steps):
            hue = index / total_steps  # Normalized hue between 0 and 1
            r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
            return int(r * 255), int(g * 255), int(b * 255)

        for i, row in enumerate(self._points):
            for p in row:
                cv.circle(img, p, 4, clr(i, len(self._points)), -1)

        imu.show(img)

    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """
        Split a row into spans of consecutive cells whose vertical separators
        are obscured by text (i.e. continuous handwriting crosses the rule).

        Args:
            img: Source table image.
            row: Row index to scan.
            margin_x: Horizontal margin around each rule crop, in pixels.
            margin_y: Vertical margin around each rule crop, in pixels.
                NOTE: the default is negative, which *shrinks* the crop
                vertically (subtracting a negative at the top, adding it at
                the bottom) — presumably to avoid the horizontal rules.

        Returns:
            List of ``((row, start_col), (row, end_col))`` spans (inclusive).
        """

        # Crop a thin vertical strip around the rule to the LEFT of ``col``.
        def vertical_rule_crop(row: int, col: int):
            self._check_col_idx(col)
            self._check_row_idx(row)

            if self._right_offset is not None and col >= self._right_offset:
                col = col + 1

            top = self._points[row][col]
            bottom = self._points[row + 1][col]

            # The rule may be slanted; take the full horizontal extent.
            left = int(min(top[0], bottom[0]))
            right = int(max(top[0], bottom[0]))

            return img[
                int(top[1]) - margin_y : int(bottom[1]) + margin_y,
                left - margin_x : right + margin_x,
            ]

        result = []

        # A span continues while its left rules are text-covered; a clear
        # rule closes the current span and starts a new one.
        start = None
        for col in range(self.cols):
            crop = vertical_rule_crop(row, col)
            text_over_score = imu.text_presence_score(crop)
            # Empirical threshold: scores above -0.10 count as "text crosses
            # the rule" — TODO confirm against imu.text_presence_score docs.
            text_over = text_over_score > -0.10

            if not text_over:
                if start is not None:
                    result.append(((row, start), (row, col - 1)))
                start = col

        if start is not None:
            result.append(((row, start), (row, self.cols - 1)))

        # NOTE(review): if the row's leading rule(s) score as text-covered,
        # ``start`` stays None and the leading cells are omitted from the
        # result — confirm this is intended (the col-0 rule is the table's
        # outer border, so it is usually clear).
        return result
Represents a detected table grid as a 2D array of intersection points.
Returned by Taulu.segment_table. Provides methods for querying cell
locations, cropping cells/regions from the source image, and interactive
visualization. Can be saved to and restored from JSON.
906 def __init__(self, points: list[list[Point]], right_offset: int | None = None): 907 """ 908 Args: 909 points: 2D list of intersections between horizontal and vertical 910 rules, in row-major order. 911 right_offset: For tables built from a `Split`, the column index 912 where the right half begins. ``None`` for single-page tables. 913 """ 914 self._points = points 915 self._right_offset = right_offset
Arguments:
- points: 2D list of intersections between horizontal and vertical rules, in row-major order.
- right_offset: For tables built from a
`Split`, the column index where the right half begins. `None` for single-page tables.
917 @property 918 def points(self) -> list[list[Point]]: 919 """The raw 2D grid of intersection points.""" 920 return self._points
The raw 2D grid of intersection points.
922 def row(self, i: int) -> list[Point]: 923 """Return the ``i``-th row of intersection points.""" 924 assert 0 <= i and i < len(self._points) 925 return self._points[i]
Return the i-th row of intersection points.
927 @property 928 def cols(self) -> int: 929 """Number of cell columns (one fewer than vertical rules; two fewer 930 for split tables, accounting for the seam between halves).""" 931 if self._right_offset is not None: 932 return len(self.row(0)) - 2 933 else: 934 return len(self.row(0)) - 1
Number of cell columns (one fewer than vertical rules; two fewer for split tables, accounting for the seam between halves).
936 @property 937 def rows(self) -> int: 938 """Number of cell rows (one fewer than horizontal rules).""" 939 return len(self._points) - 1
Number of cell rows (one fewer than horizontal rules).
941 @property 942 def right_offset(self) -> int | None: 943 """Column index where the right half begins, or ``None``.""" 944 return self._right_offset
Column index where the right half begins, or None.
946 @staticmethod 947 def from_split( 948 split_grids: Split["SegmentedTable"], offsets: Split[Point] 949 ) -> "SegmentedTable": 950 """ 951 Convert two ``SegmentedTable`` objects into one that can segment the original (non-cropped) image. 952 953 Args: 954 split_grids (Split[SegmentedTable]): SegmentedTable objects for the left and right part of the table 955 offsets (Split[tuple[int, int]]): the offsets in the original image where each crop started 956 957 Returns: 958 SegmentedTable: a merged grid spanning both halves. 959 960 Raises: 961 ValueError: if no row is fully populated in both halves. 962 """ 963 964 def offset_points(points, offset): 965 return [ 966 [ 967 (p[0] + offset[0], p[1] + offset[1]) if p is not None else None 968 for p in row 969 ] 970 for row in points 971 ] 972 973 split_points = split_grids.apply( 974 lambda grid, offset: offset_points(grid.points, offset), offsets 975 ) 976 points = [] 977 rows = min(split_grids.left.rows, split_grids.right.rows) 978 for row in range(rows + 1): 979 left_row = split_points.left[row] 980 right_row = split_points.right[row] 981 982 # Skip rows that contain None values 983 if any(p is None for p in left_row) or any(p is None for p in right_row): 984 logger.warning( 985 f"Skipping row {row} in from_split due to incomplete grid data" 986 ) 987 continue 988 989 row_points = [] 990 row_points.extend(left_row) 991 row_points.extend(right_row) 992 points.append(row_points) 993 if not points: 994 raise ValueError( 995 "Cannot create SegmentedTable from split: no complete rows found in both grids" 996 ) 997 table_grid = SegmentedTable(points, split_grids.left.cols) 998 return table_grid
Convert two SegmentedTable objects into one that can segment the original (non-cropped) image.
Arguments:
- split_grids (Split[SegmentedTable]): SegmentedTable objects for the left and right part of the table
- offsets (Split[tuple[int, int]]): the offsets in the original image where each crop started
Returns:
SegmentedTable: a merged grid spanning both halves.
Raises:
- ValueError: if no row is fully populated in both halves.
1000 def save(self, path: str | Path): 1001 """ 1002 Persist the table grid to a JSON file. 1003 1004 Saves the grid corner points and right_offset (for split tables) to disk, 1005 allowing the grid to be reloaded later without re-running detection. 1006 1007 Args: 1008 path: Path to save the JSON file. 1009 1010 Example: 1011 >>> grid = taulu.segment_table("table.png") 1012 >>> grid.save("grid.json") 1013 """ 1014 with open(path, "w") as f: 1015 json.dump({"points": self.points, "right_offset": self._right_offset}, f)
Persist the table grid to a JSON file.
Saves the grid corner points and right_offset (for split tables) to disk, allowing the grid to be reloaded later without re-running detection.
Arguments:
- path: Path to save the JSON file.
Example:
>>> grid = taulu.segment_table("table.png") >>> grid.save("grid.json")
1017 @staticmethod 1018 def from_saved(path: str | Path) -> "SegmentedTable": 1019 """ 1020 Load a previously saved SegmentedTable from a JSON file. 1021 1022 Args: 1023 path: Path to the JSON file created by `save()`. 1024 1025 Returns: 1026 A SegmentedTable instance with the saved corner points. 1027 1028 Example: 1029 >>> grid = SegmentedTable.from_saved("grid.json") 1030 >>> cell = grid.crop_cell(image, (0, 0)) 1031 """ 1032 with open(path) as f: 1033 points = json.load(f) 1034 right_offset = points.get("right_offset", None) 1035 points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]] 1036 return SegmentedTable(points, right_offset)
Load a previously saved SegmentedTable from a JSON file.
Arguments:
- path: Path to the JSON file created by
save().
Returns:
A SegmentedTable instance with the saved corner points.
Example:
>>> grid = SegmentedTable.from_saved("grid.json") >>> cell = grid.crop_cell(image, (0, 0))
1038 def add_left_col(self, width: int): 1039 """ 1040 Prepend a column to the grid by shifting the first column ``width`` 1041 pixels to the left and inserting it as a new column. 1042 1043 Args: 1044 width: Width of the new column in pixels. 1045 """ 1046 for row in self._points: 1047 first = row[0] 1048 new_first = (first[0] - width, first[1]) 1049 row.insert(0, new_first)
Prepend a column to the grid by shifting the first column width
pixels to the left and inserting it as a new column.
Arguments:
- width: Width of the new column in pixels.
1051 def add_top_row(self, height: int): 1052 """ 1053 Prepend a row to the grid by shifting the first row ``height`` pixels 1054 upward and inserting it as a new row. 1055 1056 Args: 1057 height: Height of the new row in pixels. 1058 """ 1059 new_row = [] 1060 for point in self._points[0]: 1061 new_row.append((point[0], point[1] - height)) 1062 1063 self.points.insert(0, new_row)
Prepend a row to the grid by shifting the first row height pixels
upward and inserting it as a new row.
Arguments:
- height: Height of the new row in pixels.
1089 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 1090 """ 1091 Get the cell indices (row, col) containing a pixel coordinate. 1092 1093 Searches through all cells to find which one contains the given point, 1094 accounting for the non-rectangular (perspective-warped) cell boundaries. 1095 1096 Args: 1097 point: Pixel coordinates (x, y) in the original image. 1098 1099 Returns: 1100 (row, col) indices of the containing cell, or (-1, -1) if the point 1101 is outside all cells. 1102 1103 Example: 1104 >>> grid = taulu.segment_table("table.png") 1105 >>> row, col = grid.cell((150, 200)) 1106 >>> if row >= 0: 1107 ... print(f"Point is in cell ({row}, {col})") 1108 """ 1109 for r in range(len(self._points) - 1): 1110 offset = 0 1111 for c in range(len(self.row(0)) - 1): 1112 if self._right_offset is not None and c == self._right_offset: 1113 offset = -1 1114 continue 1115 1116 if self._surrounds( 1117 [ 1118 self._points[r][c], 1119 self._points[r][c + 1], 1120 self._points[r + 1][c + 1], 1121 self._points[r + 1][c], 1122 ], 1123 point, 1124 ): 1125 return (r, c + offset) 1126 1127 return (-1, -1)
Get the cell indices (row, col) containing a pixel coordinate.
Searches through all cells to find which one contains the given point, accounting for the non-rectangular (perspective-warped) cell boundaries.
Arguments:
- point: Pixel coordinates (x, y) in the original image.
Returns:
(row, col) indices of the containing cell, or (-1, -1) if the point is outside all cells.
Example:
>>> grid = taulu.segment_table("table.png") >>> row, col = grid.cell((150, 200)) >>> if row >= 0: ... print(f"Point is in cell ({row}, {col})")
1129 def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]: 1130 """ 1131 Get the four corner coordinates of a cell. 1132 1133 Returns the corners in clockwise order starting from top-left, 1134 suitable for use with OpenCV drawing functions. 1135 1136 Args: 1137 cell: Cell indices as (row, col). 1138 1139 Returns: 1140 Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: 1141 top-left, top-right, bottom-right, bottom-left. 1142 1143 Raises: 1144 TauluException: If row or col indices are out of bounds. 1145 1146 Example: 1147 >>> lt, rt, rb, lb = grid.cell_polygon((0, 0)) 1148 >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32) 1149 >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2) 1150 """ 1151 r, c = cell 1152 1153 self._check_row_idx(r) 1154 self._check_col_idx(c) 1155 1156 if self._right_offset is not None and c >= self._right_offset: 1157 c = c + 1 1158 1159 return ( 1160 self._points[r][c], 1161 self._points[r][c + 1], 1162 self._points[r + 1][c + 1], 1163 self._points[r + 1][c], 1164 )
Get the four corner coordinates of a cell.
Returns the corners in clockwise order starting from top-left, suitable for use with OpenCV drawing functions.
Arguments:
- cell: Cell indices as (row, col).
Returns:
Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: top-left, top-right, bottom-right, bottom-left.
Raises:
- TauluException: If row or col indices are out of bounds.
Example:
>>> lt, rt, rb, lb = grid.cell_polygon((0, 0)) >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32) >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
1166 def region( 1167 self, start: tuple[int, int], end: tuple[int, int] 1168 ) -> tuple[Point, Point, Point, Point]: 1169 """ 1170 Get the bounding polygon for a rectangular region of cells. 1171 1172 Returns the four corner coordinates that enclose all cells from 1173 start to end (inclusive). 1174 1175 Args: 1176 start: Top-left cell as (row, col). 1177 end: Bottom-right cell as (row, col). 1178 1179 Returns: 1180 Four corner points (lt, rt, rb, lb) enclosing the region, 1181 each as (x, y) pixel coordinates. 1182 1183 Raises: 1184 TauluException: If any row or col indices are out of bounds. 1185 1186 Example: 1187 >>> # Get bounding box for cells (0,0) through (2,3) 1188 >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3)) 1189 """ 1190 r0, c0 = start 1191 r1, c1 = end 1192 1193 self._check_row_idx(r0) 1194 self._check_row_idx(r1) 1195 self._check_col_idx(c0) 1196 self._check_col_idx(c1) 1197 1198 if self._right_offset is not None and c0 >= self._right_offset: 1199 c0 = c0 + 1 1200 1201 if self._right_offset is not None and c1 >= self._right_offset: 1202 c1 = c1 + 1 1203 1204 lt = self._points[r0][c0] 1205 rt = self._points[r0][c1 + 1] 1206 rb = self._points[r1 + 1][c1 + 1] 1207 lb = self._points[r1 + 1][c0] 1208 1209 return lt, rt, rb, lb
Get the bounding polygon for a rectangular region of cells.
Returns the four corner coordinates that enclose all cells from start to end (inclusive).
Arguments:
- start: Top-left cell as (row, col).
- end: Bottom-right cell as (row, col).
Returns:
Four corner points (lt, rt, rb, lb) enclosing the region, each as (x, y) pixel coordinates.
Raises:
- TauluException: If any row or col indices are out of bounds.
Example:
>>> # Get bounding box for cells (0,0) through (2,3) >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
1211 def visualize_points(self, img: MatLike): 1212 """ 1213 Draw the detected table points on the image for visual verification 1214 """ 1215 import colorsys 1216 1217 def clr(index, total_steps): 1218 hue = index / total_steps # Normalized hue between 0 and 1 1219 r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0) 1220 return int(r * 255), int(g * 255), int(b * 255) 1221 1222 for i, row in enumerate(self._points): 1223 for p in row: 1224 cv.circle(img, p, 4, clr(i, len(self._points)), -1) 1225 1226 imu.show(img)
Draw the detected table points on the image for visual verification
1228 def text_regions( 1229 self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3 1230 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 1231 """ 1232 Split a row into spans of consecutive cells whose vertical separators 1233 are obscured by text (i.e. continuous handwriting crosses the rule). 1234 1235 Args: 1236 img: Source table image. 1237 row: Row index to scan. 1238 margin_x: Horizontal margin around each rule crop, in pixels. 1239 margin_y: Vertical margin around each rule crop, in pixels. 1240 1241 Returns: 1242 List of ``((row, start_col), (row, end_col))`` spans (inclusive). 1243 """ 1244 1245 def vertical_rule_crop(row: int, col: int): 1246 self._check_col_idx(col) 1247 self._check_row_idx(row) 1248 1249 if self._right_offset is not None and col >= self._right_offset: 1250 col = col + 1 1251 1252 top = self._points[row][col] 1253 bottom = self._points[row + 1][col] 1254 1255 left = int(min(top[0], bottom[0])) 1256 right = int(max(top[0], bottom[0])) 1257 1258 return img[ 1259 int(top[1]) - margin_y : int(bottom[1]) + margin_y, 1260 left - margin_x : right + margin_x, 1261 ] 1262 1263 result = [] 1264 1265 start = None 1266 for col in range(self.cols): 1267 crop = vertical_rule_crop(row, col) 1268 text_over_score = imu.text_presence_score(crop) 1269 text_over = text_over_score > -0.10 1270 1271 if not text_over: 1272 if start is not None: 1273 result.append(((row, start), (row, col - 1))) 1274 start = col 1275 1276 if start is not None: 1277 result.append(((row, start), (row, self.cols - 1))) 1278 1279 return result
Split a row into spans of consecutive cells whose vertical separators are obscured by text (i.e. continuous handwriting crosses the rule).
Arguments:
- img: Source table image.
- row: Row index to scan.
- margin_x: Horizontal margin around each rule crop, in pixels.
- margin_y: Vertical margin around each rule crop, in pixels.
Returns:
List of
`((row, start_col), (row, end_col))` spans (inclusive).
class Split[T]:
    """
    Container for paired left/right data with convenient manipulation methods.

    The Split class is designed for working with table images that span two pages
    or have distinct left and right sections. It allows you to:
    - Store related data for both sides
    - Apply functions to both sides simultaneously
    - Access attributes/methods of contained objects transparently

    Examples:
        >>> # Create a split with different parameters for each side
        >>> thresholds = Split(0.25, 0.30)
        >>>
        >>> # Apply a function to both sides
        >>> images = Split(left_img, right_img)
        >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
        >>>
        >>> # Use with different parameters per side
        >>> results = images.apply(
        ...     lambda img, k: sauvola_threshold(img, k),
        ...     k=thresholds  # k.left used for left img, k.right for right
        ... )
        >>>
        >>> # Access methods of contained objects directly
        >>> templates = Split(template_left, template_right)
        >>> widths = templates.cell_widths(0)  # Calls on both templates

    Type Parameters:
        T: The type of objects stored in left and right
    """

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        source_type: Any,
        handler: GetCoreSchemaHandler,
    ) -> core_schema.CoreSchema:
        # Pydantic v2 hook: validate either a Split instance or a
        # {"left": ..., "right": ...} dict, and serialize back to that dict.
        # Extract the T from Split[T] so inner values get their own schema.
        args = get_args(source_type)
        inner_type = args[0] if args else Any

        inner_schema = handler.generate_schema(inner_type)

        def validate_split(value: Any) -> Split:
            if isinstance(value, Split):
                return value
            if isinstance(value, dict) and "left" in value and "right" in value:
                return Split(value["left"], value["right"])
            raise ValueError(
                f"Expected Split instance or dict with 'left'/'right' keys, got {type(value)}"
            )

        return core_schema.no_info_plain_validator_function(
            validate_split,
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda v: {"left": v.left, "right": v.right},
                info_arg=False,
            ),
            metadata={
                # JSON-schema generation: render as an object requiring both
                # sides, each typed by the inner schema.
                "pydantic_js_functions": [
                    lambda _schema, handler: {
                        "type": "object",
                        "properties": {
                            "left": handler(inner_schema),
                            "right": handler(inner_schema),
                        },
                        "required": ["left", "right"],
                        "additionalProperties": False,
                    }
                ]
            },
        )

    def __init__(self, left: T | None = None, right: T | None = None):
        """
        Initialize a Split container.

        Args:
            left: Data for the left side
            right: Data for the right side

        Note:
            Both can initially be None. Use the `append` method or set
            properties directly to populate.
        """
        self._left = left
        self._right = right

    @property
    def left(self) -> T:
        """The left value. Asserts it has been set."""
        assert self._left is not None
        return self._left

    @left.setter
    def left(self, value: T):
        self._left = value

    @property
    def right(self) -> T:
        """The right value. Asserts it has been set."""
        assert self._right is not None
        return self._right

    @right.setter
    def right(self, value: T):
        self._right = value

    def append(self, value: T):
        """Set ``left`` if unset, otherwise set ``right``."""
        if self._left is None:
            self._left = value
        else:
            self._right = value

    def __repr__(self) -> str:
        return f"left: {self._left}, right: {self._right}"

    def __iter__(self):
        # Supports tuple unpacking: ``l, r = split``. Both sides must be set.
        assert self._left is not None
        assert self._right is not None
        return iter((self._left, self._right))

    def __getitem__(self, index: bool | int) -> T:
        # 0/False selects left, anything else selects right.
        assert self._left is not None
        assert self._right is not None
        if int(index) == 0:
            return self._left
        else:
            return self._right

    def apply(
        self,
        funcs: Split[Callable[..., V]] | Callable[..., V],
        *args,
        **kwargs,
    ) -> Split[V]:
        """
        Call ``funcs`` on each side and return a new Split of the results.

        ``self.left`` (resp. ``self.right``) is passed as the first positional
        argument. Any extra ``args``/``kwargs`` that are themselves a `Split`
        are unpacked per side; non-Split values are forwarded unchanged.

        Args:
            funcs: A single callable applied to both sides, or a `Split` of
                callables for per-side functions.

        Returns:
            Split[V]: results of the per-side calls.
        """
        if not isinstance(funcs, Split):
            funcs = Split(funcs, funcs)

        def get_arg(side: str, arg):
            # Per-side unpacking: a Split argument contributes its matching
            # side; anything else is passed through as-is.
            if isinstance(arg, Split):
                return getattr(arg, side)
            return arg

        def call(side: str):
            func = getattr(funcs, side)
            target = getattr(self, side)

            side_args = [get_arg(side, arg) for arg in args]
            side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()}

            return func(target, *side_args, **side_kwargs)

        return Split(call("left"), call("right"))

    def __getattr__(self, attr_name: str):
        # Transparent method forwarding: ``split.foo(x)`` calls ``foo`` on
        # both contained objects via ``apply``.
        # NOTE(review): this first branch looks unreachable — __getattr__ is
        # only invoked after normal attribute lookup has already failed, so
        # attr_name cannot be in self.__dict__ here. Confirm before removing.
        if attr_name in self.__dict__:
            return getattr(self, attr_name)

        def wrapper(*args, **kwargs):
            # Look the attribute up on each side's class so unbound methods
            # receive the contained object as their first argument.
            return self.apply(
                Split(
                    getattr(self.left.__class__, attr_name),
                    getattr(self.right.__class__, attr_name),
                ),
                *args,
                **kwargs,
            )

        return wrapper
Container for paired left/right data with convenient manipulation methods.
The Split class is designed for working with table images that span two pages or have distinct left and right sections. It allows you to:
- Store related data for both sides
- Apply functions to both sides simultaneously
- Access attributes/methods of contained objects transparently
Examples:
>>> # Create a split with different parameters for each side
>>> thresholds = Split(0.25, 0.30)
>>>
>>> # Apply a function to both sides
>>> images = Split(left_img, right_img)
>>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
>>>
>>> # Use with different parameters per side
>>> results = images.apply(
...     lambda img, k: sauvola_threshold(img, k),
...     k=thresholds  # k.left used for left img, k.right for right
... )
>>>
>>> # Access methods of contained objects directly
>>> templates = Split(template_left, template_right)
>>> widths = templates.cell_widths(0)  # Calls on both templates
Type Parameters:
T: The type of objects stored in left and right
    def __init__(self, left: T | None = None, right: T | None = None):
        """
        Initialize a Split container.

        Args:
            left: Data for the left side
            right: Data for the right side

        Note:
            Both can initially be None. Use the `append` method or set
            properties directly to populate.
        """
        # Stored privately; the `left`/`right` properties assert a side has
        # been set before returning it.
        self._left = left
        self._right = right
Initialize a Split container.
Arguments:
- left: Data for the left side
- right: Data for the right side
Note:
Both can initially be None. Use the `append` method or set
properties directly to populate.
    @property
    def left(self) -> T:
        """The left value. Asserts it has been set."""
        # Guard against reading a side that was never populated.
        assert self._left is not None
        return self._left
The left value. Asserts it has been set.
    @property
    def right(self) -> T:
        """The right value. Asserts it has been set."""
        # Guard against reading a side that was never populated.
        assert self._right is not None
        return self._right
The right value. Asserts it has been set.
150 def apply( 151 self, 152 funcs: Split[Callable[..., V]] | Callable[..., V], 153 *args, 154 **kwargs, 155 ) -> Split[V]: 156 """ 157 Call ``funcs`` on each side and return a new Split of the results. 158 159 ``self.left`` (resp. ``self.right``) is passed as the first positional 160 argument. Any extra ``args``/``kwargs`` that are themselves a `Split` 161 are unpacked per side; non-Split values are forwarded unchanged. 162 163 Args: 164 funcs: A single callable applied to both sides, or a `Split` of 165 callables for per-side functions. 166 167 Returns: 168 Split[V]: results of the per-side calls. 169 """ 170 if not isinstance(funcs, Split): 171 funcs = Split(funcs, funcs) 172 173 def get_arg(side: str, arg): 174 if isinstance(arg, Split): 175 return getattr(arg, side) 176 return arg 177 178 def call(side: str): 179 func = getattr(funcs, side) 180 target = getattr(self, side) 181 182 side_args = [get_arg(side, arg) for arg in args] 183 side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()} 184 185 return func(target, *side_args, **side_kwargs) 186 187 return Split(call("left"), call("right"))
Call funcs on each side and return a new Split of the results.
self.left (resp. self.right) is passed as the first positional
argument. Any extra args/kwargs that are themselves a Split
are unpacked per side; non-Split values are forwarded unchanged.
Arguments:
- funcs: A single callable applied to both sides, or a `Split` of
  callables for per-side functions.
Returns:
Split[V]: results of the per-side calls.
class TableDetector:
    """
    Detects table grid intersections using morphological filtering and template matching.

    This detector implements a multi-stage pipeline:

    1. **Binarization**: Sauvola adaptive thresholding to handle varying lighting
    2. **Morphological operations**: Dilation to connect broken rule segments
    3. **Cross-kernel matching**: Template matching with a cross-shaped kernel to find
       rule intersections where horizontal and vertical lines meet
    4. **Grid growing**: Iterative point detection starting from a known seed point

    The cross-kernel is designed to match the specific geometry of your table rules.
    It should be sized so that after morphology, it aligns with actual corner shapes.

    ## Tuning Guidelines

    - **intersection_kernel_size**: Increase if you need more selectivity (fewer false positives)
    - **line_thickness/line_thickness_horizontal**: Should match rule thickness after morphology
    - **line_gap_fill**: Increase to connect more broken lines, but this thickens rules
    - **binarization_sensitivity**: Increase to threshold more aggressively (remove noise)
    - **search_radius**: Increase for documents with more warping/distortion
    - **position_weight**: Increase to prefer corners closer to expected positions

    ## Visual Debugging

    Set `visual=True` in methods to see intermediate results and tune parameters.
    """

    def __init__(
        self,
        intersection_kernel_size: int = 21,
        line_thickness: int = 6,
        line_thickness_horizontal: int | None = None,
        line_gap_fill: int | None = None,
        binarization_sensitivity: float = 0.04,
        binarization_window: int = 15,
        detection_scale: float = 1.0,
        search_radius: int = 40,
        position_weight: float = 0.4,
        pathfinding_threshold: float = 0.2,
        min_rows: int = 5,
        detection_threshold: float = 0.3,
        extrapolation_distance: int = 4,
        growing_resets: int = 3,
        reset_fraction: float = 0.5,
    ):
        """
        Args:
            intersection_kernel_size (int): the size of the cross kernel
                a larger kernel size often means that more penalty is applied, often leading
                to more sparse results
            line_thickness (int): the width of one of the edges in the cross filter, should be
                roughly equal to the width of the rules in the image after morphology is applied
            line_thickness_horizontal (int | None): useful if the horizontal rules and vertical rules
                have different sizes
            line_gap_fill (int | None): the size of the morphology operators that are applied before
                the cross kernel. 'bridges the gaps' of broken-up lines
            binarization_sensitivity (float): threshold parameter for sauvola thresholding
            binarization_window (int): window_size parameter for sauvola thresholding
            detection_scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            search_radius (int): area in which to search for a new max value in `find_nearest` etc.
            position_weight (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
            pathfinding_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip astar pathfinding
            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
            detection_threshold (float): the threshold for accepting a new point when growing the table
            extrapolation_distance (int): how many points away to look when calculating the median slope
            growing_resets (int): The amount of cuts (large deletions) to do in the grid during table growing
            reset_fraction (float): The portion of the already-chosen corner points to delete during cutting
        """
        self._validate_parameters(
            intersection_kernel_size,
            line_thickness,
            line_thickness_horizontal,
            line_gap_fill,
            search_radius,
            binarization_sensitivity,
            binarization_window,
            position_weight,
            pathfinding_threshold,
            growing_resets,
            reset_fraction,
        )

        self._intersection_kernel_size = intersection_kernel_size
        self._line_thickness = line_thickness
        # Fall back to the vertical thickness when no separate horizontal
        # thickness is provided.
        self._line_thickness_horizontal = (
            line_thickness
            if line_thickness_horizontal is None
            else line_thickness_horizontal
        )
        self._line_gap_fill = (
            line_gap_fill if line_gap_fill is not None else line_thickness
        )
        self._search_radius = search_radius
        self._binarization_sensitivity = binarization_sensitivity
        self._binarization_window = binarization_window
        self._position_weight = position_weight
        self._scale = detection_scale
        self._pathfinding_threshold = pathfinding_threshold
        self._min_rows = min_rows
        self._detection_threshold = detection_threshold
        self._extrapolation_distance = extrapolation_distance
        self._growing_resets = growing_resets
        self._reset_fraction = reset_fraction

        self._cross_kernel = self._create_cross_kernel()

    def _validate_parameters(
        self,
        intersection_kernel_size: int,
        line_thickness: int,
        line_thickness_horizontal: int | None,
        line_gap_fill: int | None,
        search_radius: int,
        binarization_sensitivity: float,
        binarization_window: int,
        position_weight: float,
        pathfinding_threshold: float,
        growing_resets: int,
        reset_fraction: float,
    ) -> None:
        """Validate initialization parameters, raising ValueError on bad values."""
        # The cross kernel needs a well-defined center pixel, hence odd size.
        if intersection_kernel_size % 2 == 0:
            raise ValueError("intersection_kernel_size must be odd")
        if (
            intersection_kernel_size <= 0
            or line_thickness <= 0
            or search_radius <= 0
            or binarization_window <= 0
        ):
            raise ValueError("Size parameters must be positive")
        if line_thickness_horizontal is not None and line_thickness_horizontal <= 0:
            raise ValueError("line_thickness_horizontal must be positive")
        if line_gap_fill is not None and line_gap_fill <= 0:
            raise ValueError("line_gap_fill must be positive")
        if not 0 <= position_weight <= 1:
            raise ValueError("position_weight must be in [0, 1]")
        if binarization_sensitivity <= 0:
            raise ValueError("binarization_sensitivity must be positive")
        if pathfinding_threshold < 0 or pathfinding_threshold > 1:
            raise ValueError("pathfinding_threshold must be in [0, 1]")
        if reset_fraction < 0 or reset_fraction > 1:
            raise ValueError("reset_fraction must be in [0, 1]")
        if growing_resets < 0:
            raise ValueError("growing_resets must be zero or positive")

    def _create_gaussian_weights(self, region_size: int) -> NDArray:
        """
        Create a square 2D Gaussian weight mask used to bias `find_nearest`
        toward points close to the search center.

        Args:
            region_size (int): Side length of the square mask.

        Returns:
            NDArray: ``(region_size, region_size)`` float32 weight mask, peak 1.0
            at the center, falling off to ``1 - position_weight`` at the edge.
        """
        if self._position_weight == 0:
            return np.ones((region_size, region_size), dtype=np.float32)

        y = np.linspace(-1, 1, region_size)
        x = np.linspace(-1, 1, region_size)
        xv, yv = np.meshgrid(x, y)
        dist_squared = xv**2 + yv**2

        # Prevent log(0) when position_weight is 1
        if self._position_weight >= 0.999:
            sigma = 0.1  # Small sigma for very sharp peak
        else:
            sigma = np.sqrt(-1 / (2 * np.log(1 - self._position_weight)))

        weights = np.exp(-dist_squared / (2 * sigma**2))

        return weights.astype(np.float32)

    def _create_cross_kernel(self) -> NDArray:
        """Build the cross-shaped uint8 template used to match rule intersections."""
        kernel = np.zeros(
            (self._intersection_kernel_size, self._intersection_kernel_size),
            dtype=np.uint8,
        )
        center = self._intersection_kernel_size // 2

        # Create horizontal bar
        h_start = max(0, center - self._line_thickness_horizontal // 2)
        h_end = min(
            self._intersection_kernel_size,
            center + (self._line_thickness_horizontal + 1) // 2,
        )
        kernel[h_start:h_end, :] = 255

        # Create vertical bar
        v_start = max(0, center - self._line_thickness // 2)
        v_end = min(
            self._intersection_kernel_size, center + (self._line_thickness + 1) // 2
        )
        kernel[:, v_start:v_end] = 255

        return kernel

    def _apply_morphology(self, binary: MatLike) -> MatLike:
        """Dilate horizontally then vertically to bridge gaps in broken rules."""
        # Define a horizontal kernel (adjust width as needed)
        kernel_hor = cv.getStructuringElement(cv.MORPH_RECT, (self._line_gap_fill, 1))
        kernel_ver = cv.getStructuringElement(cv.MORPH_RECT, (1, self._line_gap_fill))

        # Apply dilation
        dilated = cv.dilate(binary, kernel_hor, iterations=1)
        dilated = cv.dilate(dilated, kernel_ver, iterations=1)

        return dilated

    def _apply_cross_matching(self, img: MatLike) -> MatLike:
        """Apply cross kernel template matching."""
        # Pad so the response map keeps the input's size and alignment.
        pad_y = self._cross_kernel.shape[0] // 2
        pad_x = self._cross_kernel.shape[1] // 2

        padded = cv.copyMakeBorder(
            img, pad_y, pad_y, pad_x, pad_x, borderType=cv.BORDER_CONSTANT, value=0
        )

        filtered = cv.matchTemplate(padded, self._cross_kernel, cv.TM_SQDIFF_NORMED)
        # Invert and normalize to 0-255 range (TM_SQDIFF_NORMED: low = good match)
        filtered = cv.normalize(1.0 - filtered, None, 0, 255, cv.NORM_MINMAX)
        return filtered.astype(np.uint8)

    def apply(
        self, img: MatLike, visual: bool = False, visual_notebook: bool = False
    ) -> MatLike:
        """
        Apply the grid detection filter to the input image.

        Args:
            img (MatLike): the input image
            visual (bool): whether to show intermediate steps via OpenCV windows
            visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook

        Returns:
            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
        """

        if img is None or img.size == 0:
            raise ValueError("Input image is empty or None")

        binary = imu.sauvola(
            img, k=self._binarization_sensitivity, window_size=self._binarization_window
        )

        if visual:
            imu.show(binary, title="thresholded")
        if visual_notebook:
            imu.show_notebook(binary, title="thresholded")

        binary = self._apply_morphology(binary)

        if visual:
            imu.show(binary, title="dilated")
        if visual_notebook:
            imu.show_notebook(binary, title="dilated")

        filtered = self._apply_cross_matching(binary)

        return filtered

    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: int | None = None
    ) -> tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `search_radius`

        Returns:
            tuple[Point, float]: the best-matching pixel ``(x, y)`` and its
            confidence in ``[0, 1]``. If the search window falls outside the
            image, the input ``point`` is returned with confidence ``0.0``.
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_radius
        x, y = point

        # Calculate crop boundaries
        crop_x = max(0, x - region_size // 2)
        crop_y = max(0, y - region_size // 2)
        crop_width = min(region_size, filtered.shape[1] - crop_x)
        crop_height = min(region_size, filtered.shape[0] - crop_y)

        # Handle edge cases
        if crop_width <= 0 or crop_height <= 0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]

        if cropped.size == 0:
            return point, 0.0

        # Always apply Gaussian weighting by extending crop if needed
        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
            # Perfect size - apply weights directly
            weights = self._create_gaussian_weights(region_size)
            weighted = cropped.astype(np.float32) * weights
        else:
            # Extend crop to match region_size, apply weights, then restore
            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)

            # Calculate offset to center the cropped region in extended array
            offset_y = (region_size - cropped.shape[0]) // 2
            offset_x = (region_size - cropped.shape[1]) // 2

            # Place cropped region in center of extended array
            extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ] = cropped

            # Apply Gaussian weights to extended array
            weights = self._create_gaussian_weights(region_size)
            weighted_extended = extended.astype(np.float32) * weights

            # Extract the original region back out
            weighted = weighted_extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ]

        best_idx = np.argmax(weighted)
        best_y, best_x = np.unravel_index(best_idx, cropped.shape)

        result_point = (
            int(crop_x + best_x),
            int(crop_y + best_y),
        )
        # Weighted scores were scaled from a 0-255 map; normalize to [0, 1].
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence

    def detect_row_heights(
        self,
        img: MatLike,
        filtered: MatLike,
        top_row: list[Point | None],
        min_row_height: int,
        max_row_height: int,
        path_scale: float = 0.25,
        prominence: float = 18.0,
        cluster_tolerance: int | None = None,
        min_columns_for_rule: float = 0.4,
        straight_cost: int = 10,
        perpendicular_cost: int = 30,
        darkness_divisor: int = 100,
    ) -> list[int]:
        """
        Detect variable row heights from the cross-correlation map by following
        each vertical rule downward via A* and finding peaks of cross-correlation
        along that path.

        Args:
            img: Original (full-resolution) table image.
            filtered: Cross-correlation map produced by `apply()` (full resolution).
            top_row: Top points of vertical rules in image space. ``None`` entries
                (where header alignment failed for that rule) are skipped.
            min_row_height: Minimum allowed row height in pixels.
            max_row_height: Maximum allowed row height in pixels.
            path_scale: Downscale factor used when running A* (purely for speed).
                The detected path is rescaled back to full resolution for sampling.
            prominence: Minimum peak value [0, 255] in the cross-correlation profile.
            cluster_tolerance: Cross-column matching tolerance in pixels.
                Defaults to ``min_row_height // 2``.
            min_columns_for_rule: Fraction of columns that must agree on a peak.
            straight_cost: A* cost per straight (down/up) step.
            perpendicular_cost: A* cost per lateral step. Higher = stronger
                straight-line bias.
            darkness_divisor: A* image cost is ``pixel / darkness_divisor``.
                Higher = lighter line bias.

        Returns:
            List of per-row heights (consecutive differences of detected offsets).
            Empty if detection failed.
        """
        valid_points = [(float(p[0]), float(p[1])) for p in top_row if p is not None]
        if not valid_points:
            return []

        gray = ensure_gray(img)
        if path_scale != 1.0:
            scaled_gray = cv.resize(gray, None, fx=path_scale, fy=path_scale)
        else:
            scaled_gray = gray

        tol = (
            cluster_tolerance
            if cluster_tolerance is not None
            else max(1, min_row_height // 2)
        )

        offsets = rust_detect_row_offsets(
            filtered,
            scaled_gray,
            valid_points,
            float(path_scale),
            int(min_row_height),
            int(max_row_height),
            float(prominence),
            int(tol),
            float(min_columns_for_rule),
            int(straight_cost),
            int(perpendicular_cost),
            int(darkness_divisor),
        )

        if not offsets:
            return []

        # First "height" is the offset of the first detected rule; the rest
        # are consecutive differences between detected offsets.
        heights: list[int] = [offsets[0]]
        for prev, cur in zip(offsets, offsets[1:]):
            heights.append(cur - prev)
        return heights

    def find_table_points(
        self,
        img: MatLike | PathLike[str],
        top_row: list[Point | None],
        cell_widths: list[int],
        cell_heights: list[int] | int,
        visual: bool = False,
        visual_notebook: bool = False,
        window: str = WINDOW,
        goals_width: int | None = None,
        filtered: MatLike | PathLike[str] | None = None,
        smooth: bool = False,
        smooth_strength: float = 0.5,
        smooth_iterations: int = 1,
        smooth_degree: int = 1,
    ) -> "SegmentedTable":
        """
        Parse the image to a `SegmentedTable` structure that holds all of the
        intersections between horizontal and vertical rules, starting near the `left_top` point

        Args:
            img (MatLike): the input image of a table
            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
            cell_widths (list[int]): the expected widths of the cells (based on a header template)
            cell_heights (list[int]): the expected height of the rows of data.
                The last value from this list is used until the image has no more vertical space.
            visual (bool): whether to show intermediate steps
            visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
            window (str): the name of the OpenCV window to use for visualization
            goals_width (int | None): the width of the goal region when searching for the next point.
                If None, defaults to 1.5 * search_radius
            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
                calculating the filtered image from scratch
            smooth (bool): if True, smooth the grid after detection
            smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
            smooth_iterations (int): number of smoothing passes. Default: 1
            smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1

        Returns:
            a SegmentedTable object
        """

        if goals_width is None:
            goals_width = self._search_radius * 3 // 2

        if not cell_widths:
            raise ValueError("cell_widths must contain at least one value")

        if not isinstance(img, np.ndarray):
            tmp_img = cv.imread(os.fspath(cast(PathLike[str], img)))
            assert tmp_img is not None
            img = tmp_img

        img = cast(MatLike, img)

        if filtered is None:
            filtered = self.apply(img, visual, visual_notebook)
        else:
            if not isinstance(filtered, np.ndarray):
                filtered = cv.imread(os.fspath(filtered))

        filtered = ensure_gray(filtered)

        if visual:
            imu.show(filtered, window=window)
        if visual_notebook:
            imu.show_notebook(filtered, title="filtered")

        if isinstance(cell_heights, int):
            cell_heights = [cell_heights]

        # Snap each candidate top-row point to the strongest nearby corner
        # response; drop candidates with too little support.
        for i in range(len(top_row)):
            if top_row[i] is None:
                continue

            point = top_row[i]
            assert point is not None
            adjusted, confidence = self.find_nearest(
                filtered, point, int(self._search_radius * 2)
            )

            if confidence < 0.15:
                top_row[i] = None
            else:
                top_row[i] = adjusted

        if not any(top_row):
            logger.error("No good starting candidates given")

        # resize all parameters according to scale
        img = cv.resize(img, None, fx=self._scale, fy=self._scale)

        if visual:
            imu.push(img)

        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
        cell_widths = [int(w * self._scale) for w in cell_widths]
        cell_heights = [int(h * self._scale) for h in cell_heights]
        top_row = [
            (int(p[0] * self._scale), int(p[1] * self._scale))
            if p is not None
            else None
            for p in top_row
        ]
        search_radius = int(self._search_radius * self._scale)

        img_gray = ensure_gray(img)
        filtered_gray = ensure_gray(filtered)

        table_grower = TableGrower(
            filtered_gray,
            cell_widths,
            cell_heights,
            top_row,
            search_radius,
            self._position_weight,
            self._extrapolation_distance,
            self._detection_threshold,
            self._pathfinding_threshold,
            self._min_rows,
            self._growing_resets,
            self._reset_fraction,
        )

        def show_grower_progress(wait: bool = False):
            # Overlay accepted corners (red) and current frontier (green-ish,
            # brighter = higher score) on a copy of the working image.
            img_orig = np.copy(img)
            corners = table_grower.get_all_corners()
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        img_orig = imu.draw_points(
                            img_orig,
                            [corners[y][x]],  # type:ignore
                            color=(0, 0, 255),
                            thickness=30,
                        )

            edge = table_grower.get_edge_points()

            for point, score in edge:
                color = (100, int(clamp(score * 255, 0, 255)), 100)
                imu.draw_point(img_orig, point, color=color, thickness=20)

            imu.show(img_orig, wait=wait)

        if visual:
            threshold = self._detection_threshold

            # python implementation of rust loops, for visualization purposes
            # note this is a LOT slower
            while table_grower.grow_point(img_gray, filtered_gray) is not None:
                show_grower_progress()

            show_grower_progress(True)

            original_threshold = threshold

            loops_without_change = 0

            while not table_grower.is_table_complete():
                loops_without_change += 1

                if loops_without_change > 50:
                    break

                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                    show_grower_progress()

                    loops_without_change = 0

                    grown = False
                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        grown = True
                        # Relax back toward the original threshold as growth succeeds
                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
                        table_grower.set_threshold(threshold)

                    if not grown:
                        threshold *= 0.9
                        table_grower.set_threshold(threshold)

                else:
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        loops_without_change = 0

        else:
            table_grower.grow_table(img_gray, filtered_gray)

        if smooth:
            table_grower.smooth_grid(smooth_strength, smooth_iterations, smooth_degree)
        corners = table_grower.get_all_corners()
        logger.info(
            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
        )
        # rescale corners back to original size
        if self._scale != 1.0:
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        corners[y][x] = (
                            int(corners[y][x][0] / self._scale),  # type:ignore
                            int(corners[y][x][1] / self._scale),  # type:ignore
                        )

        return SegmentedTable(corners)  # type: ignore

    def _visualize_grid(self, img: MatLike, points: list[list[Point]]) -> None:
        """Visualize the detected grid points."""
        all_points = [point for row in points for point in row]
        drawn = imu.draw_points(img, all_points)
        imu.show(drawn, wait=True)

    def _visualize_path_finding(
        self,
        path: list[Point],
        current: Point,
        next_point: Point,
        previous_row_target: Point | None = None,
        region_center: Point | None = None,
        region_size: int | None = None,
    ) -> None:
        """Visualize the path finding process for debugging."""
        global show_time

        screen = imu.pop()

        # if gray, convert to BGR; otherwise draw on the popped screen as-is.
        # BUGFIX: previously `debug_img` was only assigned in the grayscale
        # branch, so a 3-channel screen raised NameError on the first draw.
        if len(screen.shape) == 2 or screen.shape[2] == 1:
            debug_img = cv.cvtColor(screen, cv.COLOR_GRAY2BGR)
        else:
            debug_img = screen

        debug_img = imu.draw_points(debug_img, path, color=(200, 200, 0), thickness=2)
        debug_img = imu.draw_points(
            debug_img, [current], color=(0, 255, 0), thickness=3
        )
        debug_img = imu.draw_points(
            debug_img, [next_point], color=(0, 0, 255), thickness=2
        )

        # Draw previous row target if available
        if previous_row_target is not None:
            debug_img = imu.draw_points(
                debug_img, [previous_row_target], color=(255, 0, 255), thickness=2
            )

        # Draw search region if available
        if region_center is not None and region_size is not None:
            top_left = (
                max(0, region_center[0] - region_size // 2),
                max(0, region_center[1] - region_size // 2),
            )
            bottom_right = (
                min(debug_img.shape[1], region_center[0] + region_size // 2),
                min(debug_img.shape[0], region_center[1] + region_size // 2),
            )
            cv.rectangle(
                debug_img,
                top_left,
                bottom_right,
                color=(255, 0, 0),
                thickness=2,
                lineType=cv.LINE_AA,
            )

        imu.push(debug_img)

        # Only render every 10th call to keep visualization cheap.
        show_time += 1
        if show_time % 10 != 1:
            return

        imu.show(debug_img, title="Next column point", wait=False)

    @log_calls(level=logging.DEBUG, include_return=True)
    def _astar(
        self,
        img: np.ndarray,
        start: tuple[int, int],
        goals: list[tuple[int, int]],
        direction: str,
    ) -> list[Point] | None:
        """
        Find the best path between the start point and one of the goal points on the image.

        Args:
            img: Grayscale image to follow rules through.
            start: Starting pixel ``(x, y)``.
            goals: Candidate end pixels.
            direction: Either ``"horizontal"`` or ``"vertical"``.

        Returns:
            list[Point] | None: Path from start to the closest reachable goal,
            or ``None`` if no path exists.
        """

        if not goals:
            return None

        if self._scale != 1.0:
            img = cv.resize(img, None, fx=self._scale, fy=self._scale)
            start = (int(start[0] * self._scale), int(start[1] * self._scale))
            goals = [(int(g[0] * self._scale), int(g[1] * self._scale)) for g in goals]

        # calculate bounding box with margin
        all_points = [*goals, start]
        xs = [p[0] for p in all_points]
        ys = [p[1] for p in all_points]

        margin = 30
        top_left = (max(0, min(xs) - margin), max(0, min(ys) - margin))
        bottom_right = (
            min(img.shape[1], max(xs) + margin),
            min(img.shape[0], max(ys) + margin),
        )

        # check bounds
        if (
            top_left[0] >= bottom_right[0]
            or top_left[1] >= bottom_right[1]
            or top_left[0] >= img.shape[1]
            or top_left[1] >= img.shape[0]
        ):
            return None

        # transform coordinates to cropped image
        start_local = (start[0] - top_left[0], start[1] - top_left[1])
        goals_local = [(g[0] - top_left[0], g[1] - top_left[1]) for g in goals]

        cropped = img[top_left[1] : bottom_right[1], top_left[0] : bottom_right[0]]

        if cropped.size == 0:
            return None

        path = rust_astar(cropped, start_local, goals_local, direction)

        if path is None:
            return None

        if self._scale != 1.0:
            path = [(int(p[0] / self._scale), int(p[1] / self._scale)) for p in path]
            top_left = (int(top_left[0] / self._scale), int(top_left[1] / self._scale))

        # translate the cropped-image path back to global coordinates
        return [(p[0] + top_left[0], p[1] + top_left[1]) for p in path]
Detects table grid intersections using morphological filtering and template matching.
This detector implements a multi-stage pipeline:
- Binarization: Sauvola adaptive thresholding to handle varying lighting
- Morphological operations: Dilation to connect broken rule segments
- Cross-kernel matching: Template matching with a cross-shaped kernel to find rule intersections where horizontal and vertical lines meet
- Grid growing: Iterative point detection starting from a known seed point
The cross-kernel is designed to match the specific geometry of your table rules. It should be sized so that after morphology, it aligns with actual corner shapes.
Tuning Guidelines
- intersection_kernel_size: Increase if you need more selectivity (fewer false positives)
- line_thickness/line_thickness_horizontal: Should match rule thickness after morphology
- line_gap_fill: Increase to connect more broken lines, but this thickens rules
- binarization_sensitivity: Increase to threshold more aggressively (remove noise)
- search_radius: Increase for documents with more warping/distortion
- position_weight: Increase to prefer corners closer to expected positions
Visual Debugging
Set visual=True in methods to see intermediate results and tune parameters.
150 def __init__( 151 self, 152 intersection_kernel_size: int = 21, 153 line_thickness: int = 6, 154 line_thickness_horizontal: int | None = None, 155 line_gap_fill: int | None = None, 156 binarization_sensitivity: float = 0.04, 157 binarization_window: int = 15, 158 detection_scale: float = 1.0, 159 search_radius: int = 40, 160 position_weight: float = 0.4, 161 pathfinding_threshold: float = 0.2, 162 min_rows: int = 5, 163 detection_threshold: float = 0.3, 164 extrapolation_distance: int = 4, 165 growing_resets: int = 3, 166 reset_fraction: float = 0.5, 167 ): 168 """ 169 Args: 170 intersection_kernel_size (int): the size of the cross kernel 171 a larger kernel size often means that more penalty is applied, often leading 172 to more sparse results 173 line_thickness (int): the width of one of the edges in the cross filter, should be 174 roughly equal to the width of the rules in the image after morphology is applied 175 line_thickness_horizontal (int | None): useful if the horizontal rules and vertical rules 176 have different sizes 177 line_gap_fill (int | None): the size of the morphology operators that are applied before 178 the cross kernel. 'bridges the gaps' of broken-up lines 179 binarization_sensitivity (float): threshold parameter for sauvola thresholding 180 binarization_window (int): window_size parameter for sauvola thresholding 181 detection_scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly) 182 search_radius (int): area in which to search for a new max value in `find_nearest` etc. 
183 position_weight (float): how much the point finding algorithm penalizes points that are further in the region [0, 1] 184 pathfinding_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip astar pathfinding 185 min_rows (int): minimum number of rows to find before stopping the table finding algorithm 186 detection_threshold (float): the threshold for accepting a new point when growing the table 187 extrapolation_distance (int): how many points away to look when calculating the median slope 188 growing_resets (int): The amount of cuts (large deletions) to do in the grid during table growing 189 reset_fraction (float): The portion of the already-chosen corner points to delete during cutting 190 """ 191 self._validate_parameters( 192 intersection_kernel_size, 193 line_thickness, 194 line_thickness_horizontal, 195 line_gap_fill, 196 search_radius, 197 binarization_sensitivity, 198 binarization_window, 199 position_weight, 200 pathfinding_threshold, 201 growing_resets, 202 reset_fraction, 203 ) 204 205 self._intersection_kernel_size = intersection_kernel_size 206 self._line_thickness = line_thickness 207 self._line_thickness_horizontal = ( 208 line_thickness 209 if line_thickness_horizontal is None 210 else line_thickness_horizontal 211 ) 212 self._line_gap_fill = ( 213 line_gap_fill if line_gap_fill is not None else line_thickness 214 ) 215 self._search_radius = search_radius 216 self._binarization_sensitivity = binarization_sensitivity 217 self._binarization_window = binarization_window 218 self._position_weight = position_weight 219 self._scale = detection_scale 220 self._pathfinding_threshold = pathfinding_threshold 221 self._min_rows = min_rows 222 self._detection_threshold = detection_threshold 223 self._extrapolation_distance = extrapolation_distance 224 self._growing_resets = growing_resets 225 self._reset_fraction = reset_fraction 226 227 self._cross_kernel = self._create_cross_kernel()
Arguments:
- intersection_kernel_size (int): the size of the cross kernel; a larger kernel size often means that more penalty is applied, often leading to more sparse results
- line_thickness (int): the width of one of the edges in the cross filter, should be roughly equal to the width of the rules in the image after morphology is applied
- line_thickness_horizontal (int | None): useful if the horizontal rules and vertical rules have different sizes
- line_gap_fill (int | None): the size of the morphology operators that are applied before the cross kernel. 'bridges the gaps' of broken-up lines
- binarization_sensitivity (float): threshold parameter for sauvola thresholding
- binarization_window (int): window_size parameter for sauvola thresholding
- detection_scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- search_radius (int): area in which to search for a new max value in `find_nearest` etc.
- position_weight (float): how much the point finding algorithm penalizes points that are further away, in the range [0, 1]
- pathfinding_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip astar pathfinding
- min_rows (int): minimum number of rows to find before stopping the table finding algorithm
- detection_threshold (float): the threshold for accepting a new point when growing the table
- extrapolation_distance (int): how many points away to look when calculating the median slope
- growing_resets (int): The amount of cuts (large deletions) to do in the grid during table growing
- reset_fraction (float): The portion of the already-chosen corner points to delete during cutting
347 def apply( 348 self, img: MatLike, visual: bool = False, visual_notebook: bool = False 349 ) -> MatLike: 350 """ 351 Apply the grid detection filter to the input image. 352 353 Args: 354 img (MatLike): the input image 355 visual (bool): whether to show intermediate steps via OpenCV windows 356 visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook 357 358 Returns: 359 MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules 360 """ 361 362 if img is None or img.size == 0: 363 raise ValueError("Input image is empty or None") 364 365 binary = imu.sauvola( 366 img, k=self._binarization_sensitivity, window_size=self._binarization_window 367 ) 368 369 if visual: 370 imu.show(binary, title="thresholded") 371 if visual_notebook: 372 imu.show_notebook(binary, title="thresholded") 373 374 binary = self._apply_morphology(binary) 375 376 if visual: 377 imu.show(binary, title="dilated") 378 if visual_notebook: 379 imu.show_notebook(binary, title="dilated") 380 381 filtered = self._apply_cross_matching(binary) 382 383 return filtered
Apply the grid detection filter to the input image.
Arguments:
- img (MatLike): the input image
- visual (bool): whether to show intermediate steps via OpenCV windows
- visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
Returns:
MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: int | None = None
    ) -> tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `search_radius`

        Returns:
            tuple[Point, float]: the best-matching pixel ``(x, y)`` and its
            confidence in ``[0, 1]``. If the search window falls outside the
            image, the input ``point`` is returned with confidence ``0.0``.
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_radius
        x, y = point

        # Calculate crop boundaries: a region_size x region_size window centered
        # on `point`, clipped to the image on all four sides.
        crop_x = max(0, x - region_size // 2)
        crop_y = max(0, y - region_size // 2)
        crop_width = min(region_size, filtered.shape[1] - crop_x)
        crop_height = min(region_size, filtered.shape[0] - crop_y)

        # Handle edge cases: point entirely outside the image -> no match.
        if crop_width <= 0 or crop_height <= 0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]

        if cropped.size == 0:
            return point, 0.0

        # Always apply Gaussian weighting by extending crop if needed, so that
        # responses nearer the expected location score higher than distant ones.
        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
            # Perfect size - apply weights directly
            weights = self._create_gaussian_weights(region_size)
            weighted = cropped.astype(np.float32) * weights
        else:
            # Extend crop to match region_size, apply weights, then restore
            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)

            # Calculate offset to center the cropped region in extended array
            # NOTE(review): centering assumes the clip was symmetric; near an
            # image border the crop is clipped on one side only, so the Gaussian
            # peak may not sit exactly on the query point — confirm acceptable.
            offset_y = (region_size - cropped.shape[0]) // 2
            offset_x = (region_size - cropped.shape[1]) // 2

            # Place cropped region in center of extended array
            extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ] = cropped

            # Apply Gaussian weights to extended array
            weights = self._create_gaussian_weights(region_size)
            weighted_extended = extended.astype(np.float32) * weights

            # Extract the original region back out
            weighted = weighted_extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ]

        # Pick the strongest weighted response inside the (possibly clipped) window.
        best_idx = np.argmax(weighted)
        best_y, best_x = np.unravel_index(best_idx, cropped.shape)

        # Convert window-local coordinates back to full-image coordinates.
        result_point = (
            int(crop_x + best_x),
            int(crop_y + best_y),
        )
        # Normalize the 8-bit response (scaled by the Gaussian weight) to [0, 1].
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence
Find the nearest 'corner match' in the image, along with its score [0,1]
Arguments:
- filtered (MatLike): the filtered image (obtained through `apply`)
- point (tuple[int, int]): the approximate target point (x, y)
- region (None | int): alternative value for the search region, overwriting the `__init__` parameter `search_radius`
Returns:
tuple[Point, float]: the best-matching pixel `(x, y)` and its confidence in `[0, 1]`. If the search window falls outside the image, the input `point` is returned with confidence `0.0`.
466 def detect_row_heights( 467 self, 468 img: MatLike, 469 filtered: MatLike, 470 top_row: list[Point | None], 471 min_row_height: int, 472 max_row_height: int, 473 path_scale: float = 0.25, 474 prominence: float = 18.0, 475 cluster_tolerance: int | None = None, 476 min_columns_for_rule: float = 0.4, 477 straight_cost: int = 10, 478 perpendicular_cost: int = 30, 479 darkness_divisor: int = 100, 480 ) -> list[int]: 481 """ 482 Detect variable row heights from the cross-correlation map by following 483 each vertical rule downward via A* and finding peaks of cross-correlation 484 along that path. 485 486 Args: 487 img: Original (full-resolution) table image. 488 filtered: Cross-correlation map produced by `apply()` (full resolution). 489 top_row: Top points of vertical rules in image space. ``None`` entries 490 (where header alignment failed for that rule) are skipped. 491 min_row_height: Minimum allowed row height in pixels. 492 max_row_height: Maximum allowed row height in pixels. 493 path_scale: Downscale factor used when running A* (purely for speed). 494 The detected path is rescaled back to full resolution for sampling. 495 prominence: Minimum peak value [0, 255] in the cross-correlation profile. 496 cluster_tolerance: Cross-column matching tolerance in pixels. 497 Defaults to ``min_row_height // 2``. 498 min_columns_for_rule: Fraction of columns that must agree on a peak. 499 straight_cost: A* cost per straight (down/up) step. 500 perpendicular_cost: A* cost per lateral step. Higher = stronger 501 straight-line bias. 502 darkness_divisor: A* image cost is ``pixel / darkness_divisor``. 503 Higher = lighter line bias. 504 505 Returns: 506 List of per-row heights (consecutive differences of detected offsets). 507 Empty if detection failed. 
508 """ 509 valid_points = [(float(p[0]), float(p[1])) for p in top_row if p is not None] 510 if not valid_points: 511 return [] 512 513 gray = ensure_gray(img) 514 if path_scale != 1.0: 515 scaled_gray = cv.resize(gray, None, fx=path_scale, fy=path_scale) 516 else: 517 scaled_gray = gray 518 519 tol = ( 520 cluster_tolerance 521 if cluster_tolerance is not None 522 else max(1, min_row_height // 2) 523 ) 524 525 offsets = rust_detect_row_offsets( 526 filtered, 527 scaled_gray, 528 valid_points, 529 float(path_scale), 530 int(min_row_height), 531 int(max_row_height), 532 float(prominence), 533 int(tol), 534 float(min_columns_for_rule), 535 int(straight_cost), 536 int(perpendicular_cost), 537 int(darkness_divisor), 538 ) 539 540 if not offsets: 541 return [] 542 543 heights: list[int] = [offsets[0]] 544 for i in range(1, len(offsets)): 545 heights.append(offsets[i] - offsets[i - 1]) 546 return heights
Detect variable row heights from the cross-correlation map by following each vertical rule downward via A* and finding peaks of cross-correlation along that path.
Arguments:
- img: Original (full-resolution) table image.
- filtered: Cross-correlation map produced by `apply()` (full resolution).
- top_row: Top points of vertical rules in image space. `None` entries (where header alignment failed for that rule) are skipped.
- min_row_height: Minimum allowed row height in pixels.
- max_row_height: Maximum allowed row height in pixels.
- path_scale: Downscale factor used when running A* (purely for speed). The detected path is rescaled back to full resolution for sampling.
- prominence: Minimum peak value [0, 255] in the cross-correlation profile.
- cluster_tolerance: Cross-column matching tolerance in pixels.
Defaults to `min_row_height // 2`.
- min_columns_for_rule: Fraction of columns that must agree on a peak.
- straight_cost: A* cost per straight (down/up) step.
- perpendicular_cost: A* cost per lateral step. Higher = stronger straight-line bias.
- darkness_divisor: A* image cost is `pixel / darkness_divisor`. Higher = lighter line bias.
Returns:
List of per-row heights (consecutive differences of detected offsets). Empty if detection failed.
    def find_table_points(
        self,
        img: MatLike | PathLike[str],
        top_row: list[Point | None],
        cell_widths: list[int],
        cell_heights: list[int] | int,
        visual: bool = False,
        visual_notebook: bool = False,
        window: str = WINDOW,
        goals_width: int | None = None,
        filtered: MatLike | PathLike[str] | None = None,
        smooth: bool = False,
        smooth_strength: float = 0.5,
        smooth_iterations: int = 1,
        smooth_degree: int = 1,
    ) -> "SegmentedTable":
        """
        Parse the image to a `SegmentedTable` structure that holds all of the
        intersections between horizontal and vertical rules, starting near the `left_top` point

        Args:
            img (MatLike): the input image of a table
            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
            cell_widths (list[int]): the expected widths of the cells (based on a header template)
            cell_heights (list[int]): the expected height of the rows of data.
                The last value from this list is used until the image has no more vertical space.
            visual (bool): whether to show intermediate steps
            visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
            window (str): the name of the OpenCV window to use for visualization
            goals_width (int | None): the width of the goal region when searching for the next point.
                If None, defaults to 1.5 * search_radius
            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
                calculating the filtered image from scratch
            smooth (bool): if True, smooth the grid after detection
            smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
            smooth_iterations (int): number of smoothing passes. Default: 1
            smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1

        Returns:
            a SegmentedTable object
        """

        if goals_width is None:
            goals_width = self._search_radius * 3 // 2

        if not cell_widths:
            raise ValueError("cell_widths must contain at least one value")

        # Accept either an in-memory image or a path on disk.
        if not isinstance(img, np.ndarray):
            tmp_img = cv.imread(os.fspath(cast(PathLike[str], img)))
            assert tmp_img is not None
            img = tmp_img

        img = cast(MatLike, img)

        # Reuse a precomputed correlation map when the caller supplies one.
        if filtered is None:
            filtered = self.apply(img, visual, visual_notebook)
        else:
            if not isinstance(filtered, np.ndarray):
                filtered = cv.imread(os.fspath(filtered))

        filtered = ensure_gray(filtered)

        if visual:
            imu.show(filtered, window=window)
        if visual_notebook:
            imu.show_notebook(filtered, title="filtered")

        # A single int means "all rows share this height".
        if isinstance(cell_heights, int):
            cell_heights = [cell_heights]

        # Snap each seed point to the nearest strong corner response; drop
        # seeds whose best match is too weak to trust.
        for i in range(len(top_row)):
            if top_row[i] is None:
                continue

            point = top_row[i]
            assert point is not None
            adjusted, confidence = self.find_nearest(
                filtered, point, int(self._search_radius * 2)
            )

            if confidence < 0.15:
                top_row[i] = None
            else:
                top_row[i] = adjusted

        if not any(top_row):
            logger.error("No good starting candidates given")

        # resize all parameters according to scale
        img = cv.resize(img, None, fx=self._scale, fy=self._scale)

        if visual:
            imu.push(img)

        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
        cell_widths = [int(w * self._scale) for w in cell_widths]
        cell_heights = [int(h * self._scale) for h in cell_heights]
        top_row = [
            (int(p[0] * self._scale), int(p[1] * self._scale))
            if p is not None
            else None
            for p in top_row
        ]
        search_radius = int(self._search_radius * self._scale)

        img_gray = ensure_gray(img)
        filtered_gray = ensure_gray(filtered)

        # The grower (Rust-backed) owns the iterative corner-growing state.
        table_grower = TableGrower(
            filtered_gray,
            cell_widths,
            cell_heights,
            top_row,
            search_radius,
            self._position_weight,
            self._extrapolation_distance,
            self._detection_threshold,
            self._pathfinding_threshold,
            self._min_rows,
            self._growing_resets,
            self._reset_fraction,
        )

        def show_grower_progress(wait: bool = False):
            # Overlay accepted corners (red) and frontier points (confidence-tinted
            # green) on a copy of the working image.
            img_orig = np.copy(img)
            corners = table_grower.get_all_corners()
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        img_orig = imu.draw_points(
                            img_orig,
                            [corners[y][x]],  # type:ignore
                            color=(0, 0, 255),
                            thickness=30,
                        )

            edge = table_grower.get_edge_points()

            for point, score in edge:
                color = (100, int(clamp(score * 255, 0, 255)), 100)
                imu.draw_point(img_orig, point, color=color, thickness=20)

            imu.show(img_orig, wait=wait)

        if visual:
            threshold = self._detection_threshold

            # python implementation of rust loops, for visualization purposes
            # note this is a LOT slower
            while table_grower.grow_point(img_gray, filtered_gray) is not None:
                show_grower_progress()

            show_grower_progress(True)

            original_threshold = threshold

            loops_without_change = 0

            while not table_grower.is_table_complete():
                loops_without_change += 1

                # Give up after 50 consecutive iterations with no progress.
                if loops_without_change > 50:
                    break

                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                    show_grower_progress()

                    loops_without_change = 0

                    grown = False
                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        grown = True
                        # Progress made: relax the threshold back toward its
                        # original value (capped so it never exceeds it).
                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
                        table_grower.set_threshold(threshold)

                    if not grown:
                        # Stalled: lower the acceptance bar to keep moving.
                        threshold *= 0.9
                        table_grower.set_threshold(threshold)

                else:
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        loops_without_change = 0

        else:
            # Fast path: run the whole growing loop inside the Rust extension.
            table_grower.grow_table(img_gray, filtered_gray)

        if smooth:
            table_grower.smooth_grid(smooth_strength, smooth_iterations, smooth_degree)
        corners = table_grower.get_all_corners()
        logger.info(
            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
        )
        # rescale corners back to original size
        if self._scale != 1.0:
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        corners[y][x] = (
                            int(corners[y][x][0] / self._scale),  # type:ignore
                            int(corners[y][x][1] / self._scale),  # type:ignore
                        )

        return SegmentedTable(corners)  # type: ignore
Parse the image to a SegmentedTable structure that holds all of the
intersections between horizontal and vertical rules, starting near the left_top point
Arguments:
- img (MatLike): the input image of a table
- top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
- cell_widths (list[int]): the expected widths of the cells (based on a header template)
- cell_heights (list[int]): the expected height of the rows of data. The last value from this list is used until the image has no more vertical space.
- visual (bool): whether to show intermediate steps
- window (str): the name of the OpenCV window to use for visualization
- goals_width (int | None): the width of the goal region when searching for the next point. If None, defaults to 1.5 * search_radius
- filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of calculating the filtered image from scratch
- smooth (bool): if True, smooth the grid after detection
- smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
- smooth_iterations (int): number of smoothing passes. Default: 1
- smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1
Returns:
a SegmentedTable object
95class TableIndexer(ABC): 96 """ 97 Abstract base class for table cell indexing and cropping. 98 99 Subclasses (`SegmentedTable`, `TableTemplate`) implement the `cols`, `rows`, 100 and `cell_polygon` interface. This base provides shared methods for 101 mapping pixel coordinates to cell indices and cropping cells/regions. 102 """ 103 104 def __init__(self): 105 self._col_offset = 0 106 107 @property 108 def col_offset(self) -> int: 109 """Column offset applied when reporting cell coordinates.""" 110 return self._col_offset 111 112 @col_offset.setter 113 def col_offset(self, value: int): 114 assert value >= 0 115 self._col_offset = value 116 117 @property 118 @abstractmethod 119 def cols(self) -> int: 120 """Total number of cell columns.""" 121 pass 122 123 @property 124 @abstractmethod 125 def rows(self) -> int: 126 """Total number of cell rows.""" 127 pass 128 129 def cells(self) -> Generator[tuple[int, int]]: 130 """ 131 Generate all cell indices in row-major order. 132 133 Yields (row, col) tuples for every cell in the table, iterating 134 through each row from left to right, top to bottom. 135 136 Yields: 137 tuple[int, int]: Cell indices as (row, col). 138 139 Example: 140 >>> for row, col in grid.cells(): 141 ... cell_img = grid.crop_cell(image, (row, col)) 142 ... 
process(cell_img) 143 """ 144 for row in range(self.rows): 145 for col in range(self.cols): 146 yield (row, col) 147 148 def _check_row_idx(self, row: int): 149 if row < 0: 150 raise TauluException("row number needs to be positive or zero") 151 if row >= self.rows: 152 raise TauluException(f"row number too high: {row} >= {self.rows}") 153 154 def _check_col_idx(self, col: int): 155 if col < 0: 156 raise TauluException("col number needs to be positive or zero") 157 if col >= self.cols: 158 raise TauluException(f"col number too high: {col} >= {self.cols}") 159 160 @abstractmethod 161 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 162 """ 163 Returns the coordinate (row, col) of the cell that contains the given position 164 165 Args: 166 point (tuple[float, float]): a location in the input image 167 168 Returns: 169 tuple[int, int]: the cell index (row, col) that contains the given point 170 """ 171 pass 172 173 @abstractmethod 174 def cell_polygon( 175 self, cell: tuple[int, int] 176 ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]: 177 """ 178 Return the polygon (used e.g. with OpenCV) that encloses ``cell``. 179 180 Args: 181 cell: Cell indices as ``(row, col)``. 182 183 Returns: 184 Four ``(x, y)`` points in order: top-left, top-right, 185 bottom-right, bottom-left. 
186 """ 187 pass 188 189 def _highlight_cell( 190 self, 191 image: MatLike, 192 cell: tuple[int, int], 193 color: tuple[int, int, int] = (0, 0, 255), 194 thickness: int = 2, 195 ): 196 polygon = self.cell_polygon(cell) 197 points = np.int32(list(polygon)) # type:ignore 198 cv.polylines(image, [points], True, color, thickness, cv.LINE_AA) 199 cv.putText( 200 image, 201 str(cell), 202 (int(polygon[3][0] + 10), int(polygon[3][1] - 10)), 203 cv.FONT_HERSHEY_PLAIN, 204 2.0, 205 (255, 255, 255), 206 2, 207 ) 208 209 def highlight_all_cells( 210 self, 211 image: MatLike | os.PathLike[str] | str, 212 color: tuple[int, int, int] = (0, 0, 255), 213 thickness: int = 1, 214 ) -> MatLike: 215 """ 216 Return a copy of ``image`` with every cell outlined and labeled. 217 218 Args: 219 image: Source image (path or array, BGR). 220 color: BGR color of the outline. 221 thickness: Line thickness in pixels. 222 223 Returns: 224 MatLike: a new image with all cells highlighted. 225 """ 226 if not isinstance(image, np.ndarray): 227 image = cv.imread(os.fspath(image)) # ty:ignore 228 img = np.copy(image) 229 230 for cell in self.cells(): 231 self._highlight_cell(img, cell, color, thickness) 232 233 return img 234 235 def select_one_cell( 236 self, 237 image: MatLike, 238 window: str = WINDOW, 239 color: tuple[int, int, int] = (255, 0, 0), 240 thickness: int = 2, 241 ) -> tuple[int, int] | None: 242 """ 243 Open an OpenCV window and let the user click one cell. 244 245 Args: 246 image: Source image (BGR). 247 window: OpenCV window name. 248 color: Highlight color in BGR. 249 thickness: Outline thickness in pixels. 250 251 Returns: 252 tuple[int, int] | None: ``(row, col)`` of the clicked cell, or 253 ``None`` if the window was closed without a valid click. 
254 """ 255 clicked = None 256 257 def click_event(event, x, y, flags, params): 258 nonlocal clicked 259 260 img = np.copy(image) 261 _ = flags 262 _ = params 263 if event == cv.EVENT_LBUTTONDOWN: 264 cell = self.cell((x, y)) 265 if cell[0] >= 0: 266 clicked = cell 267 else: 268 return 269 self._highlight_cell(img, cell, color, thickness) 270 cv.imshow(window, img) 271 272 imu.show(image, click_event=click_event, title="select one cell", window=window) 273 274 return clicked 275 276 def show_cells( 277 self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW 278 ) -> list[tuple[int, int]] | ShowCellsSession: 279 """ 280 Interactively display and highlight table cells. 281 282 In standard environments, shows an OpenCV window where clicking highlights cells. 283 In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib. 284 285 Args: 286 image: Source image (path or array). 287 window: OpenCV window name (ignored in notebooks). 288 289 Returns: 290 list[tuple[int, int]]: Clicked cell indices (non-notebook). 291 ShowCellsSession: Session object with .cells property (notebook). 292 293 Example: 294 >>> # Standard Python 295 >>> cells = grid.show_cells("table.png") 296 >>> 297 >>> # Jupyter Notebook 298 >>> session = grid.show_cells("table.png") 299 >>> # ... click cells ... 
300 >>> cells = session.cells 301 """ 302 if not isinstance(image, np.ndarray): 303 image = cv.imread(os.fspath(image)) # ty:ignore 304 305 def running_in_notebook() -> bool: 306 try: 307 from IPython import get_ipython 308 309 ip = get_ipython() 310 return ip is not None and "IPKernelApp" in ip.config 311 except Exception: 312 return False 313 314 use_notebook = running_in_notebook() 315 316 if use_notebook: 317 return self.show_cells_notebook(image) 318 else: 319 img = np.copy(image) 320 cells = [] 321 322 def click_event(event, x, y, flags, params): 323 _ = flags 324 _ = params 325 if event == cv.EVENT_LBUTTONDOWN: 326 cell = self.cell((x, y)) 327 if cell[0] >= 0: 328 cells.append(cell) 329 else: 330 return 331 self._highlight_cell(img, cell) 332 cv.imshow(window, img) 333 334 imu.show( 335 img, 336 click_event=click_event, 337 title="click to highlight cells", 338 window=window, 339 ) 340 341 return cells 342 343 def show_cells_notebook( 344 self, image: MatLike | os.PathLike[str] | str 345 ) -> ShowCellsSession: 346 """ 347 Notebook-compatible version of show_cells using matplotlib. 348 349 Returns a ShowCellsSession immediately. Click on cells to highlight them. 350 Access clicked cells via session.cells. 351 352 Args: 353 image: Source image (path or array). 354 355 Returns: 356 ShowCellsSession: Access .cells to get list of clicked cell indices. 357 358 Example: 359 >>> session = grid.show_cells_notebook("table.png") 360 >>> # Click cells in the interactive plot 361 >>> print(session.cells) # [(0, 0), (1, 2), ...] 
362 """ 363 if not isinstance(image, np.ndarray): 364 tmp_image = cv.imread(os.fspath(image)) 365 assert tmp_image is not None 366 image = tmp_image 367 368 import ipywidgets as widgets 369 import matplotlib.pyplot as plt 370 from IPython.display import display 371 372 session = ShowCellsSession() 373 374 # Convert BGR to RGB for matplotlib 375 display_img = cv.cvtColor(image, cv.COLOR_BGR2RGB) 376 img_with_highlights = np.copy(display_img) 377 378 fig, ax = plt.subplots(figsize=(15, 12)) 379 fig.canvas.toolbar_visible = False # ty:ignore[unresolved-attribute] 380 fig.canvas.header_visible = False # ty:ignore[unresolved-attribute] 381 382 im_display = ax.imshow(img_with_highlights) 383 ax.set_title("Click cells to highlight them. Cells clicked: 0") 384 ax.set_axis_off() 385 386 # Create buttons 387 done_button = widgets.Button( 388 description="Done", 389 button_style="success", 390 layout=widgets.Layout(width="150px", height="50px"), 391 ) 392 clear_button = widgets.Button( 393 description="Clear All", 394 button_style="warning", 395 layout=widgets.Layout(width="150px", height="50px"), 396 ) 397 undo_button = widgets.Button( 398 description="Undo Last", 399 button_style="info", 400 layout=widgets.Layout(width="150px", height="50px"), 401 ) 402 403 done_button.style.font_size = "18px" 404 clear_button.style.font_size = "18px" 405 undo_button.style.font_size = "18px" 406 407 status_label = widgets.Label( 408 value="Click on cells to highlight them", style={"font_size": "18px"} 409 ) 410 411 def draw_highlight(cell_idx: tuple[int, int]): 412 """Draw a highlighted cell on the image.""" 413 polygon = self.cell_polygon(cell_idx) 414 points = np.array(list(polygon), dtype=np.int32) 415 416 # Draw polyline on the RGB image 417 cv.polylines( 418 img_with_highlights, 419 [points], 420 True, 421 (255, 0, 0), # Red in RGB 422 2, 423 cv.LINE_AA, 424 ) 425 426 # Draw cell index text 427 cv.putText( 428 img_with_highlights, 429 str(cell_idx), 430 (int(polygon[3][0] + 10), 
int(polygon[3][1] - 10)), 431 cv.FONT_HERSHEY_PLAIN, 432 2.0, 433 (255, 255, 255), # White in RGB 434 2, 435 ) 436 437 def redraw_all(): 438 """Redraw the image with all current highlights.""" 439 nonlocal img_with_highlights 440 img_with_highlights = np.copy(display_img) 441 442 for cell_idx in session._cells: 443 draw_highlight(cell_idx) 444 445 im_display.set_data(img_with_highlights) 446 ax.set_title( 447 f"Click cells to highlight them. Cells clicked: {len(session._cells)}" 448 ) 449 fig.canvas.draw_idle() 450 451 def on_click(event): 452 if event.inaxes != ax or event.xdata is None: 453 return 454 455 x, y = int(event.xdata), int(event.ydata) 456 457 if event.button == 1: # Left click 458 cell_idx = self.cell((x, y)) 459 if cell_idx[0] >= 0: 460 session._cells.append(cell_idx) 461 draw_highlight(cell_idx) 462 im_display.set_data(img_with_highlights) 463 ax.set_title( 464 f"Click cells to highlight them. Cells clicked: {len(session._cells)}" 465 ) 466 status_label.value = ( 467 f"Cell {cell_idx} highlighted. Total: {len(session._cells)}" 468 ) 469 fig.canvas.draw_idle() 470 else: 471 status_label.value = f"Click at ({x}, {y}) is outside table bounds" 472 473 def on_clear(_): 474 session._cells.clear() 475 redraw_all() 476 status_label.value = "All highlights cleared" 477 478 def on_undo(_): 479 if session._cells: 480 removed = session._cells.pop() 481 redraw_all() 482 status_label.value = ( 483 f"Removed cell {removed}. Remaining: {len(session._cells)}" 484 ) 485 else: 486 status_label.value = "No cells to undo" 487 488 def on_done(_): 489 fig.canvas.mpl_disconnect(cid) 490 done_button.disabled = True 491 clear_button.disabled = True 492 undo_button.disabled = True 493 ax.set_title(f"Done! {len(session._cells)} cells highlighted.") 494 status_label.value = "Complete! 
Access clicked cells via session.cells" 495 fig.canvas.draw_idle() 496 497 done_button.on_click(on_done) 498 clear_button.on_click(on_clear) 499 undo_button.on_click(on_undo) 500 501 cid = fig.canvas.mpl_connect("button_press_event", on_click) 502 503 plt.tight_layout(pad=0) 504 plt.show() 505 display(widgets.HBox([done_button, clear_button, undo_button, status_label])) 506 507 return session 508 509 @abstractmethod 510 def region( 511 self, 512 start: tuple[int, int], 513 end: tuple[int, int], 514 ) -> tuple[Point, Point, Point, Point]: 515 """ 516 Get the bounding polygon for the rectangular region from ``start`` to 517 ``end`` (both cells inclusive). 518 519 Args: 520 start: Top-left cell as ``(row, col)``. 521 end: Bottom-right cell as ``(row, col)``. 522 523 Returns: 524 Four points (lt, rt, rb, lb), each as ``(x, y)``. 525 """ 526 pass 527 528 def crop_region( 529 self, 530 image: MatLike, 531 start: tuple[int, int], 532 end: tuple[int, int], 533 margin: int = 0, 534 margin_top: int | None = None, 535 margin_bottom: int | None = None, 536 margin_left: int | None = None, 537 margin_right: int | None = None, 538 margin_y: int | None = None, 539 margin_x: int | None = None, 540 ) -> MatLike: 541 """ 542 Extract a multi-cell region from the image with perspective correction. 543 544 Crops the image to include all cells from start to end (inclusive), 545 applying a perspective transform to produce a rectangular output. 546 547 Args: 548 image: Source image (BGR or grayscale). 549 start: Top-left cell as (row, col). 550 end: Bottom-right cell as (row, col). 551 margin: Uniform margin in pixels (default 0). 552 margin_top: Override top margin. 553 margin_bottom: Override bottom margin. 554 margin_left: Override left margin. 555 margin_right: Override right margin. 556 margin_y: Override vertical margins (top and bottom). 557 margin_x: Override horizontal margins (left and right). 558 559 Returns: 560 Cropped and perspective-corrected image. 
561 562 Example: 563 >>> # Extract a 3x2 region starting at cell (1, 0) 564 >>> region_img = grid.crop_region(image, (1, 0), (3, 1)) 565 """ 566 567 region = self.region(start, end) 568 569 lt, rt, rb, lb = _apply_margin( 570 *region, 571 margin=margin, 572 margin_top=margin_top, 573 margin_bottom=margin_bottom, 574 margin_left=margin_left, 575 margin_right=margin_right, 576 margin_y=margin_y, 577 margin_x=margin_x, 578 ) 579 580 # apply margins according to priority: 581 # margin_top > margin_y > margin (etc.) 582 583 w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2 584 h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2 585 586 # crop by doing a perspective transform to the desired quad 587 src_pts = np.array([lt, rt, rb, lb], dtype="float32") 588 dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32") 589 m = cv.getPerspectiveTransform(src_pts, dst_pts) 590 warped = cv.warpPerspective(image, m, (int(w), int(h))) 591 592 return warped 593 594 @abstractmethod 595 def text_regions( 596 self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0 597 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 598 """ 599 Split a row into spans of continuous text crossing column rules. 600 601 Args: 602 img: Source table image. 603 row: Row index to scan. 604 margin_x: Horizontal margin around each rule crop, in pixels. 605 margin_y: Vertical margin around each rule crop, in pixels. 606 607 Returns: 608 List of ``((row, start_col), (row, end_col))`` spans (inclusive). 609 """ 610 611 pass 612 613 def crop_cell( 614 self, 615 image, 616 cell: tuple[int, int], 617 margin: int = 0, 618 margin_top: int | None = None, 619 margin_bottom: int | None = None, 620 margin_left: int | None = None, 621 margin_right: int | None = None, 622 margin_y: int | None = None, 623 margin_x: int | None = None, 624 ) -> MatLike: 625 """ 626 Extract a single cell from the image with perspective correction. 627 628 Convenience method equivalent to `crop_region(image, cell, cell, margin)`. 
629 630 Args: 631 image: Source image (BGR or grayscale). 632 cell: Cell indices as (row, col). 633 margin: Padding in pixels around the cell (default 0). 634 635 Returns: 636 Cropped and perspective-corrected cell image. 637 638 Example: 639 >>> cell_img = grid.crop_cell(image, (0, 0)) 640 >>> cv2.imwrite("cell_0_0.png", cell_img) 641 """ 642 return self.crop_region( 643 image, 644 cell, 645 cell, 646 margin, 647 margin_top, 648 margin_bottom, 649 margin_left, 650 margin_right, 651 margin_y, 652 margin_x, 653 )
Abstract base class for table cell indexing and cropping.
Subclasses (SegmentedTable, TableTemplate) implement the cols, rows,
and cell_polygon interface. This base provides shared methods for
mapping pixel coordinates to cell indices and cropping cells/regions.
@property
def col_offset(self) -> int:
    """Offset added to column indices when cell coordinates are reported."""
    return self._col_offset
Column offset applied when reporting cell coordinates.
@property
@abstractmethod
def cols(self) -> int:
    """Number of cell columns in the table grid."""
    ...
Total number of cell columns.
@property
@abstractmethod
def rows(self) -> int:
    """Number of cell rows in the table grid."""
    ...
Total number of cell rows.
def cells(self) -> Generator[tuple[int, int]]:
    """
    Iterate over every cell index in row-major order.

    Produces (row, col) tuples for each cell of the table, scanning every
    row from left to right, top to bottom.

    Yields:
        tuple[int, int]: Cell indices as (row, col).

    Example:
        >>> for row, col in grid.cells():
        ...     cell_img = grid.crop_cell(image, (row, col))
        ...     process(cell_img)
    """
    yield from ((r, c) for r in range(self.rows) for c in range(self.cols))
Generate all cell indices in row-major order.
Yields (row, col) tuples for every cell in the table, iterating through each row from left to right, top to bottom.
Yields:
tuple[int, int]: Cell indices as (row, col).
Example:
>>> for row, col in grid.cells(): ... cell_img = grid.crop_cell(image, (row, col)) ... process(cell_img)
@abstractmethod
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Map an image location to the index of the cell that contains it.

    Args:
        point (tuple[float, float]): a location in the input image

    Returns:
        tuple[int, int]: the cell index (row, col) that contains the given point
    """
    ...
Returns the coordinate (row, col) of the cell that contains the given position
Arguments:
- point (tuple[float, float]): a location in the input image
Returns:
tuple[int, int]: the cell index (row, col) that contains the given point
@abstractmethod
def cell_polygon(
    self, cell: tuple[int, int]
) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
    """
    Give the quadrilateral that encloses ``cell`` (usable e.g. with OpenCV).

    Args:
        cell: Cell indices as ``(row, col)``.

    Returns:
        Four ``(x, y)`` points in order: top-left, top-right,
        bottom-right, bottom-left.
    """
    ...
Return the polygon (used e.g. with OpenCV) that encloses cell.
Arguments:
- cell: Cell indices as `(row, col)`.
Returns:
Four `(x, y)` points in order: top-left, top-right, bottom-right, bottom-left.
def highlight_all_cells(
    self,
    image: MatLike | os.PathLike[str] | str,
    color: tuple[int, int, int] = (0, 0, 255),
    thickness: int = 1,
) -> MatLike:
    """
    Return a copy of ``image`` with every cell outlined and labeled.

    Args:
        image: Source image (path or array, BGR).
        color: BGR color of the outline.
        thickness: Line thickness in pixels.

    Returns:
        MatLike: a new image with all cells highlighted.
    """
    if not isinstance(image, np.ndarray):
        loaded = cv.imread(os.fspath(image))
        # Fail fast on unreadable paths, consistent with show_cells_notebook;
        # previously a None from imread propagated into np.copy silently.
        assert loaded is not None, f"could not read image: {image}"
        image = loaded

    img = np.copy(image)

    for cell in self.cells():
        self._highlight_cell(img, cell, color, thickness)

    return img
Return a copy of image with every cell outlined and labeled.
Arguments:
- image: Source image (path or array, BGR).
- color: BGR color of the outline.
- thickness: Line thickness in pixels.
Returns:
MatLike: a new image with all cells highlighted.
def select_one_cell(
    self,
    image: MatLike,
    window: str = WINDOW,
    color: tuple[int, int, int] = (255, 0, 0),
    thickness: int = 2,
) -> tuple[int, int] | None:
    """
    Open an OpenCV window and let the user click one cell.

    Args:
        image: Source image (BGR).
        window: OpenCV window name.
        color: Highlight color in BGR.
        thickness: Outline thickness in pixels.

    Returns:
        tuple[int, int] | None: ``(row, col)`` of the clicked cell, or
        ``None`` if the window was closed without a valid click.
    """
    clicked = None

    def click_event(event, x, y, flags, params):
        nonlocal clicked

        # Fresh copy per click so only the most recent selection is drawn.
        img = np.copy(image)
        _ = flags
        _ = params
        if event == cv.EVENT_LBUTTONDOWN:
            cell = self.cell((x, y))
            if cell[0] >= 0:
                clicked = cell
            else:
                # Click fell outside the table bounds; ignore it.
                return
            self._highlight_cell(img, cell, color, thickness)
            cv.imshow(window, img)

    # imu.show blocks until the window is closed; `clicked` holds the last
    # valid selection made before closing (or None).
    imu.show(image, click_event=click_event, title="select one cell", window=window)

    return clicked
Open an OpenCV window and let the user click one cell.
Arguments:
- image: Source image (BGR).
- window: OpenCV window name.
- color: Highlight color in BGR.
- thickness: Outline thickness in pixels.
Returns:
tuple[int, int] | None: `(row, col)` of the clicked cell, or `None` if the window was closed without a valid click.
def show_cells(
    self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
) -> list[tuple[int, int]] | ShowCellsSession:
    """
    Interactively display and highlight table cells.

    In standard environments, shows an OpenCV window where clicking highlights cells.
    In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib.

    Args:
        image: Source image (path or array).
        window: OpenCV window name (ignored in notebooks).

    Returns:
        list[tuple[int, int]]: Clicked cell indices (non-notebook).
        ShowCellsSession: Session object with .cells property (notebook).

    Example:
        >>> # Standard Python
        >>> cells = grid.show_cells("table.png")
        >>>
        >>> # Jupyter Notebook
        >>> session = grid.show_cells("table.png")
        >>> # ... click cells ...
        >>> cells = session.cells
    """
    if not isinstance(image, np.ndarray):
        image = cv.imread(os.fspath(image))  # ty:ignore

    def running_in_notebook() -> bool:
        # Heuristic: a configured IPython kernel means we're inside Jupyter.
        try:
            from IPython import get_ipython

            ip = get_ipython()
            return ip is not None and "IPKernelApp" in ip.config
        except Exception:
            # IPython missing or misbehaving -> assume plain Python.
            return False

    use_notebook = running_in_notebook()

    if use_notebook:
        return self.show_cells_notebook(image)
    else:
        img = np.copy(image)
        cells = []

        def click_event(event, x, y, flags, params):
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                cell = self.cell((x, y))
                if cell[0] >= 0:
                    cells.append(cell)
                else:
                    # Click outside the table bounds is ignored.
                    return
                self._highlight_cell(img, cell)
                cv.imshow(window, img)

        # Blocks until the window is closed; `cells` accumulates valid clicks.
        imu.show(
            img,
            click_event=click_event,
            title="click to highlight cells",
            window=window,
        )

        return cells
Interactively display and highlight table cells.
In standard environments, shows an OpenCV window where clicking highlights cells. In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib.
Arguments:
- image: Source image (path or array).
- window: OpenCV window name (ignored in notebooks).
Returns:
list[tuple[int, int]]: Clicked cell indices (non-notebook). ShowCellsSession: Session object with .cells property (notebook).
Example:
>>> # Standard Python >>> cells = grid.show_cells("table.png") >>> >>> # Jupyter Notebook >>> session = grid.show_cells("table.png") >>> # ... click cells ... >>> cells = session.cells
def show_cells_notebook(
    self, image: MatLike | os.PathLike[str] | str
) -> ShowCellsSession:
    """
    Notebook-compatible version of show_cells using matplotlib.

    Returns a ShowCellsSession immediately. Click on cells to highlight them.
    Access clicked cells via session.cells.

    Args:
        image: Source image (path or array).

    Returns:
        ShowCellsSession: Access .cells to get list of clicked cell indices.

    Example:
        >>> session = grid.show_cells_notebook("table.png")
        >>> # Click cells in the interactive plot
        >>> print(session.cells)  # [(0, 0), (1, 2), ...]
    """
    if not isinstance(image, np.ndarray):
        tmp_image = cv.imread(os.fspath(image))
        assert tmp_image is not None
        image = tmp_image

    # Notebook-only dependencies are imported lazily so plain-script users
    # don't need ipywidgets/matplotlib installed.
    import ipywidgets as widgets
    import matplotlib.pyplot as plt
    from IPython.display import display

    session = ShowCellsSession()

    # Convert BGR to RGB for matplotlib
    display_img = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    img_with_highlights = np.copy(display_img)

    fig, ax = plt.subplots(figsize=(15, 12))
    fig.canvas.toolbar_visible = False  # ty:ignore[unresolved-attribute]
    fig.canvas.header_visible = False  # ty:ignore[unresolved-attribute]

    im_display = ax.imshow(img_with_highlights)
    ax.set_title("Click cells to highlight them. Cells clicked: 0")
    ax.set_axis_off()

    # Create buttons
    done_button = widgets.Button(
        description="Done",
        button_style="success",
        layout=widgets.Layout(width="150px", height="50px"),
    )
    clear_button = widgets.Button(
        description="Clear All",
        button_style="warning",
        layout=widgets.Layout(width="150px", height="50px"),
    )
    undo_button = widgets.Button(
        description="Undo Last",
        button_style="info",
        layout=widgets.Layout(width="150px", height="50px"),
    )

    done_button.style.font_size = "18px"
    clear_button.style.font_size = "18px"
    undo_button.style.font_size = "18px"

    status_label = widgets.Label(
        value="Click on cells to highlight them", style={"font_size": "18px"}
    )

    def draw_highlight(cell_idx: tuple[int, int]):
        """Draw a highlighted cell on the image."""
        polygon = self.cell_polygon(cell_idx)
        points = np.array(list(polygon), dtype=np.int32)

        # Draw polyline on the RGB image
        cv.polylines(
            img_with_highlights,
            [points],
            True,
            (255, 0, 0),  # Red in RGB
            2,
            cv.LINE_AA,
        )

        # Draw cell index text
        cv.putText(
            img_with_highlights,
            str(cell_idx),
            (int(polygon[3][0] + 10), int(polygon[3][1] - 10)),
            cv.FONT_HERSHEY_PLAIN,
            2.0,
            (255, 255, 255),  # White in RGB
            2,
        )

    def redraw_all():
        """Redraw the image with all current highlights."""
        # Start over from the clean image, then re-apply every highlight.
        nonlocal img_with_highlights
        img_with_highlights = np.copy(display_img)

        for cell_idx in session._cells:
            draw_highlight(cell_idx)

        im_display.set_data(img_with_highlights)
        ax.set_title(
            f"Click cells to highlight them. Cells clicked: {len(session._cells)}"
        )
        fig.canvas.draw_idle()

    def on_click(event):
        if event.inaxes != ax or event.xdata is None:
            return

        x, y = int(event.xdata), int(event.ydata)

        if event.button == 1:  # Left click
            cell_idx = self.cell((x, y))
            if cell_idx[0] >= 0:
                session._cells.append(cell_idx)
                draw_highlight(cell_idx)
                im_display.set_data(img_with_highlights)
                ax.set_title(
                    f"Click cells to highlight them. Cells clicked: {len(session._cells)}"
                )
                status_label.value = (
                    f"Cell {cell_idx} highlighted. Total: {len(session._cells)}"
                )
                fig.canvas.draw_idle()
            else:
                status_label.value = f"Click at ({x}, {y}) is outside table bounds"

    def on_clear(_):
        session._cells.clear()
        redraw_all()
        status_label.value = "All highlights cleared"

    def on_undo(_):
        if session._cells:
            removed = session._cells.pop()
            redraw_all()
            status_label.value = (
                f"Removed cell {removed}. Remaining: {len(session._cells)}"
            )
        else:
            status_label.value = "No cells to undo"

    def on_done(_):
        # Detach the click handler and freeze the UI; session.cells stays usable.
        fig.canvas.mpl_disconnect(cid)
        done_button.disabled = True
        clear_button.disabled = True
        undo_button.disabled = True
        ax.set_title(f"Done! {len(session._cells)} cells highlighted.")
        status_label.value = "Complete! Access clicked cells via session.cells"
        fig.canvas.draw_idle()

    done_button.on_click(on_done)
    clear_button.on_click(on_clear)
    undo_button.on_click(on_undo)

    cid = fig.canvas.mpl_connect("button_press_event", on_click)

    plt.tight_layout(pad=0)
    plt.show()
    display(widgets.HBox([done_button, clear_button, undo_button, status_label]))

    return session
Notebook-compatible version of show_cells using matplotlib.
Returns a ShowCellsSession immediately. Click on cells to highlight them. Access clicked cells via session.cells.
Arguments:
- image: Source image (path or array).
Returns:
ShowCellsSession: Access .cells to get list of clicked cell indices.
Example:
>>> session = grid.show_cells_notebook("table.png") >>> # Click cells in the interactive plot >>> print(session.cells) # [(0, 0), (1, 2), ...]
@abstractmethod
def region(
    self,
    start: tuple[int, int],
    end: tuple[int, int],
) -> tuple[Point, Point, Point, Point]:
    """
    Bounding polygon of the rectangular cell range ``start``..``end``
    (both endpoint cells inclusive).

    Args:
        start: Top-left cell as ``(row, col)``.
        end: Bottom-right cell as ``(row, col)``.

    Returns:
        Four points (lt, rt, rb, lb), each as ``(x, y)``.
    """
    ...
Get the bounding polygon for the rectangular region from start to
end (both cells inclusive).
Arguments:
- start: Top-left cell as `(row, col)`.
- end: Bottom-right cell as `(row, col)`.
Returns:
Four points (lt, rt, rb, lb), each as `(x, y)`.
def crop_region(
    self,
    image: MatLike,
    start: tuple[int, int],
    end: tuple[int, int],
    margin: int = 0,
    margin_top: int | None = None,
    margin_bottom: int | None = None,
    margin_left: int | None = None,
    margin_right: int | None = None,
    margin_y: int | None = None,
    margin_x: int | None = None,
) -> MatLike:
    """
    Extract a multi-cell region from the image with perspective correction.

    Crops the image to include all cells from start to end (inclusive),
    applying a perspective transform to produce a rectangular output.

    Args:
        image: Source image (BGR or grayscale).
        start: Top-left cell as (row, col).
        end: Bottom-right cell as (row, col).
        margin: Uniform margin in pixels (default 0).
        margin_top: Override top margin.
        margin_bottom: Override bottom margin.
        margin_left: Override left margin.
        margin_right: Override right margin.
        margin_y: Override vertical margins (top and bottom).
        margin_x: Override horizontal margins (left and right).

    Returns:
        Cropped and perspective-corrected image.

    Example:
        >>> # Extract a 3x2 region starting at cell (1, 0)
        >>> region_img = grid.crop_region(image, (1, 0), (3, 1))
    """
    # Margin precedence: the most specific override wins
    # (margin_top > margin_y > margin, and likewise per side).
    lt, rt, rb, lb = _apply_margin(
        *self.region(start, end),
        margin=margin,
        margin_top=margin_top,
        margin_bottom=margin_bottom,
        margin_left=margin_left,
        margin_right=margin_right,
        margin_y=margin_y,
        margin_x=margin_x,
    )

    # Output size: average the lengths of the two opposite edges of the quad.
    width = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
    height = (rb[1] - rt[1] + lb[1] - lt[1]) / 2

    # Crop by warping the (possibly skewed) quad onto an axis-aligned rectangle.
    source_quad = np.array([lt, rt, rb, lb], dtype="float32")
    target_quad = np.array(
        [[0, 0], [width, 0], [width, height], [0, height]], dtype="float32"
    )
    transform = cv.getPerspectiveTransform(source_quad, target_quad)
    return cv.warpPerspective(image, transform, (int(width), int(height)))
Extract a multi-cell region from the image with perspective correction.
Crops the image to include all cells from start to end (inclusive), applying a perspective transform to produce a rectangular output.
Arguments:
- image: Source image (BGR or grayscale).
- start: Top-left cell as (row, col).
- end: Bottom-right cell as (row, col).
- margin: Uniform margin in pixels (default 0).
- margin_top: Override top margin.
- margin_bottom: Override bottom margin.
- margin_left: Override left margin.
- margin_right: Override right margin.
- margin_y: Override vertical margins (top and bottom).
- margin_x: Override horizontal margins (left and right).
Returns:
Cropped and perspective-corrected image.
Example:
>>> # Extract a 3x2 region starting at cell (1, 0) >>> region_img = grid.crop_region(image, (1, 0), (3, 1))
@abstractmethod
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """
    Find spans of continuous text in ``row`` that cross column rules.

    Args:
        img: Source table image.
        row: Row index to scan.
        margin_x: Horizontal margin around each rule crop, in pixels.
        margin_y: Vertical margin around each rule crop, in pixels.

    Returns:
        List of ``((row, start_col), (row, end_col))`` spans (inclusive).
    """
    ...
Split a row into spans of continuous text crossing column rules.
Arguments:
- img: Source table image.
- row: Row index to scan.
- margin_x: Horizontal margin around each rule crop, in pixels.
- margin_y: Vertical margin around each rule crop, in pixels.
Returns:
List of `((row, start_col), (row, end_col))` spans (inclusive).
def crop_cell(
    self,
    image,
    cell: tuple[int, int],
    margin: int = 0,
    margin_top: int | None = None,
    margin_bottom: int | None = None,
    margin_left: int | None = None,
    margin_right: int | None = None,
    margin_y: int | None = None,
    margin_x: int | None = None,
) -> MatLike:
    """
    Extract a single cell from the image with perspective correction.

    Convenience method equivalent to `crop_region(image, cell, cell, margin)`.

    Args:
        image: Source image (BGR or grayscale).
        cell: Cell indices as (row, col).
        margin: Padding in pixels around the cell (default 0).

    Returns:
        Cropped and perspective-corrected cell image.

    Example:
        >>> cell_img = grid.crop_cell(image, (0, 0))
        >>> cv2.imwrite("cell_0_0.png", cell_img)
    """
    # A single cell is a 1x1 region; forward every margin override by name.
    return self.crop_region(
        image,
        cell,
        cell,
        margin=margin,
        margin_top=margin_top,
        margin_bottom=margin_bottom,
        margin_left=margin_left,
        margin_right=margin_right,
        margin_y=margin_y,
        margin_x=margin_x,
    )
Extract a single cell from the image with perspective correction.
Convenience method equivalent to crop_region(image, cell, cell, margin).
Arguments:
- image: Source image (BGR or grayscale).
- cell: Cell indices as (row, col).
- margin: Padding in pixels around the cell (default 0).
Returns:
Cropped and perspective-corrected cell image.
Example:
>>> cell_img = grid.crop_cell(image, (0, 0)) >>> cv2.imwrite("cell_0_0.png", cell_img)
188class TableTemplate(TableIndexer): 189 """ 190 Defines the structure of a table header as a set of rules (lines). 191 192 Created via `TableTemplate.from_saved` (loading a JSON annotation) or 193 `AnnotationSession` (interactive annotation). Provides cell position 194 lookups and expected row heights for the grid-growing algorithm. 195 """ 196 197 def __init__(self, rules: Iterable[Iterable[int]]): 198 """ 199 Args: 200 rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1] 201 """ 202 203 super().__init__() 204 self._rules = [_Rule(*rule) for rule in rules] 205 self._h_rules = sorted( 206 [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y 207 ) 208 self._v_rules = sorted( 209 [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x 210 ) 211 212 @log_calls(level=logging.DEBUG) 213 def save(self, path: PathLike[str]): 214 """ 215 Save the TableTemplate as JSON. 216 217 Args: 218 path: Destination JSON file path. 219 """ 220 221 data = {"rules": [r.to_dict() for r in self._rules]} 222 223 with open(path, "w") as f: 224 json.dump(data, f) 225 226 @staticmethod 227 @log_calls(level=logging.DEBUG) 228 def from_saved(path: PathLike[str] | str) -> "TableTemplate": 229 """ 230 Load a TableTemplate from a JSON file produced by `save`. 231 232 Args: 233 path: Path to the JSON file. 234 235 Returns: 236 TableTemplate: the deserialized template. 
237 """ 238 with open(path) as f: 239 data = json.load(f) 240 rules = data["rules"] 241 rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in rules] 242 243 return TableTemplate(rules) 244 245 @property 246 def cols(self) -> int: 247 return len(self._v_rules) - 1 248 249 @property 250 def rows(self) -> int: 251 return len(self._h_rules) - 1 252 253 @staticmethod 254 @log_calls(level=logging.DEBUG) 255 def annotate_image( 256 template: MatLike | str, 257 crop: PathLike[str] | str | None = None, 258 margin: int = 10, 259 ) -> "TableTemplate": 260 """ 261 Utility method that allows users to create a template from a template image. 262 263 The user is asked to click to annotate lines (two clicks per line). 264 265 Args: 266 template: the image on which to annotate the header lines 267 crop (str | None): if a path, crop the template image first then 268 do the annotation; the cropped image is written to this path. 269 margin (int): margin to add around the cropping of the header 270 271 Returns: 272 TableTemplate: a new template built from the annotated lines. 
273 """ 274 275 if type(template) is str: 276 value = cv.imread(template) 277 assert value is not None 278 template = value 279 template = cast(MatLike, template) 280 281 if crop is not None: 282 cropped = TableTemplate._crop(template, margin) 283 cv.imwrite(os.fspath(crop), cropped) 284 template = cropped 285 286 start_point = None 287 lines: list[list[int]] = [] 288 289 anno_template = np.copy(template) 290 291 def get_point(event, x, y, flags, params): 292 nonlocal lines, start_point, anno_template 293 _ = flags 294 _ = params 295 if event == cv.EVENT_LBUTTONDOWN: 296 if start_point is not None: 297 line: list[int] = [start_point[1], start_point[0], x, y] 298 299 cv.line( 300 anno_template, 301 (start_point[1], start_point[0]), 302 (x, y), 303 (0, 255, 0), 304 2, 305 cv.LINE_AA, 306 ) 307 cv.imshow(constants.WINDOW, anno_template) 308 309 lines.append(line) 310 start_point = None 311 else: 312 start_point = (y, x) 313 elif event == cv.EVENT_RBUTTONDOWN: 314 start_point = None 315 316 # remove the last annotation 317 lines = lines[:-1] 318 319 anno_template = np.copy(anno_template) 320 321 for line in lines: 322 cv.line( 323 template, 324 (line[0], line[1]), 325 (line[2], line[3]), 326 (0, 255, 0), 327 2, 328 cv.LINE_AA, 329 ) 330 331 cv.imshow(constants.WINDOW, template) 332 333 print(ANNO_HELP) 334 335 imu.show(anno_template, get_point, title="annotate the header") 336 337 return TableTemplate(lines) 338 339 @staticmethod 340 @log_calls(level=logging.DEBUG) 341 def annotate_image_notebook( 342 template: MatLike | str, 343 crop: PathLike[str] | str | None = None, 344 margin: int = 10, 345 ) -> "AnnotationSession": 346 """ 347 Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately. 348 Interact with the widget and click Done to finalize. 349 Access the result via session.result after clicking Done. 
350 351 Args: 352 template: the image on which to annotate the header lines 353 crop (str | None): if str, crop the template image first, then do the annotation. 354 The cropped image will be stored at the supplied path 355 margin (int): margin to add around the cropping of the header 356 357 Returns: 358 AnnotationSession: access .result after clicking Done to get the TableTemplate. 359 """ 360 if isinstance(template, str): 361 tmp = cv.imread(template) 362 assert tmp is not None 363 template = tmp 364 365 session = AnnotationSession(crop) 366 367 if crop is not None: 368 # First show crop UI, then annotation UI 369 TableTemplate._crop_notebook(template, margin, session) 370 else: 371 # Go directly to annotation 372 TableTemplate._show_annotation_ui(template, session) 373 374 return session 375 376 @staticmethod 377 def _crop_notebook(template: MatLike, margin: int, session: "AnnotationSession"): 378 """Notebook-compatible crop UI using matplotlib + ipywidgets.""" 379 import ipywidgets as widgets 380 from IPython.display import display 381 382 display_img = cv.cvtColor(template, cv.COLOR_BGR2RGB) 383 384 points: list[tuple[int, int]] = [] 385 drawn_points: list = [] 386 387 fig, ax = plt.subplots(figsize=(15, 15)) 388 389 fig.canvas.toolbar_visible = False # ty:ignore[unresolved-attribute] 390 fig.canvas.header_visible = False # ty:ignore[unresolved-attribute] 391 392 ax.imshow(display_img, origin="upper") 393 ax.set_title( 394 "Annotate the header: \nClick 4 corners of the header region such that the entire header is contained within the rectangle." 
395 ) 396 ax.set_axis_off() 397 398 # Create ipywidgets buttons 399 done_button = widgets.Button( 400 description="Done Cropping", 401 button_style="success", 402 layout=widgets.Layout(width="200px", height="50px"), 403 ) 404 405 undo_button = widgets.Button( 406 description="Undo Last Point", 407 button_style="warning", 408 layout=widgets.Layout(width="200px", height="50px"), 409 ) 410 411 done_button.style.font_size = "18px" 412 undo_button.style.font_size = "18px" 413 414 status_label = widgets.Label( 415 value="Press 'Done' when finished. Press 'Undo Last Point' to remove the last point.", 416 style={"font_size": "18px"}, 417 ) 418 419 def on_click(event): 420 if event.inaxes != ax or event.xdata is None or event.ydata is None: 421 return 422 423 # Round coordinates to integers for pixel-perfect annotation 424 x, y = round(event.xdata), round(event.ydata) 425 426 # Validate coordinates are within image bounds 427 img_h, img_w = template.shape[:2] 428 x = max(0, min(x, img_w - 1)) 429 y = max(0, min(y, img_h - 1)) 430 431 if event.button == 1: # Left click - add point 432 points.append((x, y)) 433 (point_marker,) = ax.plot(x, y, "go", markersize=10) 434 drawn_points.append(point_marker) 435 status_label.value = f"Points: {len(points)}/4" 436 fig.canvas.draw_idle() 437 438 def on_undo(_): 439 if points: 440 points.pop() 441 drawn_points.pop().remove() 442 status_label.value = f"Points: {len(points)}/4" 443 fig.canvas.draw_idle() 444 445 def on_done(_): 446 nonlocal cid 447 448 if len(points) != 4: 449 status_label.value = ( 450 f"Error: Need exactly 4 points! 
Currently have {len(points)}" 451 ) 452 return 453 454 fig.canvas.mpl_disconnect(cid) 455 456 # Crop the image 457 points_np = np.array(points) 458 img_h, img_w = template.shape[:2] 459 x_min = max(int(np.min(points_np[:, 0])) - margin, 0) 460 y_min = max(int(np.min(points_np[:, 1])) - margin, 0) 461 x_max = min(int(np.max(points_np[:, 0])) + margin, img_w) 462 y_max = min(int(np.max(points_np[:, 1])) + margin, img_h) 463 464 cropped = template[y_min:y_max, x_min:x_max] 465 466 # Save cropped image if path provided 467 if session._crop_path is not None: 468 cv.imwrite(os.fspath(session._crop_path), cropped) 469 470 plt.close(fig) 471 container.clear_output() 472 with container: 473 TableTemplate._show_annotation_ui(cropped, session) 474 475 done_button.on_click(on_done) 476 undo_button.on_click(on_undo) 477 478 cid = fig.canvas.mpl_connect("button_press_event", on_click) 479 480 # Anchor an Output widget to the cell, then render inside it so that 481 # on_done can clear and re-populate it without leaving the cell context. 
482 container = widgets.Output() 483 display(container) 484 with container: 485 plt.tight_layout(pad=0) 486 plt.show() 487 display(widgets.HBox([done_button, undo_button, status_label])) 488 489 @staticmethod 490 def _show_annotation_ui(template: MatLike, session: "AnnotationSession"): 491 """Show the line annotation UI using matplotlib + ipywidgets.""" 492 import ipywidgets as widgets 493 from IPython.display import display 494 495 print( 496 "\x1b[32m[Taulu]: Don't forget to save annotations with annotation.save()!\x1b[0m" 497 ) 498 499 display_img = cv.cvtColor(template, cv.COLOR_BGR2RGB) 500 501 lines: list[list[int]] = [] 502 start_point: list[tuple[int, int] | None] = [None] 503 drawn_lines: list = [] 504 start_markers: list = [] 505 506 fig, ax = plt.subplots(figsize=(15, 12)) 507 fig.canvas.toolbar_visible = False # ty:ignore[unresolved-attribute] 508 fig.canvas.header_visible = False # ty:ignore[unresolved-attribute] 509 ax.imshow(display_img, origin="upper") 510 ax.set_title("Click pairs of points to draw lines. 
Lines: 0") 511 ax.set_axis_off() 512 513 # Create ipywidgets buttons 514 done_button = widgets.Button( 515 description="Done Annotating", 516 button_style="success", 517 layout=widgets.Layout(width="200px", height="50px"), 518 ) 519 undo_button = widgets.Button( 520 description="Undo Last Line", 521 button_style="warning", 522 layout=widgets.Layout(width="200px", height="50px"), 523 ) 524 status_label = widgets.Label( 525 value="Click to start a line, click again to end it", 526 style={"font_size": "18px"}, 527 ) 528 529 done_button.style.font_size = "18px" 530 undo_button.style.font_size = "18px" 531 532 def on_click(event): 533 if event.inaxes != ax or event.xdata is None or event.ydata is None: 534 return 535 536 # Round coordinates to integers for pixel-perfect annotation 537 x, y = round(event.xdata), round(event.ydata) 538 539 # Validate coordinates are within image bounds 540 img_h, img_w = template.shape[:2] 541 x = max(0, min(x, img_w - 1)) 542 y = max(0, min(y, img_h - 1)) 543 544 if event.button == 1: # Left click 545 if start_point[0] is not None: 546 x0, y0 = start_point[0] 547 lines.append([x0, y0, x, y]) 548 (ln,) = ax.plot([x0, x], [y0, y], color="lime", linewidth=2) 549 drawn_lines.append(ln) 550 # Remove the start-point marker now that the line is complete 551 if start_markers: 552 start_markers.pop().remove() 553 ax.set_title( 554 f"Click pairs of points to draw lines. Lines: {len(lines)}" 555 ) 556 status_label.value = ( 557 f"Line {len(lines)} added. Click to start next line." 558 ) 559 fig.canvas.draw_idle() 560 start_point[0] = None 561 else: 562 start_point[0] = (x, y) 563 status_label.value = ( 564 f"Start point set at ({x}, {y}). Click end point." 
565 ) 566 # Draw a temporary marker (tracked so undo can remove it) 567 (marker,) = ax.plot(x, y, "ro", markersize=5) 568 start_markers.append(marker) 569 fig.canvas.draw_idle() 570 571 def on_undo(_): 572 # Clear any pending start-point marker 573 if start_markers: 574 start_markers.pop().remove() 575 start_point[0] = None 576 if lines: 577 lines.pop() 578 drawn_lines.pop().remove() 579 ax.set_title( 580 f"Click pairs of points to draw lines. Lines: {len(lines)}" 581 ) 582 status_label.value = f"Undone. Lines: {len(lines)}" 583 fig.canvas.draw_idle() 584 585 def on_done(_): 586 session._result = TableTemplate(lines) 587 fig.canvas.mpl_disconnect(cid) 588 done_button.disabled = True 589 undo_button.disabled = True 590 ax.set_title( 591 f"Done! {len(lines)} lines annotated. Call session.save() to save." 592 ) 593 status_label.value = ( 594 "Annotation complete! Run session.save('filename.json') to save." 595 ) 596 fig.canvas.draw_idle() 597 598 done_button.on_click(on_done) 599 undo_button.on_click(on_undo) 600 601 cid = fig.canvas.mpl_connect("button_press_event", on_click) 602 603 # Display figure first, then buttons below 604 plt.tight_layout(pad=0) 605 plt.show() 606 display(widgets.HBox([done_button, undo_button, status_label])) 607 608 @staticmethod 609 @log_calls(level=logging.DEBUG, include_return=True) 610 def _crop(template: MatLike, margin: int = 10) -> MatLike: 611 """ 612 Crop the image to contain only the annotations, such that it can be used as the header image in the taulu workflow. 
613 """ 614 615 points = [] 616 anno_template = np.copy(template) 617 618 def get_point(event, x, y, flags, params): 619 nonlocal points, anno_template 620 _ = flags 621 _ = params 622 if event == cv.EVENT_LBUTTONDOWN: 623 point = (x, y) 624 625 cv.circle( 626 anno_template, 627 (x, y), 628 4, 629 (0, 255, 0), 630 2, 631 ) 632 cv.imshow(constants.WINDOW, anno_template) 633 634 points.append(point) 635 elif event == cv.EVENT_RBUTTONDOWN: 636 # remove the last annotation 637 points = points[:-1] 638 639 anno_template = np.copy(anno_template) 640 641 for p in points: 642 cv.circle( 643 anno_template, 644 p, 645 4, 646 (0, 255, 0), 647 2, 648 ) 649 650 cv.imshow(constants.WINDOW, anno_template) 651 652 print(CROP_HELP) 653 654 imu.show(anno_template, get_point, title="crop the header") 655 656 assert len(points) == 4, ( 657 "you need to annotate the four corners of the table in order to crop it" 658 ) 659 660 # crop the image to contain all of the points (just crop rectangularly, x, y, w, h) 661 # Convert points to numpy array 662 points_np = np.array(points) 663 664 # Find bounding box 665 x_min = np.min(points_np[:, 0]) 666 y_min = np.min(points_np[:, 1]) 667 x_max = np.max(points_np[:, 0]) 668 y_max = np.max(points_np[:, 1]) 669 670 # Compute width and height 671 width = x_max - x_min 672 height = y_max - y_min 673 674 # Ensure integers and within image boundaries 675 x_min = max(int(x_min), 0) 676 y_min = max(int(y_min), 0) 677 width = int(width) 678 height = int(height) 679 680 # Crop the image 681 cropped = template[ 682 y_min - margin : y_min + height + margin, 683 x_min - margin : x_min + width + margin, 684 ] 685 686 return cropped 687 688 @staticmethod 689 def from_vgg_annotation(annotation: str) -> "TableTemplate": 690 """ 691 Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool. 
692 693 Args: 694 annotation (str): the path of the annotation csv file 695 696 Returns: 697 TableTemplate: a new template built from the polyline annotations. 698 """ 699 700 rules = [] 701 with open(annotation) as csvfile: 702 reader = csv.DictReader(csvfile) 703 for row in reader: 704 shape_attributes = json.loads(row["region_shape_attributes"]) 705 if shape_attributes["name"] == "polyline": 706 x_points = shape_attributes["all_points_x"] 707 y_points = shape_attributes["all_points_y"] 708 if len(x_points) == 2 and len(y_points) == 2: 709 rules.append( 710 [x_points[0], y_points[0], x_points[1], y_points[1]] 711 ) 712 713 return TableTemplate(rules) 714 715 def cell_width(self, i: int) -> int: 716 """Width of the ``i``-th cell column, in template pixels.""" 717 self._check_col_idx(i) 718 return int(self._v_rules[i + 1]._x - self._v_rules[i]._x) 719 720 def cell_widths(self, start: int = 0) -> list[int]: 721 """Widths of every cell column starting at ``start``, in template pixels.""" 722 return [self.cell_width(i) for i in range(start, self.cols)] 723 724 def cell_height(self, header_factor: float = 0.8) -> int: 725 """Estimated body row height as ``header_factor`` times the header height.""" 726 return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor) 727 728 def cell_heights(self, header_factors: list[float] | float) -> list[int]: 729 """Body row heights, one per element in ``header_factors`` (a single 730 float is broadcast to a 1-element list).""" 731 if isinstance(header_factors, float): 732 header_factors = [header_factors] 733 header_factors = cast(list, header_factors) 734 return [ 735 int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors 736 ] 737 738 def intersection(self, index: tuple[int, int]) -> tuple[float, float]: 739 """ 740 Get the intersection point of a horizontal and vertical rule. 741 742 Args: 743 index: ``(h, v)`` indices into the horizontal and vertical rule lists. 
744 745 Returns: 746 tuple[float, float]: the ``(x, y)`` intersection in template pixels. 747 """ 748 749 ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]]) 750 assert ints is not None 751 return ints 752 753 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 754 """ 755 Get the cell index (row, col) that corresponds with the point (x, y) in the template image 756 757 Args: 758 point (tuple[float, float]): the coordinates in the template image 759 760 Returns: 761 tuple[int, int]: (row, col) 762 """ 763 764 x, y = point 765 766 row = -1 767 col = -1 768 769 for i in range(self.rows): 770 y0 = self._h_rules[i]._y_at_x(x) 771 y1 = self._h_rules[i + 1]._y_at_x(x) 772 if min(y0, y1) <= y <= max(y0, y1): 773 row = i 774 break 775 776 for i in range(self.cols): 777 x0 = self._v_rules[i]._x_at_y(y) 778 x1 = self._v_rules[i + 1]._x_at_y(y) 779 if min(x0, x1) <= x <= max(x0, x1): 780 col = i 781 break 782 783 if row == -1 or col == -1: 784 return (-1, -1) 785 786 return (row, col) 787 788 def cell_polygon( 789 self, cell: tuple[int, int] 790 ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]: 791 """ 792 Return the four corner points enclosing ``cell`` in the order 793 (top-left, top-right, bottom-right, bottom-left). 794 795 Args: 796 cell: Cell indices as ``(row, col)``. 797 798 Returns: 799 Tuple of four ``(x, y)`` points in template-pixel coordinates. 800 801 Raises: 802 TauluException: if the surrounding rules do not intersect. 
803 """ 804 805 row, col = cell 806 807 self._check_col_idx(col) 808 self._check_row_idx(row) 809 810 top_rule = self._h_rules[row] 811 bottom_rule = self._h_rules[row + 1] 812 left_rule = self._v_rules[col] 813 right_rule = self._v_rules[col + 1] 814 815 # Calculate corner points using intersections 816 top_left = top_rule.intersection(left_rule) 817 top_right = top_rule.intersection(right_rule) 818 bottom_left = bottom_rule.intersection(left_rule) 819 bottom_right = bottom_rule.intersection(right_rule) 820 821 if not all( 822 point is not None 823 for point in [top_left, top_right, bottom_left, bottom_right] 824 ): 825 raise TauluException("the lines around this cell do not intersect") 826 827 return top_left, top_right, bottom_right, bottom_left # type:ignore 828 829 def region( 830 self, start: tuple[int, int], end: tuple[int, int] 831 ) -> tuple[Point, Point, Point, Point]: 832 """ 833 Bounding polygon of the rectangular range of cells from ``start`` to 834 ``end`` (both inclusive), as ``(lt, rt, rb, lb)`` integer pixel points. 
835 """ 836 self._check_row_idx(start[0]) 837 self._check_row_idx(end[0]) 838 self._check_col_idx(start[1]) 839 self._check_col_idx(end[1]) 840 841 # the rules that surround this row 842 top_rule = self._h_rules[start[0]] 843 bottom_rule = self._h_rules[end[0] + 1] 844 left_rule = self._v_rules[start[1]] 845 right_rule = self._v_rules[end[1] + 1] 846 847 # four points that will be the bounding polygon of the result, 848 # which needs to be rectified 849 top_left = top_rule.intersection(left_rule) 850 top_right = top_rule.intersection(right_rule) 851 bottom_left = bottom_rule.intersection(left_rule) 852 bottom_right = bottom_rule.intersection(right_rule) 853 854 if ( 855 top_left is None 856 or top_right is None 857 or bottom_left is None 858 or bottom_right is None 859 ): 860 raise TauluException("the lines around this row do not intersect properly") 861 862 def to_point(pnt) -> Point: 863 return (int(pnt[0]), int(pnt[1])) 864 865 return ( 866 to_point(top_left), 867 to_point(top_right), 868 to_point(bottom_right), 869 to_point(bottom_left), 870 ) 871 872 def text_regions( 873 self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20 874 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 875 """Not supported on a `TableTemplate`; always raises `TauluException`.""" 876 raise TauluException("text_regions should not be called on a TableTemplate")
Defines the structure of a table header as a set of rules (lines).
Created via TableTemplate.from_saved (loading a JSON annotation) or
AnnotationSession (interactive annotation). Provides cell position
lookups and expected row heights for the grid-growing algorithm.
def __init__(self, rules: Iterable[Iterable[int]]):
    """
    Build a template from raw line annotations.

    Args:
        rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
    """

    super().__init__()
    parsed = [_Rule(*raw) for raw in rules]
    self._rules = parsed
    # split the annotated lines into horizontal and vertical rules,
    # ordered top-to-bottom and left-to-right respectively
    self._h_rules = sorted(
        (r for r in parsed if r._is_horizontal()), key=lambda r: r._y
    )
    self._v_rules = sorted(
        (r for r in parsed if r._is_vertical()), key=lambda r: r._x
    )
Arguments:
- rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
@log_calls(level=logging.DEBUG)
def save(self, path: PathLike[str]):
    """
    Save the TableTemplate as JSON.

    Args:
        path: Destination JSON file path.
    """

    serialized = {"rules": [rule.to_dict() for rule in self._rules]}
    with open(path, "w") as f:
        json.dump(serialized, f)
Save the TableTemplate as JSON.
Arguments:
- path: Destination JSON file path.
@staticmethod
@log_calls(level=logging.DEBUG)
def from_saved(path: PathLike[str] | str) -> "TableTemplate":
    """
    Load a TableTemplate from a JSON file produced by `save`.

    Args:
        path: Path to the JSON file.

    Returns:
        TableTemplate: the deserialized template.
    """
    with open(path) as f:
        data = json.load(f)

    # rebuild the [x0, y0, x1, y1] quadruples from the serialized rule dicts
    rules = [
        [entry["x0"], entry["y0"], entry["x1"], entry["y1"]]
        for entry in data["rules"]
    ]

    return TableTemplate(rules)
Load a TableTemplate from a JSON file produced by save.
Arguments:
- path: Path to the JSON file.
Returns:
TableTemplate: the deserialized template.
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image(
    template: MatLike | str,
    crop: PathLike[str] | str | None = None,
    margin: int = 10,
) -> "TableTemplate":
    """
    Utility method that allows users to create a template from a template image.

    The user is asked to click to annotate lines (two clicks per line).
    A right click removes the most recent annotation.

    Args:
        template: the image on which to annotate the header lines
        crop (str | None): if a path, crop the template image first then
            do the annotation; the cropped image is written to this path.
        margin (int): margin to add around the cropping of the header

    Returns:
        TableTemplate: a new template built from the annotated lines.
    """

    if isinstance(template, str):
        value = cv.imread(template)
        assert value is not None
        template = value
    template = cast(MatLike, template)

    if crop is not None:
        cropped = TableTemplate._crop(template, margin)
        cv.imwrite(os.fspath(crop), cropped)
        template = cropped

    start_point: tuple[int, int] | None = None
    lines: list[list[int]] = []

    anno_template = np.copy(template)

    def get_point(event, x, y, flags, params):
        nonlocal lines, start_point, anno_template
        _ = flags
        _ = params
        if event == cv.EVENT_LBUTTONDOWN:
            if start_point is not None:
                x0, y0 = start_point
                lines.append([x0, y0, x, y])

                cv.line(
                    anno_template,
                    (x0, y0),
                    (x, y),
                    (0, 255, 0),
                    2,
                    cv.LINE_AA,
                )
                cv.imshow(constants.WINDOW, anno_template)

                start_point = None
            else:
                start_point = (x, y)
        elif event == cv.EVENT_RBUTTONDOWN:
            start_point = None

            # remove the last annotation
            lines = lines[:-1]

            # BUGFIX: redraw from a clean copy of the original image so the
            # undone line actually disappears; previously the code copied the
            # already-annotated overlay (leaving the removed line visible)
            # and then drew onto `template` itself, mutating the clean image.
            anno_template = np.copy(template)

            for line in lines:
                cv.line(
                    anno_template,
                    (line[0], line[1]),
                    (line[2], line[3]),
                    (0, 255, 0),
                    2,
                    cv.LINE_AA,
                )

            cv.imshow(constants.WINDOW, anno_template)

    print(ANNO_HELP)

    imu.show(anno_template, get_point, title="annotate the header")

    return TableTemplate(lines)
Utility method that allows users to create a template from a template image.
The user is asked to click to annotate lines (two clicks per line).
Arguments:
- template: the image on which to annotate the header lines
- crop (str | None): if a path, crop the template image first then do the annotation; the cropped image is written to this path.
- margin (int): margin to add around the cropping of the header
Returns:
TableTemplate: a new template built from the annotated lines.
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image_notebook(
    template: MatLike | str,
    crop: PathLike[str] | str | None = None,
    margin: int = 10,
) -> "AnnotationSession":
    """
    Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately.
    Interact with the widget and click Done to finalize.
    Access the result via session.result after clicking Done.

    Args:
        template: the image on which to annotate the header lines
        crop (str | None): if str, crop the template image first, then do the annotation.
            The cropped image will be stored at the supplied path
        margin (int): margin to add around the cropping of the header

    Returns:
        AnnotationSession: access .result after clicking Done to get the TableTemplate.
    """
    if isinstance(template, str):
        loaded = cv.imread(template)
        assert loaded is not None
        template = loaded

    session = AnnotationSession(crop)

    if crop is None:
        # no cropping requested: open the annotation UI straight away
        TableTemplate._show_annotation_ui(template, session)
    else:
        # crop first; the crop UI chains into the annotation UI when done
        TableTemplate._crop_notebook(template, margin, session)

    return session
Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately. Interact with the widget and click Done to finalize. Access the result via session.result after clicking Done.
Arguments:
- template: the image on which to annotate the header lines
- crop (str | None): if str, crop the template image first, then do the annotation. The cropped image will be stored at the supplied path
- margin (int): margin to add around the cropping of the header
Returns:
AnnotationSession: access .result after clicking Done to get the TableTemplate.
@staticmethod
def from_vgg_annotation(annotation: str) -> "TableTemplate":
    """
    Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.

    Args:
        annotation (str): the path of the annotation csv file

    Returns:
        TableTemplate: a new template built from the polyline annotations.
    """

    rules: list[list[int]] = []
    with open(annotation) as csvfile:
        for row in csv.DictReader(csvfile):
            shape = json.loads(row["region_shape_attributes"])
            if shape["name"] != "polyline":
                continue
            xs = shape["all_points_x"]
            ys = shape["all_points_y"]
            # only two-point polylines describe a single straight rule
            if len(xs) == 2 and len(ys) == 2:
                rules.append([xs[0], ys[0], xs[1], ys[1]])

    return TableTemplate(rules)
Create a TableTemplate from annotations made in vgg, using the polylines tool.
Arguments:
- annotation (str): the path of the annotation csv file
Returns:
TableTemplate: a new template built from the polyline annotations.
def cell_width(self, i: int) -> int:
    """Width of the ``i``-th cell column, in template pixels."""
    self._check_col_idx(i)
    left, right = self._v_rules[i], self._v_rules[i + 1]
    return int(right._x - left._x)
Width of the i-th cell column, in template pixels.
def cell_widths(self, start: int = 0) -> list[int]:
    """Widths of every cell column starting at ``start``, in template pixels."""
    return list(map(self.cell_width, range(start, self.cols)))
Widths of every cell column starting at start, in template pixels.
724 def cell_height(self, header_factor: float = 0.8) -> int: 725 """Estimated body row height as ``header_factor`` times the header height.""" 726 return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)
Estimated body row height as header_factor times the header height.
728 def cell_heights(self, header_factors: list[float] | float) -> list[int]: 729 """Body row heights, one per element in ``header_factors`` (a single 730 float is broadcast to a 1-element list).""" 731 if isinstance(header_factors, float): 732 header_factors = [header_factors] 733 header_factors = cast(list, header_factors) 734 return [ 735 int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors 736 ]
Body row heights, one per element in header_factors (a single
float is broadcast to a 1-element list).
738 def intersection(self, index: tuple[int, int]) -> tuple[float, float]: 739 """ 740 Get the intersection point of a horizontal and vertical rule. 741 742 Args: 743 index: ``(h, v)`` indices into the horizontal and vertical rule lists. 744 745 Returns: 746 tuple[float, float]: the ``(x, y)`` intersection in template pixels. 747 """ 748 749 ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]]) 750 assert ints is not None 751 return ints
Get the intersection point of a horizontal and vertical rule.
Arguments:
- index: (h, v) indices into the horizontal and vertical rule lists.
Returns:
tuple[float, float]: the (x, y) intersection in template pixels.
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Get the cell index (row, col) that corresponds with the point (x, y) in the template image

    Args:
        point (tuple[float, float]): the coordinates in the template image

    Returns:
        tuple[int, int]: (row, col)
    """

    x, y = point

    def first_band(count, lower, upper, coord):
        # index of the first band whose two bounding rules enclose `coord`,
        # or -1 when the coordinate falls outside every band
        for idx in range(count):
            a, b = lower(idx), upper(idx)
            if min(a, b) <= coord <= max(a, b):
                return idx
        return -1

    row = first_band(
        self.rows,
        lambda i: self._h_rules[i]._y_at_x(x),
        lambda i: self._h_rules[i + 1]._y_at_x(x),
        y,
    )
    col = first_band(
        self.cols,
        lambda i: self._v_rules[i]._x_at_y(y),
        lambda i: self._v_rules[i + 1]._x_at_y(y),
        x,
    )

    # a point outside the grid in either direction maps to (-1, -1)
    if row == -1 or col == -1:
        return (-1, -1)

    return (row, col)
Get the cell index (row, col) that corresponds with the point (x, y) in the template image
Arguments:
- point (tuple[float, float]): the coordinates in the template image
Returns:
tuple[int, int]: (row, col)
def cell_polygon(
    self, cell: tuple[int, int]
) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
    """
    Return the four corner points enclosing ``cell`` in the order
    (top-left, top-right, bottom-right, bottom-left).

    Args:
        cell: Cell indices as ``(row, col)``.

    Returns:
        Tuple of four ``(x, y)`` points in template-pixel coordinates.

    Raises:
        TauluException: if the surrounding rules do not intersect.
    """

    row, col = cell

    self._check_col_idx(col)
    self._check_row_idx(row)

    top = self._h_rules[row]
    bottom = self._h_rules[row + 1]
    left = self._v_rules[col]
    right = self._v_rules[col + 1]

    # corners in (tl, tr, br, bl) order, via rule intersections
    corners = (
        top.intersection(left),
        top.intersection(right),
        bottom.intersection(right),
        bottom.intersection(left),
    )

    if any(corner is None for corner in corners):
        raise TauluException("the lines around this cell do not intersect")

    return corners  # type:ignore
Return the four corner points enclosing cell in the order
(top-left, top-right, bottom-right, bottom-left).
Arguments:
- cell: Cell indices as (row, col).
Returns:
Tuple of four (x, y) points in template-pixel coordinates.
Raises:
- TauluException: if the surrounding rules do not intersect.
def region(
    self, start: tuple[int, int], end: tuple[int, int]
) -> tuple[Point, Point, Point, Point]:
    """
    Bounding polygon of the rectangular range of cells from ``start`` to
    ``end`` (both inclusive), as ``(lt, rt, rb, lb)`` integer pixel points.
    """
    self._check_row_idx(start[0])
    self._check_row_idx(end[0])
    self._check_col_idx(start[1])
    self._check_col_idx(end[1])

    # outer rules bounding the requested cell range
    top = self._h_rules[start[0]]
    bottom = self._h_rules[end[0] + 1]
    left = self._v_rules[start[1]]
    right = self._v_rules[end[1] + 1]

    lt = top.intersection(left)
    rt = top.intersection(right)
    lb = bottom.intersection(left)
    rb = bottom.intersection(right)

    if lt is None or rt is None or lb is None or rb is None:
        raise TauluException("the lines around this row do not intersect properly")

    def as_int_point(p) -> Point:
        return (int(p[0]), int(p[1]))

    return (
        as_int_point(lt),
        as_int_point(rt),
        as_int_point(rb),
        as_int_point(lb),
    )
Bounding polygon of the rectangular range of cells from start to
end (both inclusive), as (lt, rt, rb, lb) integer pixel points.
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """`TableTemplate` does not provide text regions; this always raises `TauluException`."""
    # templates describe only the header geometry, so per-row text lookup is undefined here
    raise TauluException("text_regions should not be called on a TableTemplate")
Not supported on a TableTemplate; always raises TauluException.
42class Taulu: 43 """ 44 High-level API for table segmentation from images. 45 46 Taulu orchestrates header alignment, grid detection, and table segmentation 47 into a single workflow. 48 49 Workflow: 50 1. Create annotated header images via `Taulu.annotate()` 51 2. Initialize Taulu with header(s) and parameters 52 3. Call `segment_table()` to get a `SegmentedTable` with cell boundaries 53 54 For two-page tables, use `Split[T]` to provide different parameters for 55 left and right sides. 56 57 Example: 58 >>> import cv2 59 >>> from taulu import Taulu 60 >>> Taulu.annotate("table_image.png", "header.png") 61 >>> taulu = Taulu("header.png") 62 >>> grid = taulu.segment_table("table_page_01.png") 63 >>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0)) 64 """ 65 66 def __init__( 67 self, 68 template_path: Splittable[PathLike[str]] | Splittable[str], 69 row_height_factor: Splittable[float] | Splittable[list[float]] | None = None, 70 annotation_path: Splittable[PathLike[str]] | Splittable[str] | None = None, 71 binarization_sensitivity: Splittable[float] = 0.25, 72 search_radius: Splittable[int] = 60, 73 position_weight: Splittable[float] = 0.4, 74 line_thickness: Splittable[int] = 10, 75 line_gap_fill: Splittable[int] = 4, 76 intersection_kernel_size: Splittable[int] = 41, 77 detection_scale: Splittable[float] = 1.0, 78 pathfinding_threshold: Splittable[float] = 0.2, 79 min_rows: Splittable[int] = 5, 80 extrapolation_distance: Splittable[int] = 3, 81 detection_threshold: Splittable[float] = 0.3, 82 smooth: bool = False, 83 smooth_strength: float = 0.5, 84 smooth_iterations: int = 1, 85 smooth_degree: int = 1, 86 growing_resets: Splittable[int] = 0, 87 reset_fraction: Splittable[float] = 0.5, 88 feature_detector: Splittable[FeatureDetector] = "akaze", 89 matching_scale: float = 1.0, 90 auto_row_heights: bool = False, 91 min_row_height_factor: Splittable[float] = 0.5, 92 max_row_height_factor: Splittable[float] = 1.5, 93 row_detection_path_scale: float 
= 0.25, 94 ): 95 """ 96 Args: 97 template_path: Path to header template image(s). Use `Split` for two-page tables. 98 row_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0] 99 annotation_path: Explicit annotation JSON path. Default: inferred from image path. 100 binarization_sensitivity: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25 101 search_radius: Corner search area in pixels. Default: 60 102 position_weight: Position penalty weight [0, 1]. Default: 0.4 103 line_thickness: Cross-kernel width matching line thickness. Default: 10 104 line_gap_fill: Morphological dilation size. Default: 4 105 intersection_kernel_size: Cross-kernel size (odd). Default: 41 106 detection_scale: Image downscale factor (0, 1]. Default: 1.0 107 pathfinding_threshold: Confidence to skip A* pathfinding. Default: 0.2 108 min_rows: Minimum rows before completion. Default: 5 109 extrapolation_distance: Rows to examine for extrapolation. Default: 3 110 detection_threshold: Corner acceptance confidence [0, 1]. Default: 0.3 111 smooth: Apply grid smoothing after detection. Default: False 112 smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5 113 smooth_iterations: Number of smoothing passes. Default: 1 114 smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1 115 growing_resets: Number of grid cuts during growing. Default: 0 116 reset_fraction: Fraction of points to delete per cut. Default: 0.5 117 feature_detector: Feature matching method for header alignment. One of "orb" 118 (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust, 119 patent-free). Default: "akaze" 120 matching_scale: Downscale factor (0, 1] for header alignment only. Lower 121 values speed up feature matching. Default: 1.0 122 auto_row_heights: If True, detect variable per-row heights from the 123 cross-correlation map at runtime (overriding `row_height_factor`). 
124 Default: False 125 min_row_height_factor: Minimum row height as a fraction of the header 126 height when `auto_row_heights` is enabled. Default: 0.5 127 max_row_height_factor: Maximum row height as a fraction of the header 128 height when `auto_row_heights` is enabled. Default: 1.5 129 row_detection_path_scale: Downscale factor (0, 1] for the A* path 130 following used by `auto_row_heights`. Lower = faster, less precise. 131 Default: 0.25 132 """ 133 self._detection_scale = detection_scale 134 self._auto_row_heights = auto_row_heights 135 self._min_row_height_factor = min_row_height_factor 136 self._max_row_height_factor = max_row_height_factor 137 self._row_detection_path_scale = row_detection_path_scale 138 self._smooth = smooth 139 self._smooth_strength = smooth_strength 140 self._smooth_iterations = smooth_iterations 141 self._smooth_degree = smooth_degree 142 143 if row_height_factor is None: 144 row_height_factor = [1.0] 145 146 self._row_height_factor = row_height_factor 147 148 if isinstance(template_path, Split) or isinstance(annotation_path, Split): 149 header = Split(Path(template_path.left), Path(template_path.right)) # ty:ignore[unresolved-attribute] 150 151 if not exists(header.left.with_suffix(".png")) or not exists( 152 header.right.with_suffix(".png") 153 ): 154 raise TauluException( 155 "The header images you provided do not exist (or they aren't .png files)" 156 ) 157 158 if annotation_path is None: 159 if not exists(header.left.with_suffix(".json")) or not exists( 160 header.right.with_suffix(".json") 161 ): 162 raise TauluException( 163 "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method" 164 ) 165 166 template_left = TableTemplate.from_saved( 167 header.left.with_suffix(".json") 168 ) 169 template_right = TableTemplate.from_saved( 170 header.right.with_suffix(".json") 171 ) 172 173 else: 174 if not exists(annotation_path.left) or not exists( # ty: ignore[unresolved-attribute] 175 annotation_path.right # 
ty: ignore[unresolved-attribute] 176 ): 177 raise TauluException( 178 "The header annotation files you provided do not exist (or they aren't .json files)" 179 ) 180 181 template_left = TableTemplate.from_saved(annotation_path.left) # ty: ignore[unresolved-attribute] 182 template_right = TableTemplate.from_saved(annotation_path.right) # ty: ignore[unresolved-attribute] 183 184 self._header = Split( 185 cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right)) 186 ) 187 188 self._aligner = Split( 189 TemplateMatcher( 190 self._header.left, 191 method=get_param(feature_detector, "left"), 192 scale=matching_scale, 193 ), 194 TemplateMatcher( 195 self._header.right, 196 method=get_param(feature_detector, "right"), 197 scale=matching_scale, 198 ), 199 ) 200 201 self._template = Split(template_left, template_right) 202 203 self._cell_heights = Split( 204 self._template.left.cell_heights(get_param(row_height_factor, "left")), 205 self._template.right.cell_heights( 206 get_param(row_height_factor, "right") 207 ), 208 ) 209 210 # Create TableDetector for left and right with potentially different parameters 211 self._grid_detector = Split( 212 TableDetector( 213 intersection_kernel_size=get_param( 214 intersection_kernel_size, "left" 215 ), 216 line_thickness=get_param(line_thickness, "left"), 217 line_gap_fill=get_param(line_gap_fill, "left"), 218 search_radius=get_param(search_radius, "left"), 219 binarization_sensitivity=get_param( 220 binarization_sensitivity, "left" 221 ), 222 position_weight=get_param(position_weight, "left"), 223 detection_scale=get_param(self._detection_scale, "left"), 224 pathfinding_threshold=get_param(pathfinding_threshold, "left"), 225 min_rows=get_param(min_rows, "left"), 226 extrapolation_distance=get_param(extrapolation_distance, "left"), 227 detection_threshold=get_param(detection_threshold, "left"), 228 growing_resets=get_param(growing_resets, "left"), 229 reset_fraction=get_param(reset_fraction, "left"), 230 ), 231 
TableDetector( 232 intersection_kernel_size=get_param( 233 intersection_kernel_size, "right" 234 ), 235 line_thickness=get_param(line_thickness, "right"), 236 line_gap_fill=get_param(line_gap_fill, "right"), 237 search_radius=get_param(search_radius, "right"), 238 binarization_sensitivity=get_param( 239 binarization_sensitivity, "right" 240 ), 241 position_weight=get_param(position_weight, "right"), 242 detection_scale=get_param(self._detection_scale, "right"), 243 pathfinding_threshold=get_param(pathfinding_threshold, "right"), 244 min_rows=get_param(min_rows, "right"), 245 extrapolation_distance=get_param(extrapolation_distance, "right"), 246 detection_threshold=get_param(detection_threshold, "right"), 247 growing_resets=get_param(growing_resets, "right"), 248 reset_fraction=get_param(reset_fraction, "right"), 249 ), 250 ) 251 252 else: 253 template_path = Path(template_path) 254 self._header = cv2.imread(os.fspath(template_path)) 255 self._aligner = TemplateMatcher( 256 self._header, 257 method=cast(FeatureDetector, feature_detector), 258 scale=matching_scale, 259 ) 260 self._template = TableTemplate.from_saved( 261 template_path.with_suffix(".json") 262 ) 263 264 # For single header, parameters should not be Split objects 265 if any( 266 isinstance(param, Split) 267 for param in [ 268 binarization_sensitivity, 269 search_radius, 270 position_weight, 271 line_thickness, 272 line_gap_fill, 273 intersection_kernel_size, 274 detection_scale, 275 min_rows, 276 extrapolation_distance, 277 detection_threshold, 278 row_height_factor, 279 growing_resets, 280 reset_fraction, 281 feature_detector, 282 ] 283 ): 284 raise TauluException( 285 "Split parameters can only be used with split headers (tuple header_path)" 286 ) 287 288 self._cell_heights = self._template.cell_heights( 289 cast(list[float] | float, self._row_height_factor) 290 ) 291 292 self._grid_detector = TableDetector( 293 intersection_kernel_size=intersection_kernel_size, # ty: ignore 294 
line_thickness=line_thickness, # ty: ignore 295 line_gap_fill=line_gap_fill, # ty: ignore 296 search_radius=search_radius, # ty: ignore 297 binarization_sensitivity=binarization_sensitivity, # ty: ignore 298 position_weight=position_weight, # ty: ignore 299 detection_scale=self._detection_scale, # ty: ignore 300 pathfinding_threshold=pathfinding_threshold, # ty: ignore 301 min_rows=min_rows, # ty: ignore 302 extrapolation_distance=extrapolation_distance, # ty: ignore 303 detection_threshold=detection_threshold, # ty: ignore 304 growing_resets=growing_resets, # ty:ignore 305 reset_fraction=reset_fraction, # ty:ignore 306 ) 307 308 @classmethod 309 def from_config(cls, config: TauluConfig) -> "Taulu": 310 """ 311 Create a :class:`Taulu` instance from a :class:`~taulu.config.TauluConfig`. 312 313 Args: 314 config: A populated :class:`~taulu.config.TauluConfig` instance. 315 316 Returns: 317 A :class:`Taulu` instance configured according to ``config``. 318 319 Example: 320 >>> from taulu import Taulu 321 >>> from taulu.config import TauluConfig 322 >>> config = TauluConfig.from_toml("my_table.toml") 323 >>> taulu = Taulu.from_config(config) 324 """ 325 return cls(**{name: getattr(config, name) for name in config.model_fields}) 326 327 @staticmethod 328 def annotate( 329 image_path: PathLike[str] | str, 330 output_path: PathLike[str] | str, 331 *, 332 backend: Literal["auto", "gui", "notebook"] = "auto", 333 ): 334 """ 335 Interactive tool to create header annotations for table segmentation. 336 337 This method guides you through a two-step annotation process: 338 339 1. **Crop the header**: Click four corners to define the header region 340 2. 
**Annotate lines**: Click pairs of points to define each vertical and 341 horizontal line in the header 342 343 The annotations are saved as: 344 - A cropped header image (.png) at `output_path` 345 - A JSON file (.json) containing line coordinates 346 347 ## Annotation Guidelines 348 349 **Which lines to annotate:** 350 - All vertical lines that extend into the table body (column separators) 351 - The top horizontal line of the header 352 - The bottom horizontal line of the header (top of data rows) 353 354 **Order doesn't matter** - annotate lines in any order that's convenient. 355 356 **To annotate a line:** 357 1. Click once at one endpoint 358 2. Click again at the other endpoint 359 3. A green line appears showing your annotation 360 361 **To undo:** 362 - Right-click anywhere to remove the last line you drew 363 364 **When finished:** 365 - Press 'n' to save and exit 366 - Press 'q' to quit without saving 367 368 Args: 369 image_path (PathLike[str] | str): Path to a table image containing 370 a clear view of the header. This can be a full table image. 371 output_path (PathLike[str] | str): Where to save the cropped header 372 image. The annotation JSON will be saved with the same name but 373 .json extension. 
374 375 Raises: 376 TauluException: If image_path doesn't exist or output_path is a directory 377 378 Examples: 379 Annotate a single header: 380 381 >>> from taulu import Taulu 382 >>> Taulu.annotate("scan_page_01.png", "header.png") 383 # Interactive window opens 384 # After annotation: creates header.png and header.json 385 386 Annotate left and right headers for a split table: 387 388 >>> Taulu.annotate("scan_page_01.png", "header_left.png") 389 >>> Taulu.annotate("scan_page_01.png", "header_right.png") 390 # Creates header_left.{png,json} and header_right.{png,json} 391 392 Notes: 393 - The header image doesn't need to be perfectly cropped initially - 394 the tool will help you crop it precisely 395 - Annotation accuracy is important: misaligned lines will cause 396 segmentation errors 397 - You can re-run this method to update annotations if needed 398 """ 399 400 if not exists(image_path): 401 raise TauluException(f"Image path {image_path} does not exist") 402 403 if os.path.isdir(output_path): 404 raise TauluException("Output path should be a file") 405 406 output_path = Path(output_path) 407 408 def running_in_notebook() -> bool: 409 try: 410 from IPython import get_ipython 411 412 ip = get_ipython() 413 return ip is not None and "IPKernelApp" in ip.config 414 except Exception: 415 return False 416 417 # Decide backend 418 if backend not in ("auto", "gui", "notebook"): 419 raise TauluException("backend must be one of: 'auto', 'gui', 'notebook'") 420 if backend == "auto": 421 use_notebook = running_in_notebook() 422 else: 423 use_notebook = backend == "notebook" 424 425 if use_notebook: 426 # Notebook way 427 logger.info( 428 "\x1b[32mNotebook environment detected/selected. Using notebook annotation backend." 
429 ) 430 session = TableTemplate.annotate_image_notebook( 431 os.fspath(image_path), crop=output_path.with_suffix(".png") 432 ) 433 session._save_path = output_path.with_suffix(".json") # ty: ignore[unresolved-attribute] 434 return session 435 436 else: 437 # GUI way 438 template = TableTemplate.annotate_image( 439 os.fspath(image_path), crop=output_path.with_suffix(".png") 440 ) 441 template.save(output_path.with_suffix(".json")) 442 443 def segment_table( 444 self, 445 image: MatLike | PathLike[str] | str, 446 filtered: MatLike | PathLike[str] | str | None = None, 447 debug_view: bool = False, 448 debug_view_notebook: bool = False, 449 ) -> SegmentedTable: 450 """ 451 Segment a table image into a grid of cells. 452 453 Orchestrates header alignment, grid detection, corner growing, and 454 extrapolation to produce a complete grid structure. 455 456 Args: 457 image: Table image to segment (file path or numpy array). 458 filtered: Optional pre-filtered binary image for corner detection. 459 If provided, binarization parameters are ignored. 460 debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance, 461 'q' to quit. Default: False 462 debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook 463 using matplotlib. Default: False 464 465 Returns: 466 SegmentedTable: Grid structure with methods for cell access (`crop_cell`, 467 `cell_polygon`), visualization (`show_cells`), and persistence 468 (`save`, `from_saved`). 469 470 Raises: 471 TauluException: If image cannot be loaded or grid detection fails. 
472 """ 473 474 if not isinstance(image, MatLike): 475 image = cast(str | PathLike[str], image) 476 tmp_image = cv2.imread(os.fspath(image)) 477 assert tmp_image is not None 478 image = tmp_image 479 480 now = perf_counter() 481 h = self._aligner.align( 482 image, # ty: ignore[invalid-argument-type] 483 visual=debug_view, 484 visual_notebook=debug_view_notebook, 485 ) 486 align_time = perf_counter() - now 487 logger.info(f"Header alignment took {align_time:.2f} seconds") 488 489 # find the starting point for the table grid algorithm 490 491 def make_top_row(template: TableTemplate, aligner: TemplateMatcher, h: NDArray): 492 top_row = [] 493 for x in range(template.cols + 1): 494 on_template = template.intersection((1, x)) 495 on_template = (int(on_template[0]), int(on_template[1])) 496 497 on_img = aligner.template_to_img(h, on_template) 498 499 top_row.append(on_img) 500 501 return top_row 502 503 if isinstance(self._aligner, Split): 504 top_row = Split( 505 make_top_row(self._template.left, self._aligner.left, h.left), # ty:ignore 506 make_top_row(self._template.right, self._aligner.right, h.right), # ty:ignore 507 ) 508 else: 509 top_row = make_top_row(self._template, self._aligner, h) # ty:ignore 510 511 cell_heights = self._cell_heights 512 filtered_pre: MatLike | Split | None = None 513 514 if self._auto_row_heights: 515 now_ar = perf_counter() 516 if isinstance(self._grid_detector, Split): 517 filtered_pre = self._grid_detector.apply(image, visual=debug_view) # ty:ignore 518 assert isinstance(self._template, Split) 519 header_h = Split( 520 self._template.left.cell_height(1.0), # ty:ignore[unresolved-attribute] 521 self._template.right.cell_height(1.0), # ty:ignore[unresolved-attribute] 522 ) 523 min_h = Split( 524 int(header_h.left * get_param(self._min_row_height_factor, "left")), 525 int( 526 header_h.right * get_param(self._min_row_height_factor, "right") 527 ), 528 ) 529 max_h = Split( 530 int(header_h.left * get_param(self._max_row_height_factor, 
"left")), 531 int( 532 header_h.right * get_param(self._max_row_height_factor, "right") 533 ), 534 ) 535 detected = self._grid_detector.detect_row_heights( 536 image, 537 filtered_pre, 538 top_row, 539 min_h, 540 max_h, 541 path_scale=self._row_detection_path_scale, 542 ) 543 # detected is Split[list[int]]; fall back per side if empty. 544 cell_heights = Split( 545 detected.left or self._cell_heights.left, # ty:ignore[unresolved-attribute] 546 detected.right or self._cell_heights.right, # ty:ignore[unresolved-attribute] 547 ) 548 else: 549 filtered_pre = self._grid_detector.apply(image, visual=debug_view) # ty:ignore 550 header_h_one = self._template.cell_height(1.0) 551 min_h_one = int(header_h_one * cast(float, self._min_row_height_factor)) 552 max_h_one = int(header_h_one * cast(float, self._max_row_height_factor)) 553 detected_one = self._grid_detector.detect_row_heights( 554 image, # ty:ignore[invalid-argument-type] 555 filtered_pre, 556 top_row, # ty:ignore 557 min_h_one, 558 max_h_one, 559 path_scale=self._row_detection_path_scale, 560 ) 561 cell_heights = detected_one or self._cell_heights 562 ar_time = perf_counter() - now_ar 563 logger.info(f"Row-height detection took {ar_time:.2f} seconds") 564 565 now = perf_counter() 566 table = self._grid_detector.find_table_points( 567 image, # ty:ignore 568 top_row, # ty:ignore 569 self._template.cell_widths(0), 570 cell_heights, # ty:ignore 571 visual=debug_view, 572 visual_notebook=debug_view_notebook, 573 filtered=filtered if filtered is not None else filtered_pre, # ty:ignore 574 smooth=self._smooth, 575 smooth_strength=self._smooth_strength, 576 smooth_iterations=self._smooth_iterations, 577 smooth_degree=self._smooth_degree, 578 ) 579 grid_time = perf_counter() - now 580 logger.info(f"Grid detection took {grid_time:.2f} seconds") 581 582 if debug_view_notebook: 583 self._aligner.show_matches_notebook() 584 585 if isinstance(table, Split): 586 table = SegmentedTable.from_split(table, (0, 0)) # ty: ignore 587 
588 return table
High-level API for table segmentation from images.
Taulu orchestrates header alignment, grid detection, and table segmentation into a single workflow.
Workflow:
- Create annotated header images via
Taulu.annotate()
- Initialize Taulu with header(s) and parameters
- Call
segment_table() to get a SegmentedTable with cell boundaries
For two-page tables, use Split[T] to provide different parameters for
left and right sides.
Example:
>>> import cv2
>>> from taulu import Taulu
>>> Taulu.annotate("table_image.png", "header.png")
>>> taulu = Taulu("header.png")
>>> grid = taulu.segment_table("table_page_01.png")
>>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0))
66 def __init__( 67 self, 68 template_path: Splittable[PathLike[str]] | Splittable[str], 69 row_height_factor: Splittable[float] | Splittable[list[float]] | None = None, 70 annotation_path: Splittable[PathLike[str]] | Splittable[str] | None = None, 71 binarization_sensitivity: Splittable[float] = 0.25, 72 search_radius: Splittable[int] = 60, 73 position_weight: Splittable[float] = 0.4, 74 line_thickness: Splittable[int] = 10, 75 line_gap_fill: Splittable[int] = 4, 76 intersection_kernel_size: Splittable[int] = 41, 77 detection_scale: Splittable[float] = 1.0, 78 pathfinding_threshold: Splittable[float] = 0.2, 79 min_rows: Splittable[int] = 5, 80 extrapolation_distance: Splittable[int] = 3, 81 detection_threshold: Splittable[float] = 0.3, 82 smooth: bool = False, 83 smooth_strength: float = 0.5, 84 smooth_iterations: int = 1, 85 smooth_degree: int = 1, 86 growing_resets: Splittable[int] = 0, 87 reset_fraction: Splittable[float] = 0.5, 88 feature_detector: Splittable[FeatureDetector] = "akaze", 89 matching_scale: float = 1.0, 90 auto_row_heights: bool = False, 91 min_row_height_factor: Splittable[float] = 0.5, 92 max_row_height_factor: Splittable[float] = 1.5, 93 row_detection_path_scale: float = 0.25, 94 ): 95 """ 96 Args: 97 template_path: Path to header template image(s). Use `Split` for two-page tables. 98 row_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0] 99 annotation_path: Explicit annotation JSON path. Default: inferred from image path. 100 binarization_sensitivity: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25 101 search_radius: Corner search area in pixels. Default: 60 102 position_weight: Position penalty weight [0, 1]. Default: 0.4 103 line_thickness: Cross-kernel width matching line thickness. Default: 10 104 line_gap_fill: Morphological dilation size. Default: 4 105 intersection_kernel_size: Cross-kernel size (odd). Default: 41 106 detection_scale: Image downscale factor (0, 1]. 
Default: 1.0 107 pathfinding_threshold: Confidence to skip A* pathfinding. Default: 0.2 108 min_rows: Minimum rows before completion. Default: 5 109 extrapolation_distance: Rows to examine for extrapolation. Default: 3 110 detection_threshold: Corner acceptance confidence [0, 1]. Default: 0.3 111 smooth: Apply grid smoothing after detection. Default: False 112 smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5 113 smooth_iterations: Number of smoothing passes. Default: 1 114 smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1 115 growing_resets: Number of grid cuts during growing. Default: 0 116 reset_fraction: Fraction of points to delete per cut. Default: 0.5 117 feature_detector: Feature matching method for header alignment. One of "orb" 118 (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust, 119 patent-free). Default: "akaze" 120 matching_scale: Downscale factor (0, 1] for header alignment only. Lower 121 values speed up feature matching. Default: 1.0 122 auto_row_heights: If True, detect variable per-row heights from the 123 cross-correlation map at runtime (overriding `row_height_factor`). 124 Default: False 125 min_row_height_factor: Minimum row height as a fraction of the header 126 height when `auto_row_heights` is enabled. Default: 0.5 127 max_row_height_factor: Maximum row height as a fraction of the header 128 height when `auto_row_heights` is enabled. Default: 1.5 129 row_detection_path_scale: Downscale factor (0, 1] for the A* path 130 following used by `auto_row_heights`. Lower = faster, less precise. 
131 Default: 0.25 132 """ 133 self._detection_scale = detection_scale 134 self._auto_row_heights = auto_row_heights 135 self._min_row_height_factor = min_row_height_factor 136 self._max_row_height_factor = max_row_height_factor 137 self._row_detection_path_scale = row_detection_path_scale 138 self._smooth = smooth 139 self._smooth_strength = smooth_strength 140 self._smooth_iterations = smooth_iterations 141 self._smooth_degree = smooth_degree 142 143 if row_height_factor is None: 144 row_height_factor = [1.0] 145 146 self._row_height_factor = row_height_factor 147 148 if isinstance(template_path, Split) or isinstance(annotation_path, Split): 149 header = Split(Path(template_path.left), Path(template_path.right)) # ty:ignore[unresolved-attribute] 150 151 if not exists(header.left.with_suffix(".png")) or not exists( 152 header.right.with_suffix(".png") 153 ): 154 raise TauluException( 155 "The header images you provided do not exist (or they aren't .png files)" 156 ) 157 158 if annotation_path is None: 159 if not exists(header.left.with_suffix(".json")) or not exists( 160 header.right.with_suffix(".json") 161 ): 162 raise TauluException( 163 "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method" 164 ) 165 166 template_left = TableTemplate.from_saved( 167 header.left.with_suffix(".json") 168 ) 169 template_right = TableTemplate.from_saved( 170 header.right.with_suffix(".json") 171 ) 172 173 else: 174 if not exists(annotation_path.left) or not exists( # ty: ignore[unresolved-attribute] 175 annotation_path.right # ty: ignore[unresolved-attribute] 176 ): 177 raise TauluException( 178 "The header annotation files you provided do not exist (or they aren't .json files)" 179 ) 180 181 template_left = TableTemplate.from_saved(annotation_path.left) # ty: ignore[unresolved-attribute] 182 template_right = TableTemplate.from_saved(annotation_path.right) # ty: ignore[unresolved-attribute] 183 184 self._header = Split( 185 
cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right)) 186 ) 187 188 self._aligner = Split( 189 TemplateMatcher( 190 self._header.left, 191 method=get_param(feature_detector, "left"), 192 scale=matching_scale, 193 ), 194 TemplateMatcher( 195 self._header.right, 196 method=get_param(feature_detector, "right"), 197 scale=matching_scale, 198 ), 199 ) 200 201 self._template = Split(template_left, template_right) 202 203 self._cell_heights = Split( 204 self._template.left.cell_heights(get_param(row_height_factor, "left")), 205 self._template.right.cell_heights( 206 get_param(row_height_factor, "right") 207 ), 208 ) 209 210 # Create TableDetector for left and right with potentially different parameters 211 self._grid_detector = Split( 212 TableDetector( 213 intersection_kernel_size=get_param( 214 intersection_kernel_size, "left" 215 ), 216 line_thickness=get_param(line_thickness, "left"), 217 line_gap_fill=get_param(line_gap_fill, "left"), 218 search_radius=get_param(search_radius, "left"), 219 binarization_sensitivity=get_param( 220 binarization_sensitivity, "left" 221 ), 222 position_weight=get_param(position_weight, "left"), 223 detection_scale=get_param(self._detection_scale, "left"), 224 pathfinding_threshold=get_param(pathfinding_threshold, "left"), 225 min_rows=get_param(min_rows, "left"), 226 extrapolation_distance=get_param(extrapolation_distance, "left"), 227 detection_threshold=get_param(detection_threshold, "left"), 228 growing_resets=get_param(growing_resets, "left"), 229 reset_fraction=get_param(reset_fraction, "left"), 230 ), 231 TableDetector( 232 intersection_kernel_size=get_param( 233 intersection_kernel_size, "right" 234 ), 235 line_thickness=get_param(line_thickness, "right"), 236 line_gap_fill=get_param(line_gap_fill, "right"), 237 search_radius=get_param(search_radius, "right"), 238 binarization_sensitivity=get_param( 239 binarization_sensitivity, "right" 240 ), 241 position_weight=get_param(position_weight, "right"), 242 
detection_scale=get_param(self._detection_scale, "right"), 243 pathfinding_threshold=get_param(pathfinding_threshold, "right"), 244 min_rows=get_param(min_rows, "right"), 245 extrapolation_distance=get_param(extrapolation_distance, "right"), 246 detection_threshold=get_param(detection_threshold, "right"), 247 growing_resets=get_param(growing_resets, "right"), 248 reset_fraction=get_param(reset_fraction, "right"), 249 ), 250 ) 251 252 else: 253 template_path = Path(template_path) 254 self._header = cv2.imread(os.fspath(template_path)) 255 self._aligner = TemplateMatcher( 256 self._header, 257 method=cast(FeatureDetector, feature_detector), 258 scale=matching_scale, 259 ) 260 self._template = TableTemplate.from_saved( 261 template_path.with_suffix(".json") 262 ) 263 264 # For single header, parameters should not be Split objects 265 if any( 266 isinstance(param, Split) 267 for param in [ 268 binarization_sensitivity, 269 search_radius, 270 position_weight, 271 line_thickness, 272 line_gap_fill, 273 intersection_kernel_size, 274 detection_scale, 275 min_rows, 276 extrapolation_distance, 277 detection_threshold, 278 row_height_factor, 279 growing_resets, 280 reset_fraction, 281 feature_detector, 282 ] 283 ): 284 raise TauluException( 285 "Split parameters can only be used with split headers (tuple header_path)" 286 ) 287 288 self._cell_heights = self._template.cell_heights( 289 cast(list[float] | float, self._row_height_factor) 290 ) 291 292 self._grid_detector = TableDetector( 293 intersection_kernel_size=intersection_kernel_size, # ty: ignore 294 line_thickness=line_thickness, # ty: ignore 295 line_gap_fill=line_gap_fill, # ty: ignore 296 search_radius=search_radius, # ty: ignore 297 binarization_sensitivity=binarization_sensitivity, # ty: ignore 298 position_weight=position_weight, # ty: ignore 299 detection_scale=self._detection_scale, # ty: ignore 300 pathfinding_threshold=pathfinding_threshold, # ty: ignore 301 min_rows=min_rows, # ty: ignore 302 
extrapolation_distance=extrapolation_distance, # ty: ignore 303 detection_threshold=detection_threshold, # ty: ignore 304 growing_resets=growing_resets, # ty:ignore 305 reset_fraction=reset_fraction, # ty:ignore 306 )
Arguments:
- template_path: Path to header template image(s). Use
Split for two-page tables.
- row_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
- annotation_path: Explicit annotation JSON path. Default: inferred from image path.
- binarization_sensitivity: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
- search_radius: Corner search area in pixels. Default: 60
- position_weight: Position penalty weight [0, 1]. Default: 0.4
- line_thickness: Cross-kernel width matching line thickness. Default: 10
- line_gap_fill: Morphological dilation size. Default: 4
- intersection_kernel_size: Cross-kernel size (odd). Default: 41
- detection_scale: Image downscale factor (0, 1]. Default: 1.0
- pathfinding_threshold: Confidence to skip A* pathfinding. Default: 0.2
- min_rows: Minimum rows before completion. Default: 5
- extrapolation_distance: Rows to examine for extrapolation. Default: 3
- detection_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
- smooth: Apply grid smoothing after detection. Default: False
- smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5
- smooth_iterations: Number of smoothing passes. Default: 1
- smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1
- growing_resets: Number of grid cuts during growing. Default: 0
- reset_fraction: Fraction of points to delete per cut. Default: 0.5
- feature_detector: Feature matching method for header alignment. One of "orb" (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust, patent-free). Default: "akaze"
- matching_scale: Downscale factor (0, 1] for header alignment only. Lower values speed up feature matching. Default: 1.0
- auto_row_heights: If True, detect variable per-row heights from the
cross-correlation map at runtime (overriding
row_height_factor). Default: False
- min_row_height_factor: Minimum row height as a fraction of the header
height when
auto_row_heights is enabled. Default: 0.5
- max_row_height_factor: Maximum row height as a fraction of the header
height when
auto_row_heights is enabled. Default: 1.5
- row_detection_path_scale: Downscale factor (0, 1] for the A* path
following used by
auto_row_heights. Lower = faster, less precise. Default: 0.25
308 @classmethod 309 def from_config(cls, config: TauluConfig) -> "Taulu": 310 """ 311 Create a :class:`Taulu` instance from a :class:`~taulu.config.TauluConfig`. 312 313 Args: 314 config: A populated :class:`~taulu.config.TauluConfig` instance. 315 316 Returns: 317 A :class:`Taulu` instance configured according to ``config``. 318 319 Example: 320 >>> from taulu import Taulu 321 >>> from taulu.config import TauluConfig 322 >>> config = TauluConfig.from_toml("my_table.toml") 323 >>> taulu = Taulu.from_config(config) 324 """ 325 return cls(**{name: getattr(config, name) for name in config.model_fields})
Create a Taulu instance from a ~taulu.config.TauluConfig.
Arguments:
- config: A populated
~taulu.config.TauluConfig instance.
Returns:
A
Taulu instance configured according to config.
Example:
>>> from taulu import Taulu
>>> from taulu.config import TauluConfig
>>> config = TauluConfig.from_toml("my_table.toml")
>>> taulu = Taulu.from_config(config)
327 @staticmethod 328 def annotate( 329 image_path: PathLike[str] | str, 330 output_path: PathLike[str] | str, 331 *, 332 backend: Literal["auto", "gui", "notebook"] = "auto", 333 ): 334 """ 335 Interactive tool to create header annotations for table segmentation. 336 337 This method guides you through a two-step annotation process: 338 339 1. **Crop the header**: Click four corners to define the header region 340 2. **Annotate lines**: Click pairs of points to define each vertical and 341 horizontal line in the header 342 343 The annotations are saved as: 344 - A cropped header image (.png) at `output_path` 345 - A JSON file (.json) containing line coordinates 346 347 ## Annotation Guidelines 348 349 **Which lines to annotate:** 350 - All vertical lines that extend into the table body (column separators) 351 - The top horizontal line of the header 352 - The bottom horizontal line of the header (top of data rows) 353 354 **Order doesn't matter** - annotate lines in any order that's convenient. 355 356 **To annotate a line:** 357 1. Click once at one endpoint 358 2. Click again at the other endpoint 359 3. A green line appears showing your annotation 360 361 **To undo:** 362 - Right-click anywhere to remove the last line you drew 363 364 **When finished:** 365 - Press 'n' to save and exit 366 - Press 'q' to quit without saving 367 368 Args: 369 image_path (PathLike[str] | str): Path to a table image containing 370 a clear view of the header. This can be a full table image. 371 output_path (PathLike[str] | str): Where to save the cropped header 372 image. The annotation JSON will be saved with the same name but 373 .json extension. 
374 375 Raises: 376 TauluException: If image_path doesn't exist or output_path is a directory 377 378 Examples: 379 Annotate a single header: 380 381 >>> from taulu import Taulu 382 >>> Taulu.annotate("scan_page_01.png", "header.png") 383 # Interactive window opens 384 # After annotation: creates header.png and header.json 385 386 Annotate left and right headers for a split table: 387 388 >>> Taulu.annotate("scan_page_01.png", "header_left.png") 389 >>> Taulu.annotate("scan_page_01.png", "header_right.png") 390 # Creates header_left.{png,json} and header_right.{png,json} 391 392 Notes: 393 - The header image doesn't need to be perfectly cropped initially - 394 the tool will help you crop it precisely 395 - Annotation accuracy is important: misaligned lines will cause 396 segmentation errors 397 - You can re-run this method to update annotations if needed 398 """ 399 400 if not exists(image_path): 401 raise TauluException(f"Image path {image_path} does not exist") 402 403 if os.path.isdir(output_path): 404 raise TauluException("Output path should be a file") 405 406 output_path = Path(output_path) 407 408 def running_in_notebook() -> bool: 409 try: 410 from IPython import get_ipython 411 412 ip = get_ipython() 413 return ip is not None and "IPKernelApp" in ip.config 414 except Exception: 415 return False 416 417 # Decide backend 418 if backend not in ("auto", "gui", "notebook"): 419 raise TauluException("backend must be one of: 'auto', 'gui', 'notebook'") 420 if backend == "auto": 421 use_notebook = running_in_notebook() 422 else: 423 use_notebook = backend == "notebook" 424 425 if use_notebook: 426 # Notebook way 427 logger.info( 428 "\x1b[32mNotebook environment detected/selected. Using notebook annotation backend." 
429 ) 430 session = TableTemplate.annotate_image_notebook( 431 os.fspath(image_path), crop=output_path.with_suffix(".png") 432 ) 433 session._save_path = output_path.with_suffix(".json") # ty: ignore[unresolved-attribute] 434 return session 435 436 else: 437 # GUI way 438 template = TableTemplate.annotate_image( 439 os.fspath(image_path), crop=output_path.with_suffix(".png") 440 ) 441 template.save(output_path.with_suffix(".json"))
Interactive tool to create header annotations for table segmentation.
This method guides you through a two-step annotation process:
- Crop the header: Click four corners to define the header region
- Annotate lines: Click pairs of points to define each vertical and horizontal line in the header
The annotations are saved as:
- A cropped header image (.png) at
output_path
- A JSON file (.json) containing line coordinates
Annotation Guidelines
Which lines to annotate:
- All vertical lines that extend into the table body (column separators)
- The top horizontal line of the header
- The bottom horizontal line of the header (top of data rows)
Order doesn't matter - annotate lines in any order that's convenient.
To annotate a line:
- Click once at one endpoint
- Click again at the other endpoint
- A green line appears showing your annotation
To undo:
- Right-click anywhere to remove the last line you drew
When finished:
- Press 'n' to save and exit
- Press 'q' to quit without saving
Arguments:
- image_path (PathLike[str] | str): Path to a table image containing a clear view of the header. This can be a full table image.
- output_path (PathLike[str] | str): Where to save the cropped header image. The annotation JSON will be saved with the same name but .json extension.
Raises:
- TauluException: If image_path doesn't exist or output_path is a directory
Examples:
Annotate a single header:
>>> from taulu import Taulu
>>> Taulu.annotate("scan_page_01.png", "header.png")
# Interactive window opens
# After annotation: creates header.png and header.json
Annotate left and right headers for a split table:
>>> Taulu.annotate("scan_page_01.png", "header_left.png")
>>> Taulu.annotate("scan_page_01.png", "header_right.png")
# Creates header_left.{png,json} and header_right.{png,json}
Notes:
- The header image doesn't need to be perfectly cropped initially - the tool will help you crop it precisely
- Annotation accuracy is important: misaligned lines will cause segmentation errors
- You can re-run this method to update annotations if needed
    def segment_table(
        self,
        image: MatLike | PathLike[str] | str,
        filtered: MatLike | PathLike[str] | str | None = None,
        debug_view: bool = False,
        debug_view_notebook: bool = False,
    ) -> SegmentedTable:
        """
        Segment a table image into a grid of cells.

        Orchestrates header alignment, grid detection, corner growing, and
        extrapolation to produce a complete grid structure.

        Args:
            image: Table image to segment (file path or numpy array).
            filtered: Optional pre-filtered binary image for corner detection.
                If provided, binarization parameters are ignored.
            debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance,
                'q' to quit. Default: False
            debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook
                using matplotlib. Default: False

        Returns:
            SegmentedTable: Grid structure with methods for cell access (`crop_cell`,
                `cell_polygon`), visualization (`show_cells`), and persistence
                (`save`, `from_saved`).

        Raises:
            TauluException: If image cannot be loaded or grid detection fails.
        """

        # Load from disk when a path was supplied instead of an array.
        # NOTE(review): a failed read trips the assert below rather than raising
        # TauluException as the docstring states — confirm intended behavior.
        if not isinstance(image, MatLike):
            image = cast(str | PathLike[str], image)
            tmp_image = cv2.imread(os.fspath(image))
            assert tmp_image is not None
            image = tmp_image

        # Step 1: align the header template(s) onto the image; `h` is the
        # homography (or Split of homographies) from template to image space.
        now = perf_counter()
        h = self._aligner.align(
            image,  # ty: ignore[invalid-argument-type]
            visual=debug_view,
            visual_notebook=debug_view_notebook,
        )
        align_time = perf_counter() - now
        logger.info(f"Header alignment took {align_time:.2f} seconds")

        # find the starting point for the table grid algorithm

        def make_top_row(template: TableTemplate, aligner: TemplateMatcher, h: NDArray):
            # Project every column intersection on template row 1 (bottom edge
            # of the header) into image coordinates.
            top_row = []
            for x in range(template.cols + 1):
                on_template = template.intersection((1, x))
                on_template = (int(on_template[0]), int(on_template[1]))

                on_img = aligner.template_to_img(h, on_template)

                top_row.append(on_img)

            return top_row

        # Split (two-page) tables get one top row per side.
        if isinstance(self._aligner, Split):
            top_row = Split(
                make_top_row(self._template.left, self._aligner.left, h.left),  # ty:ignore
                make_top_row(self._template.right, self._aligner.right, h.right),  # ty:ignore
            )
        else:
            top_row = make_top_row(self._template, self._aligner, h)  # ty:ignore

        cell_heights = self._cell_heights
        filtered_pre: MatLike | Split | None = None

        # Step 2 (optional): detect per-row heights from the image instead of
        # using the configured row_height_factor values.
        if self._auto_row_heights:
            now_ar = perf_counter()
            if isinstance(self._grid_detector, Split):
                filtered_pre = self._grid_detector.apply(image, visual=debug_view)  # ty:ignore
                assert isinstance(self._template, Split)
                header_h = Split(
                    self._template.left.cell_height(1.0),  # ty:ignore[unresolved-attribute]
                    self._template.right.cell_height(1.0),  # ty:ignore[unresolved-attribute]
                )
                # Row-height search bounds per side, as fractions of the
                # header cell height.
                min_h = Split(
                    int(header_h.left * get_param(self._min_row_height_factor, "left")),
                    int(
                        header_h.right * get_param(self._min_row_height_factor, "right")
                    ),
                )
                max_h = Split(
                    int(header_h.left * get_param(self._max_row_height_factor, "left")),
                    int(
                        header_h.right * get_param(self._max_row_height_factor, "right")
                    ),
                )
                detected = self._grid_detector.detect_row_heights(
                    image,
                    filtered_pre,
                    top_row,
                    min_h,
                    max_h,
                    path_scale=self._row_detection_path_scale,
                )
                # detected is Split[list[int]]; fall back per side if empty.
                cell_heights = Split(
                    detected.left or self._cell_heights.left,  # ty:ignore[unresolved-attribute]
                    detected.right or self._cell_heights.right,  # ty:ignore[unresolved-attribute]
                )
            else:
                filtered_pre = self._grid_detector.apply(image, visual=debug_view)  # ty:ignore
                header_h_one = self._template.cell_height(1.0)
                min_h_one = int(header_h_one * cast(float, self._min_row_height_factor))
                max_h_one = int(header_h_one * cast(float, self._max_row_height_factor))
                detected_one = self._grid_detector.detect_row_heights(
                    image,  # ty:ignore[invalid-argument-type]
                    filtered_pre,
                    top_row,  # ty:ignore
                    min_h_one,
                    max_h_one,
                    path_scale=self._row_detection_path_scale,
                )
                # Empty detection result falls back to the configured heights.
                cell_heights = detected_one or self._cell_heights
            ar_time = perf_counter() - now_ar
            logger.info(f"Row-height detection took {ar_time:.2f} seconds")

        # Step 3: grow the full grid of table points from the top row.  Reuse
        # the pre-filtered image from step 2 unless the caller supplied one.
        now = perf_counter()
        table = self._grid_detector.find_table_points(
            image,  # ty:ignore
            top_row,  # ty:ignore
            self._template.cell_widths(0),
            cell_heights,  # ty:ignore
            visual=debug_view,
            visual_notebook=debug_view_notebook,
            filtered=filtered if filtered is not None else filtered_pre,  # ty:ignore
            smooth=self._smooth,
            smooth_strength=self._smooth_strength,
            smooth_iterations=self._smooth_iterations,
            smooth_degree=self._smooth_degree,
        )
        grid_time = perf_counter() - now
        logger.info(f"Grid detection took {grid_time:.2f} seconds")

        if debug_view_notebook:
            self._aligner.show_matches_notebook()

        # Merge a Split result into one SegmentedTable at offset (0, 0).
        if isinstance(table, Split):
            table = SegmentedTable.from_split(table, (0, 0))  # ty: ignore

        return table
Segment a table image into a grid of cells.
Orchestrates header alignment, grid detection, corner growing, and extrapolation to produce a complete grid structure.
Arguments:
- image: Table image to segment (file path or numpy array).
- filtered: Optional pre-filtered binary image for corner detection. If provided, binarization parameters are ignored.
- debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance, 'q' to quit. Default: False
- debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook using matplotlib. Default: False
Returns:
SegmentedTable: Grid structure with methods for cell access (`crop_cell`, `cell_polygon`), visualization (`show_cells`), and persistence (`save`, `from_saved`).
Raises:
- TauluException: If image cannot be loaded or grid detection fails.
class TauluConfig(BaseModel):
    """
    Configuration for :class:`~taulu.Taulu`.

    All parameters mirror the ``Taulu.__init__`` signature. Any parameter that
    accepts a ``Split[T]`` can be given as a ``Split`` instance or as a plain
    scalar (applied to both sides).

    Use :meth:`from_toml` to load from a ``.toml`` file, then pass to
    :meth:`Taulu.from_config <taulu.Taulu.from_config>`.
    """

    # Split instances (and similar non-pydantic types) require this.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    template_path: Splittable[str] = Field(
        description="Path to header template image(s). Use left/right split for two-page tables.",
    )
    row_height_factor: Splittable[float] | Splittable[list[float]] | None = Field(
        default=None,
        description="Row height relative to header (e.g. 0.8 for 80%). Default: [1.0]",
    )
    annotation_path: Splittable[str] | None = Field(
        default=None,
        description="Explicit annotation JSON path. Default: inferred from template_path.",
    )
    binarization_sensitivity: Splittable[float] = Field(
        default=0.25,
        description="Binarization threshold (0.0-1.0). Higher = less noise.",
    )
    search_radius: Splittable[int] = Field(
        default=60,
        description="Corner search area in pixels.",
    )
    position_weight: Splittable[float] = Field(
        default=0.4,
        description="Position penalty weight [0, 1].",
    )
    line_thickness: Splittable[int] = Field(
        default=10,
        description="Cross-kernel width matching line thickness.",
    )
    line_gap_fill: Splittable[int] = Field(
        default=4,
        description="Morphological dilation size for gap filling.",
    )
    intersection_kernel_size: Splittable[int] = Field(
        default=41,
        description="Cross-kernel size (must be odd).",
    )
    detection_scale: Splittable[float] = Field(
        default=1.0,
        description="Image downscale factor (0, 1].",
    )
    pathfinding_threshold: Splittable[float] = Field(
        default=0.2,
        description="Confidence threshold to skip A* pathfinding.",
    )
    min_rows: Splittable[int] = Field(
        default=5,
        description="Minimum rows before completion.",
    )
    extrapolation_distance: Splittable[int] = Field(
        default=3,
        description="Rows to examine for extrapolation.",
    )
    detection_threshold: Splittable[float] = Field(
        default=0.3,
        description="Corner acceptance confidence [0, 1].",
    )
    smooth: bool = Field(
        default=False,
        description="Apply grid smoothing after detection.",
    )
    smooth_strength: float = Field(
        default=0.5,
        description="Blend factor per smoothing iteration (0.0-1.0).",
    )
    smooth_iterations: int = Field(
        default=1,
        description="Number of smoothing passes.",
    )
    smooth_degree: int = Field(
        default=1,
        description="Polynomial degree for smoothing regression (1 or 2).",
    )
    growing_resets: Splittable[int] = Field(
        default=0,
        description="Number of grid resets during growing.",
    )
    reset_fraction: Splittable[float] = Field(
        default=0.5,
        description="Fraction of points to delete per reset.",
    )
    feature_detector: Splittable[Literal["orb", "sift", "akaze"]] = Field(
        default="akaze",
        description="Feature matching method: 'orb' (fast), 'sift' (robust), 'akaze'.",
    )
    matching_scale: float = Field(
        default=1.0,
        description="Downscale factor (0, 1] for header alignment only.",
    )
    auto_row_heights: bool = Field(
        default=False,
        description="If True, detect variable per-row heights from the cross-correlation map (overrides row_height_factor).",
    )
    min_row_height_factor: Splittable[float] = Field(
        default=0.5,
        description="Minimum row height as a fraction of header height when auto_row_heights is enabled.",
    )
    max_row_height_factor: Splittable[float] = Field(
        default=1.5,
        description="Maximum row height as a fraction of header height when auto_row_heights is enabled.",
    )
    row_detection_path_scale: float = Field(
        default=0.25,
        description="Downscale factor (0, 1] for the A* path following used by auto row height detection.",
    )

    @classmethod
    def from_toml(cls, *paths: PathLike[str] | str) -> "TauluConfig":
        """
        Load a :class:`TauluConfig` from one or more TOML files.

        When multiple paths are given, files are merged in order: later files
        override keys from earlier ones. Use this to share a common base config
        and override only the fields that differ::

            config = TauluConfig.from_toml("common.toml", "left.toml")

        Args:
            *paths: One or more paths to ``.toml`` configuration files.

        Returns:
            A fully populated :class:`TauluConfig` instance.

        Raises:
            KeyError: If a required field (``template_path``) is missing.
            TypeError: If a field value has an unexpected type.
        """
        merged: dict = {}
        for path in paths:
            with open(path, "rb") as f:
                data = tomllib.load(f)
            # Later files win on key conflicts (shallow, top-level merge).
            merged.update(data)

        # Keys beginning with "$" (presumably metadata such as "$schema")
        # are not model fields and are skipped — TODO confirm convention.
        parsed = {
            key: _parse_value(value)
            for key, value in merged.items()
            if not key.startswith("$")
        }
        return cls(**parsed)
Configuration for `taulu.Taulu`.
All parameters mirror the `Taulu.__init__` signature. Any parameter that
accepts a `Split[T]` can be given as a `Split` instance or as a plain
scalar (applied to both sides).
Use `from_toml()` to load from a `.toml` file, then pass to
`Taulu.from_config()`.
Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
165 @classmethod 166 def from_toml(cls, *paths: PathLike[str] | str) -> "TauluConfig": 167 """ 168 Load a :class:`TauluConfig` from one or more TOML files. 169 170 When multiple paths are given, files are merged in order: later files 171 override keys from earlier ones. Use this to share a common base config 172 and override only the fields that differ:: 173 174 config = TauluConfig.from_toml("common.toml", "left.toml") 175 176 Args: 177 *paths: One or more paths to ``.toml`` configuration files. 178 179 Returns: 180 A fully populated :class:`TauluConfig` instance. 181 182 Raises: 183 KeyError: If a required field (``template_path``) is missing. 184 TypeError: If a field value has an unexpected type. 185 """ 186 merged: dict = {} 187 for path in paths: 188 with open(path, "rb") as f: 189 data = tomllib.load(f) 190 merged.update(data) 191 192 parsed = { 193 key: _parse_value(value) 194 for key, value in merged.items() 195 if not key.startswith("$") 196 } 197 return cls(**parsed)
Load a TauluConfig from one or more TOML files.
When multiple paths are given, files are merged in order: later files override keys from earlier ones. Use this to share a common base config and override only the fields that differ::
config = TauluConfig.from_toml("common.toml", "left.toml")
Arguments:
- *paths: One or more paths to `.toml` configuration files.
Returns:
A fully populated `TauluConfig` instance.
Raises:
- KeyError: If a required field (`template_path`) is missing.
- TypeError: If a field value has an unexpected type.
class TemplateMatcher:
    """
    Aligns table header templates to subject images using feature-based registration.

    This class supports multiple feature detection and matching methods to compute
    a homography transformation that maps points from a header template image to
    their corresponding locations in full table images.

    ## How it Works

    1. **Feature Detection**: Extracts keypoints from both template and subject
    2. **Feature Matching**: Finds correspondences using the selected matcher
    3. **Filtering**: Keeps top matches and prunes based on spatial consistency
    4. **Homography Estimation**: Computes perspective transform using RANSAC

    The computed homography can then transform any point from template space to
    image space, allowing you to locate table structures based on your annotation.

    ## Available Methods

    - **orb** (default): ORB features with BFMatcher (Hamming distance). Fast and
      patent-free. Good for most use cases.
    - **sift**: SIFT features with BFMatcher (L2 norm). More robust to scale and
      rotation changes. Slower but often more accurate.
    - **surf**: SURF features with BFMatcher (L2 norm). Requires opencv-contrib-python
      with non-free modules enabled. Fast and robust.
    - **akaze**: AKAZE features with BFMatcher (Hamming distance). Patent-free,
      handles scale/rotation well, and often more robust than ORB on documents.

    ## Preprocessing Options

    - Set `k` parameter to apply Sauvola thresholding before feature detection.
      This can improve matching on documents with variable lighting.
    - Set `k=None` to use raw images (just extract blue channel for BGR images)

    ## Tuning Guidelines

    - **max_features**: Increase if matching fails on complex templates
    - **match_fraction**: Decrease if you get many incorrect matches
    - **max_dist**: Increase for documents with more warping/distortion
    - **scale**: Decrease (<1.0) to speed up on high-resolution images

    Args:
        template (MatLike | PathLike[str] | str | None): Header template image or path.
            This should contain a clear, representative view of the table header.
        method (FeatureDetector): Feature detection/matching method. One of "orb", "sift",
            or "surf". Default is "orb".
        max_features (int): Maximum features to detect. More features = slower
            but potentially more robust matching.
        patch_size (int): ORB patch size for feature extraction (only used with "orb").
        match_fraction (float): Fraction [0, 1] of matches to keep after sorting by
            quality. Higher = more matches but potentially more outliers.
        scale (float): Image downscaling factor (0, 1] for processing speed.
        max_dist (float): Maximum allowed distance (relative to image size) between
            matched keypoints. Filters out spatially inconsistent matches.
        k (float | None): Sauvola threshold parameter for preprocessing. If None,
            no thresholding is applied. Typical range: 0.03-0.15.
    """

    def __init__(
        self,
        template: None | MatLike | PathLike[str] | str = None,
        method: FeatureDetector = "orb",
        max_features: int = 100_000,
        patch_size: int = 31,
        match_fraction: float = 0.3,
        scale: float = 1.0,
        max_dist: float = 1.00,
        k: float | None = None,
    ):
        """
        Args:
            template (MatLike | str): (path of) template image, with the table template clearly visible
            method (FeatureDetector): feature detection/matching method ("orb", "sift", or "surf")
            max_features (int): maximal number of features that will be extracted
            patch_size (int): for ORB feature extractor (only used with method="orb")
            match_fraction (float): best fraction of matches that are kept
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            max_dist (float): maximum distance (relative to image size) of matched features.
                Increase this value if the warping between image and template needs to be more agressive
            k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done

        Raises:
            TauluException: if ``scale`` is not in ``(0, 1]``.
        """

        # NOTE(review): ``type(template) is PathLike`` is never True for
        # concrete path objects (PathLike is an ABC; pathlib.Path instances
        # have a different concrete type) — so Path inputs skip the imread
        # branch. isinstance(template, (str, PathLike)) looks intended; confirm.
        if type(template) is str or type(template) is PathLike:
            value = cv.imread(fspath(template))
            template = value

        self._method = method
        self._k = k
        if scale > 1.0:
            raise TauluException(
                "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
            )
        if scale == 0:
            raise TauluException("Use 0 < scale <= 1.0")

        self._scale = scale
        # The template is stored already downscaled and preprocessed.
        self._template = self._scale_img(cast(MatLike, template))
        self._template_orig: None | MatLike = None
        self._preprocess_template()
        self._max_features = max_features
        self._patch_size = patch_size
        self._match_fraction = match_fraction
        self._max_dist = max_dist
        self._validate_method()
        self._matches_notebook_img = None

    def _scale_img(self, img: MatLike) -> MatLike:
        """Downscale ``img`` by the configured factor (no-op when scale == 1)."""
        if self._scale == 1.0:
            return img

        return cv.resize(img, None, fx=self._scale, fy=self._scale)

    def _unscale_img(self, img: MatLike) -> MatLike:
        """Invert `_scale_img`: resize back to original resolution."""
        if self._scale == 1.0:
            return img

        return cv.resize(img, None, fx=1 / self._scale, fy=1 / self._scale)

    def _unscale_homography(self, h: np.ndarray) -> np.ndarray:
        """
        Convert a homography computed in downscaled coordinates to one that
        operates in full-resolution coordinates (conjugation by the scale map).
        """
        if self._scale == 1.0:
            return h

        scale_matrix = np.diag([self._scale, self._scale, 1.0])
        inv_scale_matrix = np.diag([1.0 / self._scale, 1.0 / self._scale, 1.0])
        return inv_scale_matrix @ h @ scale_matrix

    @property
    def method(self) -> FeatureDetector:
        """The feature detection/matching method being used."""
        return self._method

    @property
    def template(self):
        """The template image that subject images are aligned to"""
        return self._template

    @template.setter
    def template(self, value: MatLike | str):
        """Set the template image as a path or an image"""

        if type(value) is str:
            tmp_value = cv.imread(value)
            assert tmp_value is not None
            value = tmp_value
        self._template = value

        # TODO: check if the image has the right properties (dimensions etc.)
        # NOTE(review): this second assignment is redundant with the one above
        # (same value, only a cast) — harmless but could be collapsed.
        self._template = cast(MatLike, value)

        self._preprocess_template()

    def _preprocess_template(self):
        """Cache a grayscale copy, then binarize (Sauvola) or take the blue channel."""
        self._template_orig = cv.cvtColor(self._template, cv.COLOR_BGR2GRAY)
        if self._k is not None:
            self._template = imu.sauvola(self._template, self._k)
            self._template = cv.bitwise_not(self._template)
        else:
            # cv.split returns (B, G, R); keep only the blue channel.
            _, _, self._template = cv.split(self._template)

    def _preprocess_image(self, img: MatLike):
        """Apply the same preprocessing to a subject image as to the template."""
        if self._template_orig is None:
            raise TauluException("process the template first")

        if self._k is not None:
            img = imu.sauvola(img, self._k)
            img = cv.bitwise_not(img)
        else:
            _, _, img = cv.split(img)

        return img

    def _validate_method(self):
        """Validate that the selected method is available."""
        if self._method == "surf":
            if not hasattr(cv, "xfeatures2d"):
                raise TauluException(
                    "SURF requires opencv-contrib-python with non-free modules. "
                    "Install with: pip install opencv-contrib-python"
                )

    def _create_detector(self):
        """Create the feature detector based on the selected method."""
        if self._method == "orb":
            return cv.ORB_create(  # type:ignore
                self._max_features,
                patchSize=self._patch_size,
            )
        elif self._method == "sift":
            return cv.SIFT_create(  # type:ignore
                nfeatures=self._max_features, sigma=2.5, edgeThreshold=10
            )
        elif self._method == "akaze":
            return cv.AKAZE_create()  # type:ignore
        elif self._method == "surf":
            # SURF is in xfeatures2d (requires opencv-contrib-python)
            return cv.xfeatures2d.SURF_create(hessianThreshold=400)  # ty:ignore[unresolved-attribute]
        else:
            raise TauluException(f"Unknown method: {self._method}")

    def _create_matcher(self):
        """Create the feature matcher based on the selected method."""
        if self._method == "orb":
            # ORB uses binary descriptors -> Hamming distance
            return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
        elif self._method == "sift":
            # SIFT uses float descriptors -> L2 norm with crossCheck
            return cv.BFMatcher(cv.NORM_L2, crossCheck=True)
        elif self._method == "akaze":
            # AKAZE uses binary descriptors -> Hamming distance
            return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
        elif self._method == "surf":
            # SURF uses float descriptors -> L2 norm
            return cv.BFMatcher(cv.NORM_L2, crossCheck=True)
        else:
            raise TauluException(f"Unknown method: {self._method}")

    def _match_features(self, matcher, descriptors_im, descriptors_tg):
        """Match features using BFMatcher with crossCheck for all methods."""
        return list(matcher.match(descriptors_im, descriptors_tg))

    @log_calls(level=logging.DEBUG, include_return=True)
    def _find_transform_of_template_on(
        self,
        im: MatLike,
        visual: bool = False,
        visual_notebook: bool = False,
        window: str = WINDOW,
    ):
        """
        Detect, match, and filter features between ``im`` and the template,
        then estimate the template->image homography with RANSAC.

        Raises:
            TauluException: if no features were detected in either image.
        """
        im = self._scale_img(im)

        # Create detector and matcher based on selected method
        detector = self._create_detector()
        matcher = self._create_matcher()

        # Detect features and compute descriptors
        keypoints_im, descriptors_im = detector.detectAndCompute(im, None)
        keypoints_tg, descriptors_tg = detector.detectAndCompute(self._template, None)

        if descriptors_im is None or descriptors_tg is None:
            raise TauluException("No features detected in one or both images")

        # Match features
        matches = self._match_features(matcher, descriptors_im, descriptors_tg)

        # Sort matches by score
        matches = sorted(matches, key=lambda x: x.distance)

        # Remove not so good matches
        num_good_matches = int(len(matches) * self._match_fraction)
        matches = matches[:num_good_matches]

        if visual or visual_notebook:
            final_img_filtered = cv.drawMatches(
                im,
                keypoints_im,
                self._template,
                keypoints_tg,
                matches[:10],
                None,
                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
            )
            if visual:
                imu.show(final_img_filtered, title="matches", window=window)
            if visual_notebook:
                self._matches_notebook_img = final_img_filtered

        # Extract location of good matches
        points1 = np.zeros((len(matches), 2), dtype=np.float32)
        points2 = np.zeros((len(matches), 2), dtype=np.float32)

        for i, match in enumerate(matches):
            points1[i, :] = keypoints_tg[match.trainIdx].pt
            points2[i, :] = keypoints_im[match.queryIdx].pt

        # Prune reference points based upon distance between
        # key points. This assumes a fairly good alignment to start with
        # due to the protocol used (location of the sheets)
        p1 = pd.DataFrame(data=points1)
        p2 = pd.DataFrame(data=points2)
        refdist = abs(p1 - p2)

        # NOTE(review): column 0 is the x-distance but is compared against
        # im.shape[0] (height), and column 1 (y) against im.shape[1] (width);
        # the axes look swapped — harmless when max_dist is large, but confirm.
        mask_x = refdist.loc[:, 0] < (im.shape[0] * self._max_dist)
        mask_y = refdist.loc[:, 1] < (im.shape[1] * self._max_dist)
        mask = mask_x & mask_y
        mask_array = mask.to_numpy()
        points1 = points1[mask_array]
        points2 = points2[mask_array]

        # Filter matches for visualization
        filtered_matches = [
            m for m, keep in zip(matches, mask_array, strict=False) if keep
        ]

        if visual:
            final_img_filtered = cv.drawMatches(
                im,
                keypoints_im,
                self._template,
                keypoints_tg,
                filtered_matches[:100],
                None,
                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
            )
            imu.show(final_img_filtered, title="matches", window=window)

        # Find homography
        h, _ = cv.findHomography(points1, points2, cv.RANSAC)

        return self._unscale_homography(h)

    def show_matches_notebook(self):
        """Display the stored feature matches image in the notebook (call after grid detection)."""
        if self._matches_notebook_img is not None:
            imu.show_notebook(self._matches_notebook_img, title="matches")
            self._matches_notebook_img = None

    def view_alignment(self, img: MatLike, h: NDArray):
        """
        Show the alignment of the template on the given image by transforming
        it with ``h`` and overlaying both on separate color channels.

        Args:
            img (MatLike): the image on which the template is overlaid
            h (NDArray): the homography matrix from `align`

        Returns:
            int | None: the key code returned by the OpenCV window, if any.
        """

        im = imu.ensure_gray(img)
        header = imu.ensure_gray(self._unscale_img(self._template))
        height, width = im.shape

        header_warped = cv.warpPerspective(header, h, (width, height))

        # Image on the green channel, warped template on red, white elsewhere.
        merged = np.full((height, width, 3), 255, dtype=np.uint8)

        merged[..., 1] = im
        merged[..., 2] = header_warped

        return imu.show(merged)

    @log_calls(level=logging.DEBUG, include_return=True)
    def align(
        self,
        img: MatLike | str,
        visual: bool = False,
        visual_notebook: bool = False,
        window: str = WINDOW,
    ) -> NDArray:
        """
        Compute a homography that maps template pixels onto ``img``.

        Args:
            img: Subject image (path or array).
            visual: Show match visualization in an OpenCV window.
            visual_notebook: Store the match visualization for later display
                via `show_matches_notebook`.
            window: OpenCV window name when ``visual=True``.

        Returns:
            NDArray: the ``(3, 3)`` homography from template to image space.
        """

        logger.info("Aligning header with supplied table image")

        if type(img) is str:
            tmp_img = cv.imread(img)
            assert tmp_img is not None
            img = tmp_img
        img = cast(MatLike, img)

        img = self._preprocess_image(img)

        h = self._find_transform_of_template_on(img, visual, visual_notebook, window)

        if visual:
            self.view_alignment(img, h)

        return h

    def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
        """
        Transform a template-space point through the homography ``h``
        (obtained from `align`).

        Args:
            h (NDArray): transformation matrix of shape ``(3, 3)``
            point (Iterable[int]): the to-be-transformed point as ``(x, y)``

        Returns:
            tuple[int, int]: the transformed point in image space.
        """

        # Homogeneous coordinates: append 1, transform, then de-homogenize.
        point = np.array([[point[0], point[1], 1]])  # type:ignore
        transformed = np.dot(h, point.T)

        transformed /= transformed[2]

        return int(transformed[0][0]), int(transformed[1][0])
Aligns table header templates to subject images using feature-based registration.
This class supports multiple feature detection and matching methods to compute a homography transformation that maps points from a header template image to their corresponding locations in full table images.
How it Works
- Feature Detection: Extracts keypoints from both template and subject
- Feature Matching: Finds correspondences using the selected matcher
- Filtering: Keeps top matches and prunes based on spatial consistency
- Homography Estimation: Computes perspective transform using RANSAC
The computed homography can then transform any point from template space to image space, allowing you to locate table structures based on your annotation.
Available Methods
- orb (default): ORB features with BFMatcher (Hamming distance). Fast and patent-free. Good for most use cases.
- sift: SIFT features with FLANN-based matcher. More robust to scale and rotation changes. Slower but often more accurate.
- surf: SURF features with BFMatcher (L2 norm). Requires opencv-contrib-python with non-free modules enabled. Fast and robust.
- akaze: AKAZE features with BFMatcher (Hamming distance). Patent-free, handles scale/rotation well, and often more robust than ORB on documents.
Preprocessing Options
- Set the `k` parameter to apply Sauvola thresholding before feature detection. This can improve matching on documents with variable lighting.
- Set `k=None` to use raw images (just extract the blue channel for BGR images).
Tuning Guidelines
- max_features: Increase if matching fails on complex templates
- match_fraction: Decrease if you get many incorrect matches
- max_dist: Increase for documents with more warping/distortion
- scale: Decrease (<1.0) to speed up on high-resolution images
Arguments:
- template (MatLike | PathLike[str] | str | None): Header template image or path. This should contain a clear, representative view of the table header.
- method (FeatureDetector): Feature detection/matching method. One of "orb", "sift", or "surf". Default is "orb".
- max_features (int): Maximum features to detect. More features = slower but potentially more robust matching.
- patch_size (int): ORB patch size for feature extraction (only used with "orb").
- match_fraction (float): Fraction [0, 1] of matches to keep after sorting by quality. Higher = more matches but potentially more outliers.
- scale (float): Image downscaling factor (0, 1] for processing speed.
- max_dist (float): Maximum allowed distance (relative to image size) between matched keypoints. Filters out spatially inconsistent matches.
- k (float | None): Sauvola threshold parameter for preprocessing. If None, no thresholding is applied. Typical range: 0.03-0.15.
86 def __init__( 87 self, 88 template: None | MatLike | PathLike[str] | str = None, 89 method: FeatureDetector = "orb", 90 max_features: int = 100_000, 91 patch_size: int = 31, 92 match_fraction: float = 0.3, 93 scale: float = 1.0, 94 max_dist: float = 1.00, 95 k: float | None = None, 96 ): 97 """ 98 Args: 99 template (MatLike | str): (path of) template image, with the table template clearly visible 100 method (FeatureDetector): feature detection/matching method ("orb", "sift", or "surf") 101 max_features (int): maximal number of features that will be extracted 102 patch_size (int): for ORB feature extractor (only used with method="orb") 103 match_fraction (float): best fraction of matches that are kept 104 scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly) 105 max_dist (float): maximum distance (relative to image size) of matched features. 106 Increase this value if the warping between image and template needs to be more agressive 107 k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done 108 """ 109 110 if type(template) is str or type(template) is PathLike: 111 value = cv.imread(fspath(template)) 112 template = value 113 114 self._method = method 115 self._k = k 116 if scale > 1.0: 117 raise TauluException( 118 "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0" 119 ) 120 if scale == 0: 121 raise TauluException("Use 0 < scale <= 1.0") 122 123 self._scale = scale 124 self._template = self._scale_img(cast(MatLike, template)) 125 self._template_orig: None | MatLike = None 126 self._preprocess_template() 127 self._max_features = max_features 128 self._patch_size = patch_size 129 self._match_fraction = match_fraction 130 self._max_dist = max_dist 131 self._validate_method() 132 self._matches_notebook_img = None
Arguments:
- template (MatLike | str): (path of) template image, with the table template clearly visible
- method (FeatureDetector): feature detection/matching method ("orb", "sift", or "surf")
- max_features (int): maximal number of features that will be extracted
- patch_size (int): for ORB feature extractor (only used with method="orb")
- match_fraction (float): best fraction of matches that are kept
- scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- max_dist (float): maximum distance (relative to image size) of matched features. Increase this value if the warping between image and template needs to be more agressive
- k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
156 @property 157 def method(self) -> FeatureDetector: 158 """The feature detection/matching method being used.""" 159 return self._method
The feature detection/matching method being used.
161 @property 162 def template(self): 163 """The template image that subject images are aligned to""" 164 return self._template
The template image that subject images are aligned to
340 def show_matches_notebook(self): 341 """Display the stored feature matches image in the notebook (call after grid detection).""" 342 if self._matches_notebook_img is not None: 343 imu.show_notebook(self._matches_notebook_img, title="matches") 344 self._matches_notebook_img = None
Display the stored feature matches image in the notebook (call after grid detection).
346 def view_alignment(self, img: MatLike, h: NDArray): 347 """ 348 Show the alignment of the template on the given image by transforming 349 it with ``h`` and overlaying both on separate color channels. 350 351 Args: 352 img (MatLike): the image on which the template is overlaid 353 h (NDArray): the homography matrix from `align` 354 355 Returns: 356 int | None: the key code returned by the OpenCV window, if any. 357 """ 358 359 im = imu.ensure_gray(img) 360 header = imu.ensure_gray(self._unscale_img(self._template)) 361 height, width = im.shape 362 363 header_warped = cv.warpPerspective(header, h, (width, height)) 364 365 merged = np.full((height, width, 3), 255, dtype=np.uint8) 366 367 merged[..., 1] = im 368 merged[..., 2] = header_warped 369 370 return imu.show(merged)
Show the alignment of the template on the given image by transforming
it with h and overlaying both on separate color channels.
Arguments:
- img (MatLike): the image on which the template is overlaid
- h (NDArray): the homography matrix from `align`
Returns:
int | None: the key code returned by the OpenCV window, if any.
372 @log_calls(level=logging.DEBUG, include_return=True) 373 def align( 374 self, 375 img: MatLike | str, 376 visual: bool = False, 377 visual_notebook: bool = False, 378 window: str = WINDOW, 379 ) -> NDArray: 380 """ 381 Compute a homography that maps template pixels onto ``img``. 382 383 Args: 384 img: Subject image (path or array). 385 visual: Show match visualization in an OpenCV window. 386 visual_notebook: Store the match visualization for later display 387 via `show_matches_notebook`. 388 window: OpenCV window name when ``visual=True``. 389 390 Returns: 391 NDArray: the ``(3, 3)`` homography from template to image space. 392 """ 393 394 logger.info("Aligning header with supplied table image") 395 396 if type(img) is str: 397 tmp_img = cv.imread(img) 398 assert tmp_img is not None 399 img = tmp_img 400 img = cast(MatLike, img) 401 402 img = self._preprocess_image(img) 403 404 h = self._find_transform_of_template_on(img, visual, visual_notebook, window) 405 406 if visual: 407 self.view_alignment(img, h) 408 409 return h
Compute a homography that maps template pixels onto img.
Arguments:
- img: Subject image (path or array).
- visual: Show match visualization in an OpenCV window.
- visual_notebook: Store the match visualization for later display via show_matches_notebook.
- window: OpenCV window name when visual=True.
Returns:
NDArray: the (3, 3) homography from template to image space.
411 def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]: 412 """ 413 Transform a template-space point through the homography ``h`` 414 (obtained from `align`). 415 416 Args: 417 h (NDArray): transformation matrix of shape ``(3, 3)`` 418 point (Iterable[int]): the to-be-transformed point as ``(x, y)`` 419 420 Returns: 421 tuple[int, int]: the transformed point in image space. 422 """ 423 424 point = np.array([[point[0], point[1], 1]]) # type:ignore 425 transformed = np.dot(h, point.T) 426 427 transformed /= transformed[2] 428 429 return int(transformed[0][0]), int(transformed[1][0])
Transform a template-space point through the homography h
(obtained from align).
Arguments:
- h (NDArray): transformation matrix of shape (3, 3)
- point (Iterable[int]): the to-be-transformed point as (x, y)
Returns:
tuple[int, int]: the transformed point in image space.