taulu
Taulu - segment tables from images
Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).
To use this package, you first need to make an annotation of the headers in your table images. The idea is that these headers will be similar across your full set of images, and they will be used as a starting point for the search algorithm that finds the table grid.
Here is an example python script of how to use Taulu:
from taulu import Taulu, Split
import os
def setup():
# create an Annotation file of the headers in the image
# (one for the left header, one for the right)
# and store them in the examples directory
print("Annotating the LEFT header...")
Taulu.annotate("../data/table_00.png", "table_00_header_left.png")
print("Annotating the RIGHT header...")
Taulu.annotate("../data/table_00.png", "table_00_header_right.png")
def main():
taulu = Taulu(Split("table_00_header_left.png", "table_00_header_right.png"))
table = taulu.segment_table("../data/table_00.png", debug_view=True)
table.show_cells("../data/table_00.png")
if __name__ == "__main__":
if os.path.exists("table_00_header_left.png") and os.path.exists(
"table_00_header_right.png"
):
main()
else:
setup()
main()
If you want a high-level overview of how to use Taulu, see the `Taulu` class.
"""
Taulu - *segment tables from images*

Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).

To use this package, you first need to make an annotation of the headers in your table images.
The idea is that these headers will be similar across your full set of images, and they will be
used as a starting point for the search algorithm that finds the table grid.

Here is an example python script of how to use Taulu:
```python
from taulu import Taulu, Split
import os


def setup():
    # create an Annotation file of the headers in the image
    # (one for the left header, one for the right)
    # and store them in the examples directory
    print("Annotating the LEFT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_left.png")

    print("Annotating the RIGHT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_right.png")


def main():
    taulu = Taulu(Split("table_00_header_left.png", "table_00_header_right.png"))
    table = taulu.segment_table("../data/table_00.png", debug_view=True)

    table.show_cells("../data/table_00.png")


if __name__ == "__main__":
    if os.path.exists("table_00_header_left.png") and os.path.exists(
        "table_00_header_right.png"
    ):
        main()
    else:
        setup()
        main()

```

If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu)
"""

from .config import TauluConfig
from .grid import SegmentedTable, TableDetector
from .split import Split
from .table_indexer import TableIndexer
from .table_template import TableTemplate
from .taulu import Taulu
from .template_matcher import FeatureDetector, TemplateMatcher

# Internal modules hidden from the generated pdoc documentation.
__pdoc__ = {
    "constants": False,
    "main": False,
    "decorators": False,
    "error": False,
    "types": False,
    "img_util": False,
}

# Public API of the package.
__all__ = [
    "FeatureDetector",
    "SegmentedTable",
    "Split",
    "TableDetector",
    "TableIndexer",
    "TableTemplate",
    "Taulu",
    "TauluConfig",
    "TemplateMatcher",
]

# GPU support is optional: expose the submodule only when its dependencies
# are installed.
try:
    from . import gpu  # noqa: F401  # ty: ignore[unresolved-import]

    __all__.append("gpu")
except ImportError:
    pass
class SegmentedTable(TableIndexer):
    """
    Represents a detected table grid as a 2D array of intersection points.

    Returned by `Taulu.segment_table`. Provides methods for querying cell
    locations, cropping cells/regions from the source image, and interactive
    visualization. Can be saved to and restored from JSON.
    """

    # Column index of the seam between the two halves of a split table;
    # ``None`` for single-page tables.
    _right_offset: int | None = None

    def __init__(self, points: list[list[Point]], right_offset: int | None = None):
        """
        Args:
            points: 2D list of intersections between horizontal and vertical
                rules, in row-major order.
            right_offset: For tables built from a `Split`, the column index
                where the right half begins. ``None`` for single-page tables.
        """
        self._points = points
        self._right_offset = right_offset

    @property
    def points(self) -> list[list[Point]]:
        """The raw 2D grid of intersection points."""
        return self._points

    def row(self, i: int) -> list[Point]:
        """Return the ``i``-th row of intersection points."""
        assert 0 <= i and i < len(self._points)
        return self._points[i]

    @property
    def cols(self) -> int:
        """Number of cell columns (one fewer than vertical rules; two fewer
        for split tables, accounting for the seam between halves)."""
        if self._right_offset is not None:
            return len(self.row(0)) - 2
        else:
            return len(self.row(0)) - 1

    @property
    def rows(self) -> int:
        """Number of cell rows (one fewer than horizontal rules)."""
        return len(self._points) - 1

    @property
    def right_offset(self) -> int | None:
        """Column index where the right half begins, or ``None``."""
        return self._right_offset

    @staticmethod
    def from_split(
        split_grids: Split["SegmentedTable"], offsets: Split[Point]
    ) -> "SegmentedTable":
        """
        Convert two ``SegmentedTable`` objects into one that can segment the original (non-cropped) image.

        Args:
            split_grids (Split[SegmentedTable]): SegmentedTable objects for the left and right part of the table
            offsets (Split[tuple[int, int]]): the offsets in the original image where each crop started

        Returns:
            SegmentedTable: a merged grid spanning both halves.

        Raises:
            ValueError: if no row is fully populated in both halves.
        """

        # Translate a half's points back into original-image coordinates.
        def offset_points(points, offset):
            return [
                [
                    (p[0] + offset[0], p[1] + offset[1]) if p is not None else None
                    for p in row
                ]
                for row in points
            ]

        split_points = split_grids.apply(
            lambda grid, offset: offset_points(grid.points, offset), offsets
        )
        points = []
        # A cell row needs a rule above and below, so rows+1 point rows exist.
        rows = min(split_grids.left.rows, split_grids.right.rows)
        for row in range(rows + 1):
            left_row = split_points.left[row]
            right_row = split_points.right[row]

            # Skip rows that contain None values
            if any(p is None for p in left_row) or any(p is None for p in right_row):
                logger.warning(
                    f"Skipping row {row} in from_split due to incomplete grid data"
                )
                continue

            row_points = []
            row_points.extend(left_row)
            row_points.extend(right_row)
            points.append(row_points)
        if not points:
            raise ValueError(
                "Cannot create SegmentedTable from split: no complete rows found in both grids"
            )
        # The seam column sits right after the left half's last point column,
        # i.e. at index ``left.cols`` in the concatenated rows.
        table_grid = SegmentedTable(points, split_grids.left.cols)
        return table_grid

    def save(self, path: str | Path):
        """
        Persist the table grid to a JSON file.

        Saves the grid corner points and right_offset (for split tables) to disk,
        allowing the grid to be reloaded later without re-running detection.

        Args:
            path: Path to save the JSON file.

        Example:
            >>> grid = taulu.segment_table("table.png")
            >>> grid.save("grid.json")
        """
        with open(path, "w") as f:
            json.dump({"points": self.points, "right_offset": self._right_offset}, f)

    @staticmethod
    def from_saved(path: str | Path) -> "SegmentedTable":
        """
        Load a previously saved SegmentedTable from a JSON file.

        Args:
            path: Path to the JSON file created by `save()`.

        Returns:
            A SegmentedTable instance with the saved corner points.

        Example:
            >>> grid = SegmentedTable.from_saved("grid.json")
            >>> cell = grid.crop_cell(image, (0, 0))
        """
        with open(path) as f:
            points = json.load(f)
            right_offset = points.get("right_offset", None)
            # JSON stores points as lists; convert each back to an (x, y) tuple.
            points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]]
            return SegmentedTable(points, right_offset)

    def add_left_col(self, width: int):
        """
        Prepend a column to the grid by shifting the first column ``width``
        pixels to the left and inserting it as a new column.

        Args:
            width: Width of the new column in pixels.
        """
        for row in self._points:
            first = row[0]
            new_first = (first[0] - width, first[1])
            row.insert(0, new_first)

    def add_top_row(self, height: int):
        """
        Prepend a row to the grid by shifting the first row ``height`` pixels
        upward and inserting it as a new row.

        Args:
            height: Height of the new row in pixels.
        """
        new_row = []
        for point in self._points[0]:
            new_row.append((point[0], point[1] - height))

        self.points.insert(0, new_row)

    def _surrounds(self, rect: list[Point], point: tuple[float, float]) -> bool:
        """Check if ``point`` (x, y) lies inside the quadrilateral ``rect``
        (lt, rt, rb, lb)."""
        lt, rt, rb, lb = rect
        x, y = point

        # Point must be below the top edge, left of the right edge,
        # above the bottom edge, and right of the left edge.
        top = _Rule(*lt, *rt)
        if top._y_at_x(x) > y:
            return False

        right = _Rule(*rt, *rb)
        if right._x_at_y(y) < x:
            return False

        bottom = _Rule(*lb, *rb)
        if bottom._y_at_x(x) < y:
            return False

        left = _Rule(*lb, *lt)
        if left._x_at_y(y) > x:
            return False

        return True

    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Get the cell indices (row, col) containing a pixel coordinate.

        Searches through all cells to find which one contains the given point,
        accounting for the non-rectangular (perspective-warped) cell boundaries.

        Args:
            point: Pixel coordinates (x, y) in the original image.

        Returns:
            (row, col) indices of the containing cell, or (-1, -1) if the point
            is outside all cells.

        Example:
            >>> grid = taulu.segment_table("table.png")
            >>> row, col = grid.cell((150, 200))
            >>> if row >= 0:
            ...     print(f"Point is in cell ({row}, {col})")
        """
        for r in range(len(self._points) - 1):
            offset = 0
            for c in range(len(self.row(0)) - 1):
                # Skip the seam pseudo-cell of a split table; columns past it
                # are shifted back by one so cell indices stay contiguous.
                if self._right_offset is not None and c == self._right_offset:
                    offset = -1
                    continue

                if self._surrounds(
                    [
                        self._points[r][c],
                        self._points[r][c + 1],
                        self._points[r + 1][c + 1],
                        self._points[r + 1][c],
                    ],
                    point,
                ):
                    return (r, c + offset)

        return (-1, -1)

    def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
        """
        Get the four corner coordinates of a cell.

        Returns the corners in clockwise order starting from top-left,
        suitable for use with OpenCV drawing functions.

        Args:
            cell: Cell indices as (row, col).

        Returns:
            Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order:
            top-left, top-right, bottom-right, bottom-left.

        Raises:
            TauluException: If row or col indices are out of bounds.

        Example:
            >>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
            >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
            >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
        """
        r, c = cell

        self._check_row_idx(r)
        self._check_col_idx(c)

        # Map the logical column index past the seam of a split table.
        if self._right_offset is not None and c >= self._right_offset:
            c = c + 1

        return (
            self._points[r][c],
            self._points[r][c + 1],
            self._points[r + 1][c + 1],
            self._points[r + 1][c],
        )

    def region(
        self, start: tuple[int, int], end: tuple[int, int]
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding polygon for a rectangular region of cells.

        Returns the four corner coordinates that enclose all cells from
        start to end (inclusive).

        Args:
            start: Top-left cell as (row, col).
            end: Bottom-right cell as (row, col).

        Returns:
            Four corner points (lt, rt, rb, lb) enclosing the region,
            each as (x, y) pixel coordinates.

        Raises:
            TauluException: If any row or col indices are out of bounds.

        Example:
            >>> # Get bounding box for cells (0,0) through (2,3)
            >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
        """
        r0, c0 = start
        r1, c1 = end

        self._check_row_idx(r0)
        self._check_row_idx(r1)
        self._check_col_idx(c0)
        self._check_col_idx(c1)

        # Map logical column indices past the seam of a split table.
        if self._right_offset is not None and c0 >= self._right_offset:
            c0 = c0 + 1

        if self._right_offset is not None and c1 >= self._right_offset:
            c1 = c1 + 1

        lt = self._points[r0][c0]
        rt = self._points[r0][c1 + 1]
        rb = self._points[r1 + 1][c1 + 1]
        lb = self._points[r1 + 1][c0]

        return lt, rt, rb, lb

    def visualize_points(self, img: MatLike):
        """
        Draw the detected table points on the image for visual verification
        """
        import colorsys

        # One hue per grid row, cycling through the HSV color wheel.
        def clr(index, total_steps):
            hue = index / total_steps  # Normalized hue between 0 and 1
            r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
            return int(r * 255), int(g * 255), int(b * 255)

        for i, row in enumerate(self._points):
            for p in row:
                cv.circle(img, p, 4, clr(i, len(self._points)), -1)

        imu.show(img)

    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """
        Split a row into spans of consecutive cells whose vertical separators
        are obscured by text (i.e. continuous handwriting crosses the rule).

        Args:
            img: Source table image.
            row: Row index to scan.
            margin_x: Horizontal margin around each rule crop, in pixels.
            margin_y: Vertical margin around each rule crop, in pixels.
                NOTE: the default is negative, which *shrinks* the crop
                vertically (subtracting a negative at the top, adding it at
                the bottom) — presumably to avoid the horizontal rules.

        Returns:
            List of ``((row, start_col), (row, end_col))`` spans (inclusive).
        """

        # Crop a thin vertical strip around the rule to the LEFT of ``col``.
        def vertical_rule_crop(row: int, col: int):
            self._check_col_idx(col)
            self._check_row_idx(row)

            if self._right_offset is not None and col >= self._right_offset:
                col = col + 1

            top = self._points[row][col]
            bottom = self._points[row + 1][col]

            # The rule may be slanted; take the full horizontal extent.
            left = int(min(top[0], bottom[0]))
            right = int(max(top[0], bottom[0]))

            return img[
                int(top[1]) - margin_y : int(bottom[1]) + margin_y,
                left - margin_x : right + margin_x,
            ]

        result = []

        # A span continues while its left rules are text-covered; a clear
        # rule closes the current span and starts a new one.
        start = None
        for col in range(self.cols):
            crop = vertical_rule_crop(row, col)
            text_over_score = imu.text_presence_score(crop)
            # Empirical threshold: scores above -0.10 count as "text crosses
            # the rule" — TODO confirm against imu.text_presence_score docs.
            text_over = text_over_score > -0.10

            if not text_over:
                if start is not None:
                    result.append(((row, start), (row, col - 1)))
                start = col

        if start is not None:
            result.append(((row, start), (row, self.cols - 1)))

        # NOTE(review): if the row's leading rule(s) score as text-covered,
        # ``start`` stays None and the leading cells are omitted from the
        # result — confirm this is intended (the col-0 rule is the table's
        # outer border, so it is usually clear).
        return result
Represents a detected table grid as a 2D array of intersection points.
Returned by Taulu.segment_table. Provides methods for querying cell
locations, cropping cells/regions from the source image, and interactive
visualization. Can be saved to and restored from JSON.
906 def __init__(self, points: list[list[Point]], right_offset: int | None = None): 907 """ 908 Args: 909 points: 2D list of intersections between horizontal and vertical 910 rules, in row-major order. 911 right_offset: For tables built from a `Split`, the column index 912 where the right half begins. ``None`` for single-page tables. 913 """ 914 self._points = points 915 self._right_offset = right_offset
Arguments:
- points: 2D list of intersections between horizontal and vertical rules, in row-major order.
- right_offset: For tables built from a
`Split`, the column index where the right half begins. `None` for single-page tables.
917 @property 918 def points(self) -> list[list[Point]]: 919 """The raw 2D grid of intersection points.""" 920 return self._points
The raw 2D grid of intersection points.
922 def row(self, i: int) -> list[Point]: 923 """Return the ``i``-th row of intersection points.""" 924 assert 0 <= i and i < len(self._points) 925 return self._points[i]
Return the i-th row of intersection points.
927 @property 928 def cols(self) -> int: 929 """Number of cell columns (one fewer than vertical rules; two fewer 930 for split tables, accounting for the seam between halves).""" 931 if self._right_offset is not None: 932 return len(self.row(0)) - 2 933 else: 934 return len(self.row(0)) - 1
Number of cell columns (one fewer than vertical rules; two fewer for split tables, accounting for the seam between halves).
936 @property 937 def rows(self) -> int: 938 """Number of cell rows (one fewer than horizontal rules).""" 939 return len(self._points) - 1
Number of cell rows (one fewer than horizontal rules).
941 @property 942 def right_offset(self) -> int | None: 943 """Column index where the right half begins, or ``None``.""" 944 return self._right_offset
Column index where the right half begins, or None.
946 @staticmethod 947 def from_split( 948 split_grids: Split["SegmentedTable"], offsets: Split[Point] 949 ) -> "SegmentedTable": 950 """ 951 Convert two ``SegmentedTable`` objects into one that can segment the original (non-cropped) image. 952 953 Args: 954 split_grids (Split[SegmentedTable]): SegmentedTable objects for the left and right part of the table 955 offsets (Split[tuple[int, int]]): the offsets in the original image where each crop started 956 957 Returns: 958 SegmentedTable: a merged grid spanning both halves. 959 960 Raises: 961 ValueError: if no row is fully populated in both halves. 962 """ 963 964 def offset_points(points, offset): 965 return [ 966 [ 967 (p[0] + offset[0], p[1] + offset[1]) if p is not None else None 968 for p in row 969 ] 970 for row in points 971 ] 972 973 split_points = split_grids.apply( 974 lambda grid, offset: offset_points(grid.points, offset), offsets 975 ) 976 points = [] 977 rows = min(split_grids.left.rows, split_grids.right.rows) 978 for row in range(rows + 1): 979 left_row = split_points.left[row] 980 right_row = split_points.right[row] 981 982 # Skip rows that contain None values 983 if any(p is None for p in left_row) or any(p is None for p in right_row): 984 logger.warning( 985 f"Skipping row {row} in from_split due to incomplete grid data" 986 ) 987 continue 988 989 row_points = [] 990 row_points.extend(left_row) 991 row_points.extend(right_row) 992 points.append(row_points) 993 if not points: 994 raise ValueError( 995 "Cannot create SegmentedTable from split: no complete rows found in both grids" 996 ) 997 table_grid = SegmentedTable(points, split_grids.left.cols) 998 return table_grid
Convert two SegmentedTable objects into one that can segment the original (non-cropped) image.
Arguments:
- split_grids (Split[SegmentedTable]): SegmentedTable objects for the left and right part of the table
- offsets (Split[tuple[int, int]]): the offsets in the original image where each crop started
Returns:
SegmentedTable: a merged grid spanning both halves.
Raises:
- ValueError: if no row is fully populated in both halves.
1000 def save(self, path: str | Path): 1001 """ 1002 Persist the table grid to a JSON file. 1003 1004 Saves the grid corner points and right_offset (for split tables) to disk, 1005 allowing the grid to be reloaded later without re-running detection. 1006 1007 Args: 1008 path: Path to save the JSON file. 1009 1010 Example: 1011 >>> grid = taulu.segment_table("table.png") 1012 >>> grid.save("grid.json") 1013 """ 1014 with open(path, "w") as f: 1015 json.dump({"points": self.points, "right_offset": self._right_offset}, f)
Persist the table grid to a JSON file.
Saves the grid corner points and right_offset (for split tables) to disk, allowing the grid to be reloaded later without re-running detection.
Arguments:
- path: Path to save the JSON file.
Example:
>>> grid = taulu.segment_table("table.png") >>> grid.save("grid.json")
1017 @staticmethod 1018 def from_saved(path: str | Path) -> "SegmentedTable": 1019 """ 1020 Load a previously saved SegmentedTable from a JSON file. 1021 1022 Args: 1023 path: Path to the JSON file created by `save()`. 1024 1025 Returns: 1026 A SegmentedTable instance with the saved corner points. 1027 1028 Example: 1029 >>> grid = SegmentedTable.from_saved("grid.json") 1030 >>> cell = grid.crop_cell(image, (0, 0)) 1031 """ 1032 with open(path) as f: 1033 points = json.load(f) 1034 right_offset = points.get("right_offset", None) 1035 points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]] 1036 return SegmentedTable(points, right_offset)
Load a previously saved SegmentedTable from a JSON file.
Arguments:
- path: Path to the JSON file created by
save().
Returns:
A SegmentedTable instance with the saved corner points.
Example:
>>> grid = SegmentedTable.from_saved("grid.json") >>> cell = grid.crop_cell(image, (0, 0))
1038 def add_left_col(self, width: int): 1039 """ 1040 Prepend a column to the grid by shifting the first column ``width`` 1041 pixels to the left and inserting it as a new column. 1042 1043 Args: 1044 width: Width of the new column in pixels. 1045 """ 1046 for row in self._points: 1047 first = row[0] 1048 new_first = (first[0] - width, first[1]) 1049 row.insert(0, new_first)
Prepend a column to the grid by shifting the first column width
pixels to the left and inserting it as a new column.
Arguments:
- width: Width of the new column in pixels.
1051 def add_top_row(self, height: int): 1052 """ 1053 Prepend a row to the grid by shifting the first row ``height`` pixels 1054 upward and inserting it as a new row. 1055 1056 Args: 1057 height: Height of the new row in pixels. 1058 """ 1059 new_row = [] 1060 for point in self._points[0]: 1061 new_row.append((point[0], point[1] - height)) 1062 1063 self.points.insert(0, new_row)
Prepend a row to the grid by shifting the first row height pixels
upward and inserting it as a new row.
Arguments:
- height: Height of the new row in pixels.
1089 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 1090 """ 1091 Get the cell indices (row, col) containing a pixel coordinate. 1092 1093 Searches through all cells to find which one contains the given point, 1094 accounting for the non-rectangular (perspective-warped) cell boundaries. 1095 1096 Args: 1097 point: Pixel coordinates (x, y) in the original image. 1098 1099 Returns: 1100 (row, col) indices of the containing cell, or (-1, -1) if the point 1101 is outside all cells. 1102 1103 Example: 1104 >>> grid = taulu.segment_table("table.png") 1105 >>> row, col = grid.cell((150, 200)) 1106 >>> if row >= 0: 1107 ... print(f"Point is in cell ({row}, {col})") 1108 """ 1109 for r in range(len(self._points) - 1): 1110 offset = 0 1111 for c in range(len(self.row(0)) - 1): 1112 if self._right_offset is not None and c == self._right_offset: 1113 offset = -1 1114 continue 1115 1116 if self._surrounds( 1117 [ 1118 self._points[r][c], 1119 self._points[r][c + 1], 1120 self._points[r + 1][c + 1], 1121 self._points[r + 1][c], 1122 ], 1123 point, 1124 ): 1125 return (r, c + offset) 1126 1127 return (-1, -1)
Get the cell indices (row, col) containing a pixel coordinate.
Searches through all cells to find which one contains the given point, accounting for the non-rectangular (perspective-warped) cell boundaries.
Arguments:
- point: Pixel coordinates (x, y) in the original image.
Returns:
(row, col) indices of the containing cell, or (-1, -1) if the point is outside all cells.
Example:
>>> grid = taulu.segment_table("table.png") >>> row, col = grid.cell((150, 200)) >>> if row >= 0: ... print(f"Point is in cell ({row}, {col})")
1129 def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]: 1130 """ 1131 Get the four corner coordinates of a cell. 1132 1133 Returns the corners in clockwise order starting from top-left, 1134 suitable for use with OpenCV drawing functions. 1135 1136 Args: 1137 cell: Cell indices as (row, col). 1138 1139 Returns: 1140 Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: 1141 top-left, top-right, bottom-right, bottom-left. 1142 1143 Raises: 1144 TauluException: If row or col indices are out of bounds. 1145 1146 Example: 1147 >>> lt, rt, rb, lb = grid.cell_polygon((0, 0)) 1148 >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32) 1149 >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2) 1150 """ 1151 r, c = cell 1152 1153 self._check_row_idx(r) 1154 self._check_col_idx(c) 1155 1156 if self._right_offset is not None and c >= self._right_offset: 1157 c = c + 1 1158 1159 return ( 1160 self._points[r][c], 1161 self._points[r][c + 1], 1162 self._points[r + 1][c + 1], 1163 self._points[r + 1][c], 1164 )
Get the four corner coordinates of a cell.
Returns the corners in clockwise order starting from top-left, suitable for use with OpenCV drawing functions.
Arguments:
- cell: Cell indices as (row, col).
Returns:
Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: top-left, top-right, bottom-right, bottom-left.
Raises:
- TauluException: If row or col indices are out of bounds.
Example:
>>> lt, rt, rb, lb = grid.cell_polygon((0, 0)) >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32) >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
1166 def region( 1167 self, start: tuple[int, int], end: tuple[int, int] 1168 ) -> tuple[Point, Point, Point, Point]: 1169 """ 1170 Get the bounding polygon for a rectangular region of cells. 1171 1172 Returns the four corner coordinates that enclose all cells from 1173 start to end (inclusive). 1174 1175 Args: 1176 start: Top-left cell as (row, col). 1177 end: Bottom-right cell as (row, col). 1178 1179 Returns: 1180 Four corner points (lt, rt, rb, lb) enclosing the region, 1181 each as (x, y) pixel coordinates. 1182 1183 Raises: 1184 TauluException: If any row or col indices are out of bounds. 1185 1186 Example: 1187 >>> # Get bounding box for cells (0,0) through (2,3) 1188 >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3)) 1189 """ 1190 r0, c0 = start 1191 r1, c1 = end 1192 1193 self._check_row_idx(r0) 1194 self._check_row_idx(r1) 1195 self._check_col_idx(c0) 1196 self._check_col_idx(c1) 1197 1198 if self._right_offset is not None and c0 >= self._right_offset: 1199 c0 = c0 + 1 1200 1201 if self._right_offset is not None and c1 >= self._right_offset: 1202 c1 = c1 + 1 1203 1204 lt = self._points[r0][c0] 1205 rt = self._points[r0][c1 + 1] 1206 rb = self._points[r1 + 1][c1 + 1] 1207 lb = self._points[r1 + 1][c0] 1208 1209 return lt, rt, rb, lb
Get the bounding polygon for a rectangular region of cells.
Returns the four corner coordinates that enclose all cells from start to end (inclusive).
Arguments:
- start: Top-left cell as (row, col).
- end: Bottom-right cell as (row, col).
Returns:
Four corner points (lt, rt, rb, lb) enclosing the region, each as (x, y) pixel coordinates.
Raises:
- TauluException: If any row or col indices are out of bounds.
Example:
>>> # Get bounding box for cells (0,0) through (2,3) >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
1211 def visualize_points(self, img: MatLike): 1212 """ 1213 Draw the detected table points on the image for visual verification 1214 """ 1215 import colorsys 1216 1217 def clr(index, total_steps): 1218 hue = index / total_steps # Normalized hue between 0 and 1 1219 r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0) 1220 return int(r * 255), int(g * 255), int(b * 255) 1221 1222 for i, row in enumerate(self._points): 1223 for p in row: 1224 cv.circle(img, p, 4, clr(i, len(self._points)), -1) 1225 1226 imu.show(img)
Draw the detected table points on the image for visual verification
1228 def text_regions( 1229 self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3 1230 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 1231 """ 1232 Split a row into spans of consecutive cells whose vertical separators 1233 are obscured by text (i.e. continuous handwriting crosses the rule). 1234 1235 Args: 1236 img: Source table image. 1237 row: Row index to scan. 1238 margin_x: Horizontal margin around each rule crop, in pixels. 1239 margin_y: Vertical margin around each rule crop, in pixels. 1240 1241 Returns: 1242 List of ``((row, start_col), (row, end_col))`` spans (inclusive). 1243 """ 1244 1245 def vertical_rule_crop(row: int, col: int): 1246 self._check_col_idx(col) 1247 self._check_row_idx(row) 1248 1249 if self._right_offset is not None and col >= self._right_offset: 1250 col = col + 1 1251 1252 top = self._points[row][col] 1253 bottom = self._points[row + 1][col] 1254 1255 left = int(min(top[0], bottom[0])) 1256 right = int(max(top[0], bottom[0])) 1257 1258 return img[ 1259 int(top[1]) - margin_y : int(bottom[1]) + margin_y, 1260 left - margin_x : right + margin_x, 1261 ] 1262 1263 result = [] 1264 1265 start = None 1266 for col in range(self.cols): 1267 crop = vertical_rule_crop(row, col) 1268 text_over_score = imu.text_presence_score(crop) 1269 text_over = text_over_score > -0.10 1270 1271 if not text_over: 1272 if start is not None: 1273 result.append(((row, start), (row, col - 1))) 1274 start = col 1275 1276 if start is not None: 1277 result.append(((row, start), (row, self.cols - 1))) 1278 1279 return result
Split a row into spans of consecutive cells whose vertical separators are obscured by text (i.e. continuous handwriting crosses the rule).
Arguments:
- img: Source table image.
- row: Row index to scan.
- margin_x: Horizontal margin around each rule crop, in pixels.
- margin_y: Vertical margin around each rule crop, in pixels.
Returns:
List of
`((row, start_col), (row, end_col))` spans (inclusive).
class Split[T]:
    """
    Container for paired left/right data with convenient manipulation methods.

    The Split class is designed for working with table images that span two pages
    or have distinct left and right sections. It allows you to:
    - Store related data for both sides
    - Apply functions to both sides simultaneously
    - Access attributes/methods of contained objects transparently

    Examples:
        >>> # Create a split with different parameters for each side
        >>> thresholds = Split(0.25, 0.30)
        >>>
        >>> # Apply a function to both sides
        >>> images = Split(left_img, right_img)
        >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
        >>>
        >>> # Use with different parameters per side
        >>> results = images.apply(
        ...     lambda img, k: sauvola_threshold(img, k),
        ...     k=thresholds  # k.left used for left img, k.right for right
        ... )
        >>>
        >>> # Access methods of contained objects directly
        >>> templates = Split(template_left, template_right)
        >>> widths = templates.cell_widths(0)  # Calls on both templates

    Type Parameters:
        T: The type of objects stored in left and right
    """

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        source_type: Any,
        handler: GetCoreSchemaHandler,
    ) -> core_schema.CoreSchema:
        # Pydantic v2 hook: validate either a Split instance or a
        # {"left": ..., "right": ...} dict, and serialize back to that dict.
        # Extract the T from Split[T] so inner values get their own schema.
        args = get_args(source_type)
        inner_type = args[0] if args else Any

        inner_schema = handler.generate_schema(inner_type)

        def validate_split(value: Any) -> Split:
            if isinstance(value, Split):
                return value
            if isinstance(value, dict) and "left" in value and "right" in value:
                return Split(value["left"], value["right"])
            raise ValueError(
                f"Expected Split instance or dict with 'left'/'right' keys, got {type(value)}"
            )

        return core_schema.no_info_plain_validator_function(
            validate_split,
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda v: {"left": v.left, "right": v.right},
                info_arg=False,
            ),
            metadata={
                # JSON-schema generation: render as an object requiring both
                # sides, each typed by the inner schema.
                "pydantic_js_functions": [
                    lambda _schema, handler: {
                        "type": "object",
                        "properties": {
                            "left": handler(inner_schema),
                            "right": handler(inner_schema),
                        },
                        "required": ["left", "right"],
                        "additionalProperties": False,
                    }
                ]
            },
        )

    def __init__(self, left: T | None = None, right: T | None = None):
        """
        Initialize a Split container.

        Args:
            left: Data for the left side
            right: Data for the right side

        Note:
            Both can initially be None. Use the `append` method or set
            properties directly to populate.
        """
        self._left = left
        self._right = right

    @property
    def left(self) -> T:
        """The left value. Asserts it has been set."""
        assert self._left is not None
        return self._left

    @left.setter
    def left(self, value: T):
        self._left = value

    @property
    def right(self) -> T:
        """The right value. Asserts it has been set."""
        assert self._right is not None
        return self._right

    @right.setter
    def right(self, value: T):
        self._right = value

    def append(self, value: T):
        """Set ``left`` if unset, otherwise set ``right``."""
        if self._left is None:
            self._left = value
        else:
            self._right = value

    def __repr__(self) -> str:
        return f"left: {self._left}, right: {self._right}"

    def __iter__(self):
        # Supports tuple unpacking: ``l, r = split``. Both sides must be set.
        assert self._left is not None
        assert self._right is not None
        return iter((self._left, self._right))

    def __getitem__(self, index: bool | int) -> T:
        # 0/False selects left, anything else selects right.
        assert self._left is not None
        assert self._right is not None
        if int(index) == 0:
            return self._left
        else:
            return self._right

    def apply(
        self,
        funcs: Split[Callable[..., V]] | Callable[..., V],
        *args,
        **kwargs,
    ) -> Split[V]:
        """
        Call ``funcs`` on each side and return a new Split of the results.

        ``self.left`` (resp. ``self.right``) is passed as the first positional
        argument. Any extra ``args``/``kwargs`` that are themselves a `Split`
        are unpacked per side; non-Split values are forwarded unchanged.

        Args:
            funcs: A single callable applied to both sides, or a `Split` of
                callables for per-side functions.

        Returns:
            Split[V]: results of the per-side calls.
        """
        if not isinstance(funcs, Split):
            funcs = Split(funcs, funcs)

        def get_arg(side: str, arg):
            # Per-side unpacking: a Split argument contributes its matching
            # side; anything else is passed through as-is.
            if isinstance(arg, Split):
                return getattr(arg, side)
            return arg

        def call(side: str):
            func = getattr(funcs, side)
            target = getattr(self, side)

            side_args = [get_arg(side, arg) for arg in args]
            side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()}

            return func(target, *side_args, **side_kwargs)

        return Split(call("left"), call("right"))

    def __getattr__(self, attr_name: str):
        # Transparent method forwarding: ``split.foo(x)`` calls ``foo`` on
        # both contained objects via ``apply``.
        # NOTE(review): this first branch looks unreachable — __getattr__ is
        # only invoked after normal attribute lookup has already failed, so
        # attr_name cannot be in self.__dict__ here. Confirm before removing.
        if attr_name in self.__dict__:
            return getattr(self, attr_name)

        def wrapper(*args, **kwargs):
            # Look the attribute up on each side's class so unbound methods
            # receive the contained object as their first argument.
            return self.apply(
                Split(
                    getattr(self.left.__class__, attr_name),
                    getattr(self.right.__class__, attr_name),
                ),
                *args,
                **kwargs,
            )

        return wrapper
Container for paired left/right data with convenient manipulation methods.
The Split class is designed for working with table images that span two pages or have distinct left and right sections. It allows you to:
- Store related data for both sides
- Apply functions to both sides simultaneously
- Access attributes/methods of contained objects transparently
Examples:
>>> # Create a split with different parameters for each side
>>> thresholds = Split(0.25, 0.30)
>>>
>>> # Apply a function to both sides
>>> images = Split(left_img, right_img)
>>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
>>>
>>> # Use with different parameters per side
>>> results = images.apply(
...     lambda img, k: sauvola_threshold(img, k),
...     k=thresholds  # k.left used for left img, k.right for right
... )
>>>
>>> # Access methods of contained objects directly
>>> templates = Split(template_left, template_right)
>>> widths = templates.cell_widths(0)  # Calls on both templates
Type Parameters:
T: The type of objects stored in left and right
    def __init__(self, left: T | None = None, right: T | None = None):
        """
        Initialize a Split container.

        Args:
            left: Data for the left side
            right: Data for the right side

        Note:
            Both can initially be None. Use the `append` method or set
            properties directly to populate.
        """
        # Stored privately; the `left`/`right` properties assert a side has
        # been set before returning it.
        self._left = left
        self._right = right
Initialize a Split container.
Arguments:
- left: Data for the left side
- right: Data for the right side
Note:
Both can initially be None. Use the `append` method or set
properties directly to populate.
    @property
    def left(self) -> T:
        """The left value. Asserts it has been set."""
        # Guard against reading a side that was never populated.
        assert self._left is not None
        return self._left
The left value. Asserts it has been set.
    @property
    def right(self) -> T:
        """The right value. Asserts it has been set."""
        # Guard against reading a side that was never populated.
        assert self._right is not None
        return self._right
The right value. Asserts it has been set.
150 def apply( 151 self, 152 funcs: Split[Callable[..., V]] | Callable[..., V], 153 *args, 154 **kwargs, 155 ) -> Split[V]: 156 """ 157 Call ``funcs`` on each side and return a new Split of the results. 158 159 ``self.left`` (resp. ``self.right``) is passed as the first positional 160 argument. Any extra ``args``/``kwargs`` that are themselves a `Split` 161 are unpacked per side; non-Split values are forwarded unchanged. 162 163 Args: 164 funcs: A single callable applied to both sides, or a `Split` of 165 callables for per-side functions. 166 167 Returns: 168 Split[V]: results of the per-side calls. 169 """ 170 if not isinstance(funcs, Split): 171 funcs = Split(funcs, funcs) 172 173 def get_arg(side: str, arg): 174 if isinstance(arg, Split): 175 return getattr(arg, side) 176 return arg 177 178 def call(side: str): 179 func = getattr(funcs, side) 180 target = getattr(self, side) 181 182 side_args = [get_arg(side, arg) for arg in args] 183 side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()} 184 185 return func(target, *side_args, **side_kwargs) 186 187 return Split(call("left"), call("right"))
Call funcs on each side and return a new Split of the results.
self.left (resp. self.right) is passed as the first positional
argument. Any extra args/kwargs that are themselves a Split
are unpacked per side; non-Split values are forwarded unchanged.
Arguments:
- funcs: A single callable applied to both sides, or a `Split` of
  callables for per-side functions.
Returns:
Split[V]: results of the per-side calls.
class TableDetector:
    """
    Detects table grid intersections using morphological filtering and template matching.

    This detector implements a multi-stage pipeline:

    1. **Binarization**: Sauvola adaptive thresholding to handle varying lighting
    2. **Morphological operations**: Dilation to connect broken rule segments
    3. **Cross-kernel matching**: Template matching with a cross-shaped kernel to find
       rule intersections where horizontal and vertical lines meet
    4. **Grid growing**: Iterative point detection starting from a known seed point

    The cross-kernel is designed to match the specific geometry of your table rules.
    It should be sized so that after morphology, it aligns with actual corner shapes.

    ## Tuning Guidelines

    - **intersection_kernel_size**: Increase if you need more selectivity (fewer false positives)
    - **line_thickness/line_thickness_horizontal**: Should match rule thickness after morphology
    - **line_gap_fill**: Increase to connect more broken lines, but this thickens rules
    - **binarization_sensitivity**: Increase to threshold more aggressively (remove noise)
    - **search_radius**: Increase for documents with more warping/distortion
    - **position_weight**: Increase to prefer corners closer to expected positions

    ## Visual Debugging

    Set `visual=True` in methods to see intermediate results and tune parameters.
    """

    def __init__(
        self,
        intersection_kernel_size: int = 21,
        line_thickness: int = 6,
        line_thickness_horizontal: int | None = None,
        line_gap_fill: int | None = None,
        binarization_sensitivity: float = 0.04,
        binarization_window: int = 15,
        detection_scale: float = 1.0,
        search_radius: int = 40,
        position_weight: float = 0.4,
        pathfinding_threshold: float = 0.2,
        min_rows: int = 5,
        detection_threshold: float = 0.3,
        extrapolation_distance: int = 4,
        growing_resets: int = 3,
        reset_fraction: float = 0.5,
    ):
        """
        Args:
            intersection_kernel_size (int): the size of the cross kernel
                a larger kernel size often means that more penalty is applied, often leading
                to more sparse results
            line_thickness (int): the width of one of the edges in the cross filter, should be
                roughly equal to the width of the rules in the image after morphology is applied
            line_thickness_horizontal (int | None): useful if the horizontal rules and vertical rules
                have different sizes
            line_gap_fill (int | None): the size of the morphology operators that are applied before
                the cross kernel. 'bridges the gaps' of broken-up lines
            binarization_sensitivity (float): threshold parameter for sauvola thresholding
            binarization_window (int): window_size parameter for sauvola thresholding
            detection_scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            search_radius (int): area in which to search for a new max value in `find_nearest` etc.
            position_weight (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
            pathfinding_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip astar pathfinding
            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
            detection_threshold (float): the threshold for accepting a new point when growing the table
            extrapolation_distance (int): how many points away to look when calculating the median slope
            growing_resets (int): The amount of cuts (large deletions) to do in the grid during table growing
            reset_fraction (float): The portion of the already-chosen corner points to delete during cutting
        """
        self._validate_parameters(
            intersection_kernel_size,
            line_thickness,
            line_thickness_horizontal,
            line_gap_fill,
            search_radius,
            binarization_sensitivity,
            binarization_window,
            position_weight,
            pathfinding_threshold,
            growing_resets,
            reset_fraction,
        )

        self._intersection_kernel_size = intersection_kernel_size
        self._line_thickness = line_thickness
        # Fall back to the vertical thickness when no separate horizontal
        # thickness is provided.
        self._line_thickness_horizontal = (
            line_thickness
            if line_thickness_horizontal is None
            else line_thickness_horizontal
        )
        self._line_gap_fill = (
            line_gap_fill if line_gap_fill is not None else line_thickness
        )
        self._search_radius = search_radius
        self._binarization_sensitivity = binarization_sensitivity
        self._binarization_window = binarization_window
        self._position_weight = position_weight
        self._scale = detection_scale
        self._pathfinding_threshold = pathfinding_threshold
        self._min_rows = min_rows
        self._detection_threshold = detection_threshold
        self._extrapolation_distance = extrapolation_distance
        self._growing_resets = growing_resets
        self._reset_fraction = reset_fraction

        self._cross_kernel = self._create_cross_kernel()

    def _validate_parameters(
        self,
        intersection_kernel_size: int,
        line_thickness: int,
        line_thickness_horizontal: int | None,
        line_gap_fill: int | None,
        search_radius: int,
        binarization_sensitivity: float,
        binarization_window: int,
        position_weight: float,
        pathfinding_threshold: float,
        growing_resets: int,
        reset_fraction: float,
    ) -> None:
        """Validate initialization parameters, raising ValueError on bad values."""
        # The cross kernel needs a well-defined center pixel, hence odd size.
        if intersection_kernel_size % 2 == 0:
            raise ValueError("intersection_kernel_size must be odd")
        if (
            intersection_kernel_size <= 0
            or line_thickness <= 0
            or search_radius <= 0
            or binarization_window <= 0
        ):
            raise ValueError("Size parameters must be positive")
        if line_thickness_horizontal is not None and line_thickness_horizontal <= 0:
            raise ValueError("line_thickness_horizontal must be positive")
        if line_gap_fill is not None and line_gap_fill <= 0:
            raise ValueError("line_gap_fill must be positive")
        if not 0 <= position_weight <= 1:
            raise ValueError("position_weight must be in [0, 1]")
        if binarization_sensitivity <= 0:
            raise ValueError("binarization_sensitivity must be positive")
        if pathfinding_threshold < 0 or pathfinding_threshold > 1:
            raise ValueError("pathfinding_threshold must be in [0, 1]")
        if reset_fraction < 0 or reset_fraction > 1:
            raise ValueError("reset_fraction must be in [0, 1]")
        if growing_resets < 0:
            raise ValueError("growing_resets must be zero or positive")

    def _create_gaussian_weights(self, region_size: int) -> NDArray:
        """
        Create a square 2D Gaussian weight mask used to bias `find_nearest`
        toward points close to the search center.

        Args:
            region_size (int): Side length of the square mask.

        Returns:
            NDArray: ``(region_size, region_size)`` float32 weight mask, peak 1.0
            at the center, falling off to ``1 - position_weight`` at the edge.
        """
        if self._position_weight == 0:
            return np.ones((region_size, region_size), dtype=np.float32)

        y = np.linspace(-1, 1, region_size)
        x = np.linspace(-1, 1, region_size)
        xv, yv = np.meshgrid(x, y)
        dist_squared = xv**2 + yv**2

        # Prevent log(0) when position_weight is 1
        if self._position_weight >= 0.999:
            sigma = 0.1  # Small sigma for very sharp peak
        else:
            sigma = np.sqrt(-1 / (2 * np.log(1 - self._position_weight)))

        weights = np.exp(-dist_squared / (2 * sigma**2))

        return weights.astype(np.float32)

    def _create_cross_kernel(self) -> NDArray:
        """Build the cross-shaped uint8 template used to match rule intersections."""
        kernel = np.zeros(
            (self._intersection_kernel_size, self._intersection_kernel_size),
            dtype=np.uint8,
        )
        center = self._intersection_kernel_size // 2

        # Create horizontal bar
        h_start = max(0, center - self._line_thickness_horizontal // 2)
        h_end = min(
            self._intersection_kernel_size,
            center + (self._line_thickness_horizontal + 1) // 2,
        )
        kernel[h_start:h_end, :] = 255

        # Create vertical bar
        v_start = max(0, center - self._line_thickness // 2)
        v_end = min(
            self._intersection_kernel_size, center + (self._line_thickness + 1) // 2
        )
        kernel[:, v_start:v_end] = 255

        return kernel

    def _apply_morphology(self, binary: MatLike) -> MatLike:
        """Dilate horizontally then vertically to bridge gaps in broken rules."""
        # Define a horizontal kernel (adjust width as needed)
        kernel_hor = cv.getStructuringElement(cv.MORPH_RECT, (self._line_gap_fill, 1))
        kernel_ver = cv.getStructuringElement(cv.MORPH_RECT, (1, self._line_gap_fill))

        # Apply dilation
        dilated = cv.dilate(binary, kernel_hor, iterations=1)
        dilated = cv.dilate(dilated, kernel_ver, iterations=1)

        return dilated

    def _apply_cross_matching(self, img: MatLike) -> MatLike:
        """Apply cross kernel template matching."""
        # Pad so the response map keeps the input's size and alignment.
        pad_y = self._cross_kernel.shape[0] // 2
        pad_x = self._cross_kernel.shape[1] // 2

        padded = cv.copyMakeBorder(
            img, pad_y, pad_y, pad_x, pad_x, borderType=cv.BORDER_CONSTANT, value=0
        )

        filtered = cv.matchTemplate(padded, self._cross_kernel, cv.TM_SQDIFF_NORMED)
        # Invert and normalize to 0-255 range (TM_SQDIFF_NORMED: low = good match)
        filtered = cv.normalize(1.0 - filtered, None, 0, 255, cv.NORM_MINMAX)
        return filtered.astype(np.uint8)

    def apply(
        self, img: MatLike, visual: bool = False, visual_notebook: bool = False
    ) -> MatLike:
        """
        Apply the grid detection filter to the input image.

        Args:
            img (MatLike): the input image
            visual (bool): whether to show intermediate steps via OpenCV windows
            visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook

        Returns:
            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
        """

        if img is None or img.size == 0:
            raise ValueError("Input image is empty or None")

        binary = imu.sauvola(
            img, k=self._binarization_sensitivity, window_size=self._binarization_window
        )

        if visual:
            imu.show(binary, title="thresholded")
        if visual_notebook:
            imu.show_notebook(binary, title="thresholded")

        binary = self._apply_morphology(binary)

        if visual:
            imu.show(binary, title="dilated")
        if visual_notebook:
            imu.show_notebook(binary, title="dilated")

        filtered = self._apply_cross_matching(binary)

        return filtered

    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: int | None = None
    ) -> tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `search_radius`

        Returns:
            tuple[Point, float]: the best-matching pixel ``(x, y)`` and its
            confidence in ``[0, 1]``. If the search window falls outside the
            image, the input ``point`` is returned with confidence ``0.0``.
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_radius
        x, y = point

        # Calculate crop boundaries
        crop_x = max(0, x - region_size // 2)
        crop_y = max(0, y - region_size // 2)
        crop_width = min(region_size, filtered.shape[1] - crop_x)
        crop_height = min(region_size, filtered.shape[0] - crop_y)

        # Handle edge cases
        if crop_width <= 0 or crop_height <= 0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]

        if cropped.size == 0:
            return point, 0.0

        # Always apply Gaussian weighting by extending crop if needed
        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
            # Perfect size - apply weights directly
            weights = self._create_gaussian_weights(region_size)
            weighted = cropped.astype(np.float32) * weights
        else:
            # Extend crop to match region_size, apply weights, then restore
            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)

            # Calculate offset to center the cropped region in extended array
            offset_y = (region_size - cropped.shape[0]) // 2
            offset_x = (region_size - cropped.shape[1]) // 2

            # Place cropped region in center of extended array
            extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ] = cropped

            # Apply Gaussian weights to extended array
            weights = self._create_gaussian_weights(region_size)
            weighted_extended = extended.astype(np.float32) * weights

            # Extract the original region back out
            weighted = weighted_extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ]

        best_idx = np.argmax(weighted)
        best_y, best_x = np.unravel_index(best_idx, cropped.shape)

        result_point = (
            int(crop_x + best_x),
            int(crop_y + best_y),
        )
        # Weighted scores were scaled from a 0-255 map; normalize to [0, 1].
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence

    def detect_row_heights(
        self,
        img: MatLike,
        filtered: MatLike,
        top_row: list[Point | None],
        min_row_height: int,
        max_row_height: int,
        path_scale: float = 0.25,
        prominence: float = 18.0,
        cluster_tolerance: int | None = None,
        min_columns_for_rule: float = 0.4,
        straight_cost: int = 10,
        perpendicular_cost: int = 30,
        darkness_divisor: int = 100,
    ) -> list[int]:
        """
        Detect variable row heights from the cross-correlation map by following
        each vertical rule downward via A* and finding peaks of cross-correlation
        along that path.

        Args:
            img: Original (full-resolution) table image.
            filtered: Cross-correlation map produced by `apply()` (full resolution).
            top_row: Top points of vertical rules in image space. ``None`` entries
                (where header alignment failed for that rule) are skipped.
            min_row_height: Minimum allowed row height in pixels.
            max_row_height: Maximum allowed row height in pixels.
            path_scale: Downscale factor used when running A* (purely for speed).
                The detected path is rescaled back to full resolution for sampling.
            prominence: Minimum peak value [0, 255] in the cross-correlation profile.
            cluster_tolerance: Cross-column matching tolerance in pixels.
                Defaults to ``min_row_height // 2``.
            min_columns_for_rule: Fraction of columns that must agree on a peak.
            straight_cost: A* cost per straight (down/up) step.
            perpendicular_cost: A* cost per lateral step. Higher = stronger
                straight-line bias.
            darkness_divisor: A* image cost is ``pixel / darkness_divisor``.
                Higher = lighter line bias.

        Returns:
            List of per-row heights (consecutive differences of detected offsets).
            Empty if detection failed.
        """
        valid_points = [(float(p[0]), float(p[1])) for p in top_row if p is not None]
        if not valid_points:
            return []

        gray = ensure_gray(img)
        if path_scale != 1.0:
            scaled_gray = cv.resize(gray, None, fx=path_scale, fy=path_scale)
        else:
            scaled_gray = gray

        tol = (
            cluster_tolerance
            if cluster_tolerance is not None
            else max(1, min_row_height // 2)
        )

        offsets = rust_detect_row_offsets(
            filtered,
            scaled_gray,
            valid_points,
            float(path_scale),
            int(min_row_height),
            int(max_row_height),
            float(prominence),
            int(tol),
            float(min_columns_for_rule),
            int(straight_cost),
            int(perpendicular_cost),
            int(darkness_divisor),
        )

        if not offsets:
            return []

        # First "height" is the offset of the first detected rule; the rest
        # are consecutive differences between detected offsets.
        heights: list[int] = [offsets[0]]
        for prev, cur in zip(offsets, offsets[1:]):
            heights.append(cur - prev)
        return heights

    def find_table_points(
        self,
        img: MatLike | PathLike[str],
        top_row: list[Point | None],
        cell_widths: list[int],
        cell_heights: list[int] | int,
        visual: bool = False,
        visual_notebook: bool = False,
        window: str = WINDOW,
        goals_width: int | None = None,
        filtered: MatLike | PathLike[str] | None = None,
        smooth: bool = False,
        smooth_strength: float = 0.5,
        smooth_iterations: int = 1,
        smooth_degree: int = 1,
    ) -> "SegmentedTable":
        """
        Parse the image to a `SegmentedTable` structure that holds all of the
        intersections between horizontal and vertical rules, starting near the `left_top` point

        Args:
            img (MatLike): the input image of a table
            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
            cell_widths (list[int]): the expected widths of the cells (based on a header template)
            cell_heights (list[int]): the expected height of the rows of data.
                The last value from this list is used until the image has no more vertical space.
            visual (bool): whether to show intermediate steps
            visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
            window (str): the name of the OpenCV window to use for visualization
            goals_width (int | None): the width of the goal region when searching for the next point.
                If None, defaults to 1.5 * search_radius
            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
                calculating the filtered image from scratch
            smooth (bool): if True, smooth the grid after detection
            smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
            smooth_iterations (int): number of smoothing passes. Default: 1
            smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1

        Returns:
            a SegmentedTable object
        """

        if goals_width is None:
            goals_width = self._search_radius * 3 // 2

        if not cell_widths:
            raise ValueError("cell_widths must contain at least one value")

        if not isinstance(img, np.ndarray):
            tmp_img = cv.imread(os.fspath(cast(PathLike[str], img)))
            assert tmp_img is not None
            img = tmp_img

        img = cast(MatLike, img)

        if filtered is None:
            filtered = self.apply(img, visual, visual_notebook)
        else:
            if not isinstance(filtered, np.ndarray):
                filtered = cv.imread(os.fspath(filtered))

        filtered = ensure_gray(filtered)

        if visual:
            imu.show(filtered, window=window)
        if visual_notebook:
            imu.show_notebook(filtered, title="filtered")

        if isinstance(cell_heights, int):
            cell_heights = [cell_heights]

        # Snap each candidate top-row point to the strongest nearby corner
        # response; drop candidates with too little support.
        for i in range(len(top_row)):
            if top_row[i] is None:
                continue

            point = top_row[i]
            assert point is not None
            adjusted, confidence = self.find_nearest(
                filtered, point, int(self._search_radius * 2)
            )

            if confidence < 0.15:
                top_row[i] = None
            else:
                top_row[i] = adjusted

        if not any(top_row):
            logger.error("No good starting candidates given")

        # resize all parameters according to scale
        img = cv.resize(img, None, fx=self._scale, fy=self._scale)

        if visual:
            imu.push(img)

        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
        cell_widths = [int(w * self._scale) for w in cell_widths]
        cell_heights = [int(h * self._scale) for h in cell_heights]
        top_row = [
            (int(p[0] * self._scale), int(p[1] * self._scale))
            if p is not None
            else None
            for p in top_row
        ]
        search_radius = int(self._search_radius * self._scale)

        img_gray = ensure_gray(img)
        filtered_gray = ensure_gray(filtered)

        table_grower = TableGrower(
            filtered_gray,
            cell_widths,
            cell_heights,
            top_row,
            search_radius,
            self._position_weight,
            self._extrapolation_distance,
            self._detection_threshold,
            self._pathfinding_threshold,
            self._min_rows,
            self._growing_resets,
            self._reset_fraction,
        )

        def show_grower_progress(wait: bool = False):
            # Overlay accepted corners (red) and current frontier (green-ish,
            # brighter = higher score) on a copy of the working image.
            img_orig = np.copy(img)
            corners = table_grower.get_all_corners()
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        img_orig = imu.draw_points(
                            img_orig,
                            [corners[y][x]],  # type:ignore
                            color=(0, 0, 255),
                            thickness=30,
                        )

            edge = table_grower.get_edge_points()

            for point, score in edge:
                color = (100, int(clamp(score * 255, 0, 255)), 100)
                imu.draw_point(img_orig, point, color=color, thickness=20)

            imu.show(img_orig, wait=wait)

        if visual:
            threshold = self._detection_threshold

            # python implementation of rust loops, for visualization purposes
            # note this is a LOT slower
            while table_grower.grow_point(img_gray, filtered_gray) is not None:
                show_grower_progress()

            show_grower_progress(True)

            original_threshold = threshold

            loops_without_change = 0

            while not table_grower.is_table_complete():
                loops_without_change += 1

                if loops_without_change > 50:
                    break

                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                    show_grower_progress()

                    loops_without_change = 0

                    grown = False
                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        grown = True
                        # Relax back toward the original threshold as growth succeeds
                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
                        table_grower.set_threshold(threshold)

                    if not grown:
                        threshold *= 0.9
                        table_grower.set_threshold(threshold)

                else:
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        loops_without_change = 0

        else:
            table_grower.grow_table(img_gray, filtered_gray)

        if smooth:
            table_grower.smooth_grid(smooth_strength, smooth_iterations, smooth_degree)
        corners = table_grower.get_all_corners()
        logger.info(
            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
        )
        # rescale corners back to original size
        if self._scale != 1.0:
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        corners[y][x] = (
                            int(corners[y][x][0] / self._scale),  # type:ignore
                            int(corners[y][x][1] / self._scale),  # type:ignore
                        )

        return SegmentedTable(corners)  # type: ignore

    def _visualize_grid(self, img: MatLike, points: list[list[Point]]) -> None:
        """Visualize the detected grid points."""
        all_points = [point for row in points for point in row]
        drawn = imu.draw_points(img, all_points)
        imu.show(drawn, wait=True)

    def _visualize_path_finding(
        self,
        path: list[Point],
        current: Point,
        next_point: Point,
        previous_row_target: Point | None = None,
        region_center: Point | None = None,
        region_size: int | None = None,
    ) -> None:
        """Visualize the path finding process for debugging."""
        global show_time

        screen = imu.pop()

        # if gray, convert to BGR; otherwise draw on the popped screen as-is.
        # BUGFIX: previously `debug_img` was only assigned in the grayscale
        # branch, so a 3-channel screen raised NameError on the first draw.
        if len(screen.shape) == 2 or screen.shape[2] == 1:
            debug_img = cv.cvtColor(screen, cv.COLOR_GRAY2BGR)
        else:
            debug_img = screen

        debug_img = imu.draw_points(debug_img, path, color=(200, 200, 0), thickness=2)
        debug_img = imu.draw_points(
            debug_img, [current], color=(0, 255, 0), thickness=3
        )
        debug_img = imu.draw_points(
            debug_img, [next_point], color=(0, 0, 255), thickness=2
        )

        # Draw previous row target if available
        if previous_row_target is not None:
            debug_img = imu.draw_points(
                debug_img, [previous_row_target], color=(255, 0, 255), thickness=2
            )

        # Draw search region if available
        if region_center is not None and region_size is not None:
            top_left = (
                max(0, region_center[0] - region_size // 2),
                max(0, region_center[1] - region_size // 2),
            )
            bottom_right = (
                min(debug_img.shape[1], region_center[0] + region_size // 2),
                min(debug_img.shape[0], region_center[1] + region_size // 2),
            )
            cv.rectangle(
                debug_img,
                top_left,
                bottom_right,
                color=(255, 0, 0),
                thickness=2,
                lineType=cv.LINE_AA,
            )

        imu.push(debug_img)

        # Only render every 10th call to keep visualization cheap.
        show_time += 1
        if show_time % 10 != 1:
            return

        imu.show(debug_img, title="Next column point", wait=False)

    @log_calls(level=logging.DEBUG, include_return=True)
    def _astar(
        self,
        img: np.ndarray,
        start: tuple[int, int],
        goals: list[tuple[int, int]],
        direction: str,
    ) -> list[Point] | None:
        """
        Find the best path between the start point and one of the goal points on the image.

        Args:
            img: Grayscale image to follow rules through.
            start: Starting pixel ``(x, y)``.
            goals: Candidate end pixels.
            direction: Either ``"horizontal"`` or ``"vertical"``.

        Returns:
            list[Point] | None: Path from start to the closest reachable goal,
            or ``None`` if no path exists.
        """

        if not goals:
            return None

        if self._scale != 1.0:
            img = cv.resize(img, None, fx=self._scale, fy=self._scale)
            start = (int(start[0] * self._scale), int(start[1] * self._scale))
            goals = [(int(g[0] * self._scale), int(g[1] * self._scale)) for g in goals]

        # calculate bounding box with margin
        all_points = [*goals, start]
        xs = [p[0] for p in all_points]
        ys = [p[1] for p in all_points]

        margin = 30
        top_left = (max(0, min(xs) - margin), max(0, min(ys) - margin))
        bottom_right = (
            min(img.shape[1], max(xs) + margin),
            min(img.shape[0], max(ys) + margin),
        )

        # check bounds
        if (
            top_left[0] >= bottom_right[0]
            or top_left[1] >= bottom_right[1]
            or top_left[0] >= img.shape[1]
            or top_left[1] >= img.shape[0]
        ):
            return None

        # transform coordinates to cropped image
        start_local = (start[0] - top_left[0], start[1] - top_left[1])
        goals_local = [(g[0] - top_left[0], g[1] - top_left[1]) for g in goals]

        cropped = img[top_left[1] : bottom_right[1], top_left[0] : bottom_right[0]]

        if cropped.size == 0:
            return None

        path = rust_astar(cropped, start_local, goals_local, direction)

        if path is None:
            return None

        if self._scale != 1.0:
            path = [(int(p[0] / self._scale), int(p[1] / self._scale)) for p in path]
            top_left = (int(top_left[0] / self._scale), int(top_left[1] / self._scale))

        # translate the cropped-image path back to global coordinates
        return [(p[0] + top_left[0], p[1] + top_left[1]) for p in path]
Detects table grid intersections using morphological filtering and template matching.
This detector implements a multi-stage pipeline:
- Binarization: Sauvola adaptive thresholding to handle varying lighting
- Morphological operations: Dilation to connect broken rule segments
- Cross-kernel matching: Template matching with a cross-shaped kernel to find rule intersections where horizontal and vertical lines meet
- Grid growing: Iterative point detection starting from a known seed point
The cross-kernel is designed to match the specific geometry of your table rules. It should be sized so that after morphology, it aligns with actual corner shapes.
Tuning Guidelines
- intersection_kernel_size: Increase if you need more selectivity (fewer false positives)
- line_thickness/line_thickness_horizontal: Should match rule thickness after morphology
- line_gap_fill: Increase to connect more broken lines, but this thickens rules
- binarization_sensitivity: Increase to threshold more aggressively (remove noise)
- search_radius: Increase for documents with more warping/distortion
- position_weight: Increase to prefer corners closer to expected positions
Visual Debugging
Set visual=True in methods to see intermediate results and tune parameters.
150 def __init__( 151 self, 152 intersection_kernel_size: int = 21, 153 line_thickness: int = 6, 154 line_thickness_horizontal: int | None = None, 155 line_gap_fill: int | None = None, 156 binarization_sensitivity: float = 0.04, 157 binarization_window: int = 15, 158 detection_scale: float = 1.0, 159 search_radius: int = 40, 160 position_weight: float = 0.4, 161 pathfinding_threshold: float = 0.2, 162 min_rows: int = 5, 163 detection_threshold: float = 0.3, 164 extrapolation_distance: int = 4, 165 growing_resets: int = 3, 166 reset_fraction: float = 0.5, 167 ): 168 """ 169 Args: 170 intersection_kernel_size (int): the size of the cross kernel 171 a larger kernel size often means that more penalty is applied, often leading 172 to more sparse results 173 line_thickness (int): the width of one of the edges in the cross filter, should be 174 roughly equal to the width of the rules in the image after morphology is applied 175 line_thickness_horizontal (int | None): useful if the horizontal rules and vertical rules 176 have different sizes 177 line_gap_fill (int | None): the size of the morphology operators that are applied before 178 the cross kernel. 'bridges the gaps' of broken-up lines 179 binarization_sensitivity (float): threshold parameter for sauvola thresholding 180 binarization_window (int): window_size parameter for sauvola thresholding 181 detection_scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly) 182 search_radius (int): area in which to search for a new max value in `find_nearest` etc. 
183 position_weight (float): how much the point finding algorithm penalizes points that are further in the region [0, 1] 184 pathfinding_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip astar pathfinding 185 min_rows (int): minimum number of rows to find before stopping the table finding algorithm 186 detection_threshold (float): the threshold for accepting a new point when growing the table 187 extrapolation_distance (int): how many points away to look when calculating the median slope 188 growing_resets (int): The amount of cuts (large deletions) to do in the grid during table growing 189 reset_fraction (float): The portion of the already-chosen corner points to delete during cutting 190 """ 191 self._validate_parameters( 192 intersection_kernel_size, 193 line_thickness, 194 line_thickness_horizontal, 195 line_gap_fill, 196 search_radius, 197 binarization_sensitivity, 198 binarization_window, 199 position_weight, 200 pathfinding_threshold, 201 growing_resets, 202 reset_fraction, 203 ) 204 205 self._intersection_kernel_size = intersection_kernel_size 206 self._line_thickness = line_thickness 207 self._line_thickness_horizontal = ( 208 line_thickness 209 if line_thickness_horizontal is None 210 else line_thickness_horizontal 211 ) 212 self._line_gap_fill = ( 213 line_gap_fill if line_gap_fill is not None else line_thickness 214 ) 215 self._search_radius = search_radius 216 self._binarization_sensitivity = binarization_sensitivity 217 self._binarization_window = binarization_window 218 self._position_weight = position_weight 219 self._scale = detection_scale 220 self._pathfinding_threshold = pathfinding_threshold 221 self._min_rows = min_rows 222 self._detection_threshold = detection_threshold 223 self._extrapolation_distance = extrapolation_distance 224 self._growing_resets = growing_resets 225 self._reset_fraction = reset_fraction 226 227 self._cross_kernel = self._create_cross_kernel()
Arguments:
- intersection_kernel_size (int): the size of the cross kernel; a larger kernel size often means that more penalty is applied, often leading to more sparse results
- line_thickness (int): the width of one of the edges in the cross filter, should be roughly equal to the width of the rules in the image after morphology is applied
- line_thickness_horizontal (int | None): useful if the horizontal rules and vertical rules have different sizes
- line_gap_fill (int | None): the size of the morphology operators that are applied before the cross kernel. 'bridges the gaps' of broken-up lines
- binarization_sensitivity (float): threshold parameter for sauvola thresholding
- binarization_window (int): window_size parameter for sauvola thresholding
- detection_scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- search_radius (int): area in which to search for a new max value in `find_nearest` etc.
- position_weight (float): how much the point finding algorithm penalizes points that are further away, in the range [0, 1]
- pathfinding_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip astar pathfinding
- min_rows (int): minimum number of rows to find before stopping the table finding algorithm
- detection_threshold (float): the threshold for accepting a new point when growing the table
- extrapolation_distance (int): how many points away to look when calculating the median slope
- growing_resets (int): The amount of cuts (large deletions) to do in the grid during table growing
- reset_fraction (float): The portion of the already-chosen corner points to delete during cutting
347 def apply( 348 self, img: MatLike, visual: bool = False, visual_notebook: bool = False 349 ) -> MatLike: 350 """ 351 Apply the grid detection filter to the input image. 352 353 Args: 354 img (MatLike): the input image 355 visual (bool): whether to show intermediate steps via OpenCV windows 356 visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook 357 358 Returns: 359 MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules 360 """ 361 362 if img is None or img.size == 0: 363 raise ValueError("Input image is empty or None") 364 365 binary = imu.sauvola( 366 img, k=self._binarization_sensitivity, window_size=self._binarization_window 367 ) 368 369 if visual: 370 imu.show(binary, title="thresholded") 371 if visual_notebook: 372 imu.show_notebook(binary, title="thresholded") 373 374 binary = self._apply_morphology(binary) 375 376 if visual: 377 imu.show(binary, title="dilated") 378 if visual_notebook: 379 imu.show_notebook(binary, title="dilated") 380 381 filtered = self._apply_cross_matching(binary) 382 383 return filtered
Apply the grid detection filter to the input image.
Arguments:
- img (MatLike): the input image
- visual (bool): whether to show intermediate steps via OpenCV windows
- visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
Returns:
MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: int | None = None
    ) -> tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `search_radius`

        Returns:
            tuple[Point, float]: the best-matching pixel ``(x, y)`` and its
            confidence in ``[0, 1]``. If the search window falls outside the
            image, the input ``point`` is returned with confidence ``0.0``.
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_radius
        x, y = point

        # Calculate crop boundaries: a region_size x region_size window centered
        # on `point`, clipped to the image on all four sides.
        crop_x = max(0, x - region_size // 2)
        crop_y = max(0, y - region_size // 2)
        crop_width = min(region_size, filtered.shape[1] - crop_x)
        crop_height = min(region_size, filtered.shape[0] - crop_y)

        # Handle edge cases: point entirely outside the image -> no match.
        if crop_width <= 0 or crop_height <= 0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]

        if cropped.size == 0:
            return point, 0.0

        # Always apply Gaussian weighting by extending crop if needed, so that
        # responses nearer the expected location score higher than distant ones.
        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
            # Perfect size - apply weights directly
            weights = self._create_gaussian_weights(region_size)
            weighted = cropped.astype(np.float32) * weights
        else:
            # Extend crop to match region_size, apply weights, then restore
            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)

            # Calculate offset to center the cropped region in extended array
            # NOTE(review): centering assumes the clip was symmetric; near an
            # image border the crop is clipped on one side only, so the Gaussian
            # peak may not sit exactly on the query point — confirm acceptable.
            offset_y = (region_size - cropped.shape[0]) // 2
            offset_x = (region_size - cropped.shape[1]) // 2

            # Place cropped region in center of extended array
            extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ] = cropped

            # Apply Gaussian weights to extended array
            weights = self._create_gaussian_weights(region_size)
            weighted_extended = extended.astype(np.float32) * weights

            # Extract the original region back out
            weighted = weighted_extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ]

        # Pick the strongest weighted response inside the (possibly clipped) window.
        best_idx = np.argmax(weighted)
        best_y, best_x = np.unravel_index(best_idx, cropped.shape)

        # Convert window-local coordinates back to full-image coordinates.
        result_point = (
            int(crop_x + best_x),
            int(crop_y + best_y),
        )
        # Normalize the 8-bit response (scaled by the Gaussian weight) to [0, 1].
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence
Find the nearest 'corner match' in the image, along with its score [0,1]
Arguments:
- filtered (MatLike): the filtered image (obtained through `apply`)
- point (tuple[int, int]): the approximate target point (x, y)
- region (None | int): alternative value for the search region, overwriting the `__init__` parameter `search_radius`
Returns:
tuple[Point, float]: the best-matching pixel `(x, y)` and its confidence in `[0, 1]`. If the search window falls outside the image, the input `point` is returned with confidence `0.0`.
466 def detect_row_heights( 467 self, 468 img: MatLike, 469 filtered: MatLike, 470 top_row: list[Point | None], 471 min_row_height: int, 472 max_row_height: int, 473 path_scale: float = 0.25, 474 prominence: float = 18.0, 475 cluster_tolerance: int | None = None, 476 min_columns_for_rule: float = 0.4, 477 straight_cost: int = 10, 478 perpendicular_cost: int = 30, 479 darkness_divisor: int = 100, 480 ) -> list[int]: 481 """ 482 Detect variable row heights from the cross-correlation map by following 483 each vertical rule downward via A* and finding peaks of cross-correlation 484 along that path. 485 486 Args: 487 img: Original (full-resolution) table image. 488 filtered: Cross-correlation map produced by `apply()` (full resolution). 489 top_row: Top points of vertical rules in image space. ``None`` entries 490 (where header alignment failed for that rule) are skipped. 491 min_row_height: Minimum allowed row height in pixels. 492 max_row_height: Maximum allowed row height in pixels. 493 path_scale: Downscale factor used when running A* (purely for speed). 494 The detected path is rescaled back to full resolution for sampling. 495 prominence: Minimum peak value [0, 255] in the cross-correlation profile. 496 cluster_tolerance: Cross-column matching tolerance in pixels. 497 Defaults to ``min_row_height // 2``. 498 min_columns_for_rule: Fraction of columns that must agree on a peak. 499 straight_cost: A* cost per straight (down/up) step. 500 perpendicular_cost: A* cost per lateral step. Higher = stronger 501 straight-line bias. 502 darkness_divisor: A* image cost is ``pixel / darkness_divisor``. 503 Higher = lighter line bias. 504 505 Returns: 506 List of per-row heights (consecutive differences of detected offsets). 507 Empty if detection failed. 
508 """ 509 valid_points = [(float(p[0]), float(p[1])) for p in top_row if p is not None] 510 if not valid_points: 511 return [] 512 513 gray = ensure_gray(img) 514 if path_scale != 1.0: 515 scaled_gray = cv.resize(gray, None, fx=path_scale, fy=path_scale) 516 else: 517 scaled_gray = gray 518 519 tol = ( 520 cluster_tolerance 521 if cluster_tolerance is not None 522 else max(1, min_row_height // 2) 523 ) 524 525 offsets = rust_detect_row_offsets( 526 filtered, 527 scaled_gray, 528 valid_points, 529 float(path_scale), 530 int(min_row_height), 531 int(max_row_height), 532 float(prominence), 533 int(tol), 534 float(min_columns_for_rule), 535 int(straight_cost), 536 int(perpendicular_cost), 537 int(darkness_divisor), 538 ) 539 540 if not offsets: 541 return [] 542 543 heights: list[int] = [offsets[0]] 544 for i in range(1, len(offsets)): 545 heights.append(offsets[i] - offsets[i - 1]) 546 return heights
Detect variable row heights from the cross-correlation map by following each vertical rule downward via A* and finding peaks of cross-correlation along that path.
Arguments:
- img: Original (full-resolution) table image.
- filtered: Cross-correlation map produced by `apply()` (full resolution).
- top_row: Top points of vertical rules in image space. `None` entries (where header alignment failed for that rule) are skipped.
- min_row_height: Minimum allowed row height in pixels.
- max_row_height: Maximum allowed row height in pixels.
- path_scale: Downscale factor used when running A* (purely for speed). The detected path is rescaled back to full resolution for sampling.
- prominence: Minimum peak value [0, 255] in the cross-correlation profile.
- cluster_tolerance: Cross-column matching tolerance in pixels.
Defaults to `min_row_height // 2`.
- min_columns_for_rule: Fraction of columns that must agree on a peak.
- straight_cost: A* cost per straight (down/up) step.
- perpendicular_cost: A* cost per lateral step. Higher = stronger straight-line bias.
- darkness_divisor: A* image cost is `pixel / darkness_divisor`. Higher = lighter line bias.
Returns:
List of per-row heights (consecutive differences of detected offsets). Empty if detection failed.
    def find_table_points(
        self,
        img: MatLike | PathLike[str],
        top_row: list[Point | None],
        cell_widths: list[int],
        cell_heights: list[int] | int,
        visual: bool = False,
        visual_notebook: bool = False,
        window: str = WINDOW,
        goals_width: int | None = None,
        filtered: MatLike | PathLike[str] | None = None,
        smooth: bool = False,
        smooth_strength: float = 0.5,
        smooth_iterations: int = 1,
        smooth_degree: int = 1,
    ) -> "SegmentedTable":
        """
        Parse the image to a `SegmentedTable` structure that holds all of the
        intersections between horizontal and vertical rules, starting near the `left_top` point

        Args:
            img (MatLike): the input image of a table
            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
            cell_widths (list[int]): the expected widths of the cells (based on a header template)
            cell_heights (list[int]): the expected height of the rows of data.
                The last value from this list is used until the image has no more vertical space.
            visual (bool): whether to show intermediate steps
            visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
            window (str): the name of the OpenCV window to use for visualization
            goals_width (int | None): the width of the goal region when searching for the next point.
                If None, defaults to 1.5 * search_radius
            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
                calculating the filtered image from scratch
            smooth (bool): if True, smooth the grid after detection
            smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
            smooth_iterations (int): number of smoothing passes. Default: 1
            smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1

        Returns:
            a SegmentedTable object
        """

        if goals_width is None:
            goals_width = self._search_radius * 3 // 2

        if not cell_widths:
            raise ValueError("cell_widths must contain at least one value")

        # Accept either an in-memory image or a path on disk.
        if not isinstance(img, np.ndarray):
            tmp_img = cv.imread(os.fspath(cast(PathLike[str], img)))
            assert tmp_img is not None
            img = tmp_img

        img = cast(MatLike, img)

        # Reuse a precomputed correlation map when the caller supplies one.
        if filtered is None:
            filtered = self.apply(img, visual, visual_notebook)
        else:
            if not isinstance(filtered, np.ndarray):
                filtered = cv.imread(os.fspath(filtered))

        filtered = ensure_gray(filtered)

        if visual:
            imu.show(filtered, window=window)
        if visual_notebook:
            imu.show_notebook(filtered, title="filtered")

        # A single int means "all rows share this height".
        if isinstance(cell_heights, int):
            cell_heights = [cell_heights]

        # Snap each seed point to the nearest strong corner response; drop
        # seeds whose best match is too weak to trust.
        for i in range(len(top_row)):
            if top_row[i] is None:
                continue

            point = top_row[i]
            assert point is not None
            adjusted, confidence = self.find_nearest(
                filtered, point, int(self._search_radius * 2)
            )

            if confidence < 0.15:
                top_row[i] = None
            else:
                top_row[i] = adjusted

        if not any(top_row):
            logger.error("No good starting candidates given")

        # resize all parameters according to scale
        img = cv.resize(img, None, fx=self._scale, fy=self._scale)

        if visual:
            imu.push(img)

        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
        cell_widths = [int(w * self._scale) for w in cell_widths]
        cell_heights = [int(h * self._scale) for h in cell_heights]
        top_row = [
            (int(p[0] * self._scale), int(p[1] * self._scale))
            if p is not None
            else None
            for p in top_row
        ]
        search_radius = int(self._search_radius * self._scale)

        img_gray = ensure_gray(img)
        filtered_gray = ensure_gray(filtered)

        # The grower (Rust-backed) owns the iterative corner-growing state.
        table_grower = TableGrower(
            filtered_gray,
            cell_widths,
            cell_heights,
            top_row,
            search_radius,
            self._position_weight,
            self._extrapolation_distance,
            self._detection_threshold,
            self._pathfinding_threshold,
            self._min_rows,
            self._growing_resets,
            self._reset_fraction,
        )

        def show_grower_progress(wait: bool = False):
            # Overlay accepted corners (red) and frontier points (confidence-tinted
            # green) on a copy of the working image.
            img_orig = np.copy(img)
            corners = table_grower.get_all_corners()
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        img_orig = imu.draw_points(
                            img_orig,
                            [corners[y][x]],  # type:ignore
                            color=(0, 0, 255),
                            thickness=30,
                        )

            edge = table_grower.get_edge_points()

            for point, score in edge:
                color = (100, int(clamp(score * 255, 0, 255)), 100)
                imu.draw_point(img_orig, point, color=color, thickness=20)

            imu.show(img_orig, wait=wait)

        if visual:
            threshold = self._detection_threshold

            # python implementation of rust loops, for visualization purposes
            # note this is a LOT slower
            while table_grower.grow_point(img_gray, filtered_gray) is not None:
                show_grower_progress()

            show_grower_progress(True)

            original_threshold = threshold

            loops_without_change = 0

            while not table_grower.is_table_complete():
                loops_without_change += 1

                # Give up after 50 consecutive iterations with no progress.
                if loops_without_change > 50:
                    break

                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                    show_grower_progress()

                    loops_without_change = 0

                    grown = False
                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        grown = True
                        # Progress made: relax the threshold back toward its
                        # original value (capped so it never exceeds it).
                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
                        table_grower.set_threshold(threshold)

                    if not grown:
                        # Stalled: lower the acceptance bar to keep moving.
                        threshold *= 0.9
                        table_grower.set_threshold(threshold)

                else:
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        loops_without_change = 0

        else:
            # Fast path: run the whole growing loop inside the Rust extension.
            table_grower.grow_table(img_gray, filtered_gray)

        if smooth:
            table_grower.smooth_grid(smooth_strength, smooth_iterations, smooth_degree)
        corners = table_grower.get_all_corners()
        logger.info(
            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
        )
        # rescale corners back to original size
        if self._scale != 1.0:
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        corners[y][x] = (
                            int(corners[y][x][0] / self._scale),  # type:ignore
                            int(corners[y][x][1] / self._scale),  # type:ignore
                        )

        return SegmentedTable(corners)  # type: ignore
Parse the image to a SegmentedTable structure that holds all of the
intersections between horizontal and vertical rules, starting near the left_top point
Arguments:
- img (MatLike): the input image of a table
- top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
- cell_widths (list[int]): the expected widths of the cells (based on a header template)
- cell_heights (list[int]): the expected height of the rows of data. The last value from this list is used until the image has no more vertical space.
- visual (bool): whether to show intermediate steps
- window (str): the name of the OpenCV window to use for visualization
- goals_width (int | None): the width of the goal region when searching for the next point. If None, defaults to 1.5 * search_radius
- filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of calculating the filtered image from scratch
- smooth (bool): if True, smooth the grid after detection
- smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
- smooth_iterations (int): number of smoothing passes. Default: 1
- smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1
Returns:
a SegmentedTable object
95class TableIndexer(ABC): 96 """ 97 Abstract base class for table cell indexing and cropping. 98 99 Subclasses (`SegmentedTable`, `TableTemplate`) implement the `cols`, `rows`, 100 and `cell_polygon` interface. This base provides shared methods for 101 mapping pixel coordinates to cell indices and cropping cells/regions. 102 """ 103 104 def __init__(self): 105 self._col_offset = 0 106 107 @property 108 def col_offset(self) -> int: 109 """Column offset applied when reporting cell coordinates.""" 110 return self._col_offset 111 112 @col_offset.setter 113 def col_offset(self, value: int): 114 assert value >= 0 115 self._col_offset = value 116 117 @property 118 @abstractmethod 119 def cols(self) -> int: 120 """Total number of cell columns.""" 121 pass 122 123 @property 124 @abstractmethod 125 def rows(self) -> int: 126 """Total number of cell rows.""" 127 pass 128 129 def cells(self) -> Generator[tuple[int, int]]: 130 """ 131 Generate all cell indices in row-major order. 132 133 Yields (row, col) tuples for every cell in the table, iterating 134 through each row from left to right, top to bottom. 135 136 Yields: 137 tuple[int, int]: Cell indices as (row, col). 138 139 Example: 140 >>> for row, col in grid.cells(): 141 ... cell_img = grid.crop_cell(image, (row, col)) 142 ... 
process(cell_img) 143 """ 144 for row in range(self.rows): 145 for col in range(self.cols): 146 yield (row, col) 147 148 def _check_row_idx(self, row: int): 149 if row < 0: 150 raise TauluException("row number needs to be positive or zero") 151 if row >= self.rows: 152 raise TauluException(f"row number too high: {row} >= {self.rows}") 153 154 def _check_col_idx(self, col: int): 155 if col < 0: 156 raise TauluException("col number needs to be positive or zero") 157 if col >= self.cols: 158 raise TauluException(f"col number too high: {col} >= {self.cols}") 159 160 @abstractmethod 161 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 162 """ 163 Returns the coordinate (row, col) of the cell that contains the given position 164 165 Args: 166 point (tuple[float, float]): a location in the input image 167 168 Returns: 169 tuple[int, int]: the cell index (row, col) that contains the given point 170 """ 171 pass 172 173 @abstractmethod 174 def cell_polygon( 175 self, cell: tuple[int, int] 176 ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]: 177 """ 178 Return the polygon (used e.g. with OpenCV) that encloses ``cell``. 179 180 Args: 181 cell: Cell indices as ``(row, col)``. 182 183 Returns: 184 Four ``(x, y)`` points in order: top-left, top-right, 185 bottom-right, bottom-left. 
186 """ 187 pass 188 189 def _highlight_cell( 190 self, 191 image: MatLike, 192 cell: tuple[int, int], 193 color: tuple[int, int, int] = (0, 0, 255), 194 thickness: int = 2, 195 ): 196 polygon = self.cell_polygon(cell) 197 points = np.int32(list(polygon)) # type:ignore 198 cv.polylines(image, [points], True, color, thickness, cv.LINE_AA) 199 cv.putText( 200 image, 201 str(cell), 202 (int(polygon[3][0] + 10), int(polygon[3][1] - 10)), 203 cv.FONT_HERSHEY_PLAIN, 204 2.0, 205 (255, 255, 255), 206 2, 207 ) 208 209 def highlight_all_cells( 210 self, 211 image: MatLike | os.PathLike[str] | str, 212 color: tuple[int, int, int] = (0, 0, 255), 213 thickness: int = 1, 214 ) -> MatLike: 215 """ 216 Return a copy of ``image`` with every cell outlined and labeled. 217 218 Args: 219 image: Source image (path or array, BGR). 220 color: BGR color of the outline. 221 thickness: Line thickness in pixels. 222 223 Returns: 224 MatLike: a new image with all cells highlighted. 225 """ 226 if not isinstance(image, np.ndarray): 227 image = cv.imread(os.fspath(image)) # ty:ignore 228 img = np.copy(image) 229 230 for cell in self.cells(): 231 self._highlight_cell(img, cell, color, thickness) 232 233 return img 234 235 def select_one_cell( 236 self, 237 image: MatLike, 238 window: str = WINDOW, 239 color: tuple[int, int, int] = (255, 0, 0), 240 thickness: int = 2, 241 ) -> tuple[int, int] | None: 242 """ 243 Open an OpenCV window and let the user click one cell. 244 245 Args: 246 image: Source image (BGR). 247 window: OpenCV window name. 248 color: Highlight color in BGR. 249 thickness: Outline thickness in pixels. 250 251 Returns: 252 tuple[int, int] | None: ``(row, col)`` of the clicked cell, or 253 ``None`` if the window was closed without a valid click. 
254 """ 255 clicked = None 256 257 def click_event(event, x, y, flags, params): 258 nonlocal clicked 259 260 img = np.copy(image) 261 _ = flags 262 _ = params 263 if event == cv.EVENT_LBUTTONDOWN: 264 cell = self.cell((x, y)) 265 if cell[0] >= 0: 266 clicked = cell 267 else: 268 return 269 self._highlight_cell(img, cell, color, thickness) 270 cv.imshow(window, img) 271 272 imu.show(image, click_event=click_event, title="select one cell", window=window) 273 274 return clicked 275 276 def show_cells( 277 self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW 278 ) -> list[tuple[int, int]] | ShowCellsSession: 279 """ 280 Interactively display and highlight table cells. 281 282 In standard environments, shows an OpenCV window where clicking highlights cells. 283 In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib. 284 285 Args: 286 image: Source image (path or array). 287 window: OpenCV window name (ignored in notebooks). 288 289 Returns: 290 list[tuple[int, int]]: Clicked cell indices (non-notebook). 291 ShowCellsSession: Session object with .cells property (notebook). 292 293 Example: 294 >>> # Standard Python 295 >>> cells = grid.show_cells("table.png") 296 >>> 297 >>> # Jupyter Notebook 298 >>> session = grid.show_cells("table.png") 299 >>> # ... click cells ... 
300 >>> cells = session.cells 301 """ 302 if not isinstance(image, np.ndarray): 303 image = cv.imread(os.fspath(image)) # ty:ignore 304 305 def running_in_notebook() -> bool: 306 try: 307 from IPython import get_ipython 308 309 ip = get_ipython() 310 return ip is not None and "IPKernelApp" in ip.config 311 except Exception: 312 return False 313 314 use_notebook = running_in_notebook() 315 316 if use_notebook: 317 return self.show_cells_notebook(image) 318 else: 319 img = np.copy(image) 320 cells = [] 321 322 def click_event(event, x, y, flags, params): 323 _ = flags 324 _ = params 325 if event == cv.EVENT_LBUTTONDOWN: 326 cell = self.cell((x, y)) 327 if cell[0] >= 0: 328 cells.append(cell) 329 else: 330 return 331 self._highlight_cell(img, cell) 332 cv.imshow(window, img) 333 334 imu.show( 335 img, 336 click_event=click_event, 337 title="click to highlight cells", 338 window=window, 339 ) 340 341 return cells 342 343 def show_cells_notebook( 344 self, image: MatLike | os.PathLike[str] | str 345 ) -> ShowCellsSession: 346 """ 347 Notebook-compatible version of show_cells using matplotlib. 348 349 Returns a ShowCellsSession immediately. Click on cells to highlight them. 350 Access clicked cells via session.cells. 351 352 Args: 353 image: Source image (path or array). 354 355 Returns: 356 ShowCellsSession: Access .cells to get list of clicked cell indices. 357 358 Example: 359 >>> session = grid.show_cells_notebook("table.png") 360 >>> # Click cells in the interactive plot 361 >>> print(session.cells) # [(0, 0), (1, 2), ...] 
362 """ 363 if not isinstance(image, np.ndarray): 364 tmp_image = cv.imread(os.fspath(image)) 365 assert tmp_image is not None 366 image = tmp_image 367 368 import ipywidgets as widgets 369 import matplotlib.pyplot as plt 370 from IPython.display import display 371 372 session = ShowCellsSession() 373 374 # Convert BGR to RGB for matplotlib 375 display_img = cv.cvtColor(image, cv.COLOR_BGR2RGB) 376 img_with_highlights = np.copy(display_img) 377 378 fig, ax = plt.subplots(figsize=(15, 12)) 379 fig.canvas.toolbar_visible = False # ty:ignore[unresolved-attribute] 380 fig.canvas.header_visible = False # ty:ignore[unresolved-attribute] 381 382 im_display = ax.imshow(img_with_highlights) 383 ax.set_title("Click cells to highlight them. Cells clicked: 0") 384 ax.set_axis_off() 385 386 # Create buttons 387 done_button = widgets.Button( 388 description="Done", 389 button_style="success", 390 layout=widgets.Layout(width="150px", height="50px"), 391 ) 392 clear_button = widgets.Button( 393 description="Clear All", 394 button_style="warning", 395 layout=widgets.Layout(width="150px", height="50px"), 396 ) 397 undo_button = widgets.Button( 398 description="Undo Last", 399 button_style="info", 400 layout=widgets.Layout(width="150px", height="50px"), 401 ) 402 403 done_button.style.font_size = "18px" 404 clear_button.style.font_size = "18px" 405 undo_button.style.font_size = "18px" 406 407 status_label = widgets.Label( 408 value="Click on cells to highlight them", style={"font_size": "18px"} 409 ) 410 411 def draw_highlight(cell_idx: tuple[int, int]): 412 """Draw a highlighted cell on the image.""" 413 polygon = self.cell_polygon(cell_idx) 414 points = np.array(list(polygon), dtype=np.int32) 415 416 # Draw polyline on the RGB image 417 cv.polylines( 418 img_with_highlights, 419 [points], 420 True, 421 (255, 0, 0), # Red in RGB 422 2, 423 cv.LINE_AA, 424 ) 425 426 # Draw cell index text 427 cv.putText( 428 img_with_highlights, 429 str(cell_idx), 430 (int(polygon[3][0] + 10), 
int(polygon[3][1] - 10)), 431 cv.FONT_HERSHEY_PLAIN, 432 2.0, 433 (255, 255, 255), # White in RGB 434 2, 435 ) 436 437 def redraw_all(): 438 """Redraw the image with all current highlights.""" 439 nonlocal img_with_highlights 440 img_with_highlights = np.copy(display_img) 441 442 for cell_idx in session._cells: 443 draw_highlight(cell_idx) 444 445 im_display.set_data(img_with_highlights) 446 ax.set_title( 447 f"Click cells to highlight them. Cells clicked: {len(session._cells)}" 448 ) 449 fig.canvas.draw_idle() 450 451 def on_click(event): 452 if event.inaxes != ax or event.xdata is None: 453 return 454 455 x, y = int(event.xdata), int(event.ydata) 456 457 if event.button == 1: # Left click 458 cell_idx = self.cell((x, y)) 459 if cell_idx[0] >= 0: 460 session._cells.append(cell_idx) 461 draw_highlight(cell_idx) 462 im_display.set_data(img_with_highlights) 463 ax.set_title( 464 f"Click cells to highlight them. Cells clicked: {len(session._cells)}" 465 ) 466 status_label.value = ( 467 f"Cell {cell_idx} highlighted. Total: {len(session._cells)}" 468 ) 469 fig.canvas.draw_idle() 470 else: 471 status_label.value = f"Click at ({x}, {y}) is outside table bounds" 472 473 def on_clear(_): 474 session._cells.clear() 475 redraw_all() 476 status_label.value = "All highlights cleared" 477 478 def on_undo(_): 479 if session._cells: 480 removed = session._cells.pop() 481 redraw_all() 482 status_label.value = ( 483 f"Removed cell {removed}. Remaining: {len(session._cells)}" 484 ) 485 else: 486 status_label.value = "No cells to undo" 487 488 def on_done(_): 489 fig.canvas.mpl_disconnect(cid) 490 done_button.disabled = True 491 clear_button.disabled = True 492 undo_button.disabled = True 493 ax.set_title(f"Done! {len(session._cells)} cells highlighted.") 494 status_label.value = "Complete! 
Access clicked cells via session.cells" 495 fig.canvas.draw_idle() 496 497 done_button.on_click(on_done) 498 clear_button.on_click(on_clear) 499 undo_button.on_click(on_undo) 500 501 cid = fig.canvas.mpl_connect("button_press_event", on_click) 502 503 plt.tight_layout(pad=0) 504 plt.show() 505 display(widgets.HBox([done_button, clear_button, undo_button, status_label])) 506 507 return session 508 509 @abstractmethod 510 def region( 511 self, 512 start: tuple[int, int], 513 end: tuple[int, int], 514 ) -> tuple[Point, Point, Point, Point]: 515 """ 516 Get the bounding polygon for the rectangular region from ``start`` to 517 ``end`` (both cells inclusive). 518 519 Args: 520 start: Top-left cell as ``(row, col)``. 521 end: Bottom-right cell as ``(row, col)``. 522 523 Returns: 524 Four points (lt, rt, rb, lb), each as ``(x, y)``. 525 """ 526 pass 527 528 def crop_region( 529 self, 530 image: MatLike, 531 start: tuple[int, int], 532 end: tuple[int, int], 533 margin: int = 0, 534 margin_top: int | None = None, 535 margin_bottom: int | None = None, 536 margin_left: int | None = None, 537 margin_right: int | None = None, 538 margin_y: int | None = None, 539 margin_x: int | None = None, 540 ) -> MatLike: 541 """ 542 Extract a multi-cell region from the image with perspective correction. 543 544 Crops the image to include all cells from start to end (inclusive), 545 applying a perspective transform to produce a rectangular output. 546 547 Args: 548 image: Source image (BGR or grayscale). 549 start: Top-left cell as (row, col). 550 end: Bottom-right cell as (row, col). 551 margin: Uniform margin in pixels (default 0). 552 margin_top: Override top margin. 553 margin_bottom: Override bottom margin. 554 margin_left: Override left margin. 555 margin_right: Override right margin. 556 margin_y: Override vertical margins (top and bottom). 557 margin_x: Override horizontal margins (left and right). 558 559 Returns: 560 Cropped and perspective-corrected image. 
561 562 Example: 563 >>> # Extract a 3x2 region starting at cell (1, 0) 564 >>> region_img = grid.crop_region(image, (1, 0), (3, 1)) 565 """ 566 567 region = self.region(start, end) 568 569 lt, rt, rb, lb = _apply_margin( 570 *region, 571 margin=margin, 572 margin_top=margin_top, 573 margin_bottom=margin_bottom, 574 margin_left=margin_left, 575 margin_right=margin_right, 576 margin_y=margin_y, 577 margin_x=margin_x, 578 ) 579 580 # apply margins according to priority: 581 # margin_top > margin_y > margin (etc.) 582 583 w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2 584 h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2 585 586 # crop by doing a perspective transform to the desired quad 587 src_pts = np.array([lt, rt, rb, lb], dtype="float32") 588 dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32") 589 m = cv.getPerspectiveTransform(src_pts, dst_pts) 590 warped = cv.warpPerspective(image, m, (int(w), int(h))) 591 592 return warped 593 594 @abstractmethod 595 def text_regions( 596 self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0 597 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 598 """ 599 Split a row into spans of continuous text crossing column rules. 600 601 Args: 602 img: Source table image. 603 row: Row index to scan. 604 margin_x: Horizontal margin around each rule crop, in pixels. 605 margin_y: Vertical margin around each rule crop, in pixels. 606 607 Returns: 608 List of ``((row, start_col), (row, end_col))`` spans (inclusive). 609 """ 610 611 pass 612 613 def crop_cell( 614 self, 615 image, 616 cell: tuple[int, int], 617 margin: int = 0, 618 margin_top: int | None = None, 619 margin_bottom: int | None = None, 620 margin_left: int | None = None, 621 margin_right: int | None = None, 622 margin_y: int | None = None, 623 margin_x: int | None = None, 624 ) -> MatLike: 625 """ 626 Extract a single cell from the image with perspective correction. 627 628 Convenience method equivalent to `crop_region(image, cell, cell, margin)`. 
629 630 Args: 631 image: Source image (BGR or grayscale). 632 cell: Cell indices as (row, col). 633 margin: Padding in pixels around the cell (default 0). 634 635 Returns: 636 Cropped and perspective-corrected cell image. 637 638 Example: 639 >>> cell_img = grid.crop_cell(image, (0, 0)) 640 >>> cv2.imwrite("cell_0_0.png", cell_img) 641 """ 642 return self.crop_region( 643 image, 644 cell, 645 cell, 646 margin, 647 margin_top, 648 margin_bottom, 649 margin_left, 650 margin_right, 651 margin_y, 652 margin_x, 653 )
Abstract base class for table cell indexing and cropping.
Subclasses (SegmentedTable, TableTemplate) implement the cols, rows,
and cell_polygon interface. This base provides shared methods for
mapping pixel coordinates to cell indices and cropping cells/regions.
@property
def col_offset(self) -> int:
    """Offset added to column indices when cell coordinates are reported."""
    return self._col_offset
Column offset applied when reporting cell coordinates.
@property
@abstractmethod
def cols(self) -> int:
    """Number of cell columns in the table grid."""
    ...
Total number of cell columns.
@property
@abstractmethod
def rows(self) -> int:
    """Number of cell rows in the table grid."""
    ...
Total number of cell rows.
def cells(self) -> Generator[tuple[int, int]]:
    """
    Iterate over every cell index in row-major order.

    Produces (row, col) tuples for each cell of the table, scanning every
    row from left to right, top to bottom.

    Yields:
        tuple[int, int]: Cell indices as (row, col).

    Example:
        >>> for row, col in grid.cells():
        ...     cell_img = grid.crop_cell(image, (row, col))
        ...     process(cell_img)
    """
    yield from ((r, c) for r in range(self.rows) for c in range(self.cols))
Generate all cell indices in row-major order.
Yields (row, col) tuples for every cell in the table, iterating through each row from left to right, top to bottom.
Yields:
tuple[int, int]: Cell indices as (row, col).
Example:
>>> for row, col in grid.cells(): ... cell_img = grid.crop_cell(image, (row, col)) ... process(cell_img)
@abstractmethod
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Map an image location to the index of the cell that contains it.

    Args:
        point (tuple[float, float]): a location in the input image

    Returns:
        tuple[int, int]: the cell index (row, col) that contains the given point
    """
    ...
Returns the coordinate (row, col) of the cell that contains the given position
Arguments:
- point (tuple[float, float]): a location in the input image
Returns:
tuple[int, int]: the cell index (row, col) that contains the given point
@abstractmethod
def cell_polygon(
    self, cell: tuple[int, int]
) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
    """
    Give the quadrilateral that encloses ``cell`` (usable e.g. with OpenCV).

    Args:
        cell: Cell indices as ``(row, col)``.

    Returns:
        Four ``(x, y)`` points in order: top-left, top-right,
        bottom-right, bottom-left.
    """
    ...
Return the polygon (used e.g. with OpenCV) that encloses cell.
Arguments:
- cell: Cell indices as `(row, col)`.
Returns:
Four `(x, y)` points in order: top-left, top-right, bottom-right, bottom-left.
def highlight_all_cells(
    self,
    image: MatLike | os.PathLike[str] | str,
    color: tuple[int, int, int] = (0, 0, 255),
    thickness: int = 1,
) -> MatLike:
    """
    Return a copy of ``image`` with every cell outlined and labeled.

    Args:
        image: Source image (path or array, BGR).
        color: BGR color of the outline.
        thickness: Line thickness in pixels.

    Returns:
        MatLike: a new image with all cells highlighted.
    """
    if not isinstance(image, np.ndarray):
        loaded = cv.imread(os.fspath(image))
        # Fail fast on unreadable paths, consistent with show_cells_notebook;
        # previously a None from imread propagated into np.copy silently.
        assert loaded is not None, f"could not read image: {image}"
        image = loaded

    img = np.copy(image)

    for cell in self.cells():
        self._highlight_cell(img, cell, color, thickness)

    return img
Return a copy of image with every cell outlined and labeled.
Arguments:
- image: Source image (path or array, BGR).
- color: BGR color of the outline.
- thickness: Line thickness in pixels.
Returns:
MatLike: a new image with all cells highlighted.
def select_one_cell(
    self,
    image: MatLike,
    window: str = WINDOW,
    color: tuple[int, int, int] = (255, 0, 0),
    thickness: int = 2,
) -> tuple[int, int] | None:
    """
    Open an OpenCV window and let the user click one cell.

    Args:
        image: Source image (BGR).
        window: OpenCV window name.
        color: Highlight color in BGR.
        thickness: Outline thickness in pixels.

    Returns:
        tuple[int, int] | None: ``(row, col)`` of the clicked cell, or
        ``None`` if the window was closed without a valid click.
    """
    clicked = None

    def click_event(event, x, y, flags, params):
        nonlocal clicked

        # Fresh copy per click so only the most recent selection is drawn.
        img = np.copy(image)
        _ = flags
        _ = params
        if event == cv.EVENT_LBUTTONDOWN:
            cell = self.cell((x, y))
            if cell[0] >= 0:
                clicked = cell
            else:
                # Click fell outside the table bounds; ignore it.
                return
            self._highlight_cell(img, cell, color, thickness)
            cv.imshow(window, img)

    # imu.show blocks until the window is closed; `clicked` holds the last
    # valid selection made before closing (or None).
    imu.show(image, click_event=click_event, title="select one cell", window=window)

    return clicked
Open an OpenCV window and let the user click one cell.
Arguments:
- image: Source image (BGR).
- window: OpenCV window name.
- color: Highlight color in BGR.
- thickness: Outline thickness in pixels.
Returns:
tuple[int, int] | None: `(row, col)` of the clicked cell, or `None` if the window was closed without a valid click.
def show_cells(
    self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
) -> list[tuple[int, int]] | ShowCellsSession:
    """
    Interactively display and highlight table cells.

    In standard environments, shows an OpenCV window where clicking highlights cells.
    In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib.

    Args:
        image: Source image (path or array).
        window: OpenCV window name (ignored in notebooks).

    Returns:
        list[tuple[int, int]]: Clicked cell indices (non-notebook).
        ShowCellsSession: Session object with .cells property (notebook).

    Example:
        >>> # Standard Python
        >>> cells = grid.show_cells("table.png")
        >>>
        >>> # Jupyter Notebook
        >>> session = grid.show_cells("table.png")
        >>> # ... click cells ...
        >>> cells = session.cells
    """
    if not isinstance(image, np.ndarray):
        image = cv.imread(os.fspath(image))  # ty:ignore

    def running_in_notebook() -> bool:
        # Heuristic: a configured IPython kernel means we're inside Jupyter.
        try:
            from IPython import get_ipython

            ip = get_ipython()
            return ip is not None and "IPKernelApp" in ip.config
        except Exception:
            # IPython missing or misbehaving -> assume plain Python.
            return False

    use_notebook = running_in_notebook()

    if use_notebook:
        return self.show_cells_notebook(image)
    else:
        img = np.copy(image)
        cells = []

        def click_event(event, x, y, flags, params):
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                cell = self.cell((x, y))
                if cell[0] >= 0:
                    cells.append(cell)
                else:
                    # Click outside the table bounds is ignored.
                    return
                self._highlight_cell(img, cell)
                cv.imshow(window, img)

        # Blocks until the window is closed; `cells` accumulates valid clicks.
        imu.show(
            img,
            click_event=click_event,
            title="click to highlight cells",
            window=window,
        )

        return cells
Interactively display and highlight table cells.
In standard environments, shows an OpenCV window where clicking highlights cells. In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib.
Arguments:
- image: Source image (path or array).
- window: OpenCV window name (ignored in notebooks).
Returns:
list[tuple[int, int]]: Clicked cell indices (non-notebook). ShowCellsSession: Session object with .cells property (notebook).
Example:
>>> # Standard Python >>> cells = grid.show_cells("table.png") >>> >>> # Jupyter Notebook >>> session = grid.show_cells("table.png") >>> # ... click cells ... >>> cells = session.cells
def show_cells_notebook(
    self, image: MatLike | os.PathLike[str] | str
) -> ShowCellsSession:
    """
    Notebook-compatible version of show_cells using matplotlib.

    Returns a ShowCellsSession immediately. Click on cells to highlight them.
    Access clicked cells via session.cells.

    Args:
        image: Source image (path or array).

    Returns:
        ShowCellsSession: Access .cells to get list of clicked cell indices.

    Example:
        >>> session = grid.show_cells_notebook("table.png")
        >>> # Click cells in the interactive plot
        >>> print(session.cells)  # [(0, 0), (1, 2), ...]
    """
    if not isinstance(image, np.ndarray):
        tmp_image = cv.imread(os.fspath(image))
        assert tmp_image is not None
        image = tmp_image

    # Notebook-only dependencies are imported lazily so plain-script users
    # don't need ipywidgets/matplotlib installed.
    import ipywidgets as widgets
    import matplotlib.pyplot as plt
    from IPython.display import display

    session = ShowCellsSession()

    # Convert BGR to RGB for matplotlib
    display_img = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    img_with_highlights = np.copy(display_img)

    fig, ax = plt.subplots(figsize=(15, 12))
    fig.canvas.toolbar_visible = False  # ty:ignore[unresolved-attribute]
    fig.canvas.header_visible = False  # ty:ignore[unresolved-attribute]

    im_display = ax.imshow(img_with_highlights)
    ax.set_title("Click cells to highlight them. Cells clicked: 0")
    ax.set_axis_off()

    # Create buttons
    done_button = widgets.Button(
        description="Done",
        button_style="success",
        layout=widgets.Layout(width="150px", height="50px"),
    )
    clear_button = widgets.Button(
        description="Clear All",
        button_style="warning",
        layout=widgets.Layout(width="150px", height="50px"),
    )
    undo_button = widgets.Button(
        description="Undo Last",
        button_style="info",
        layout=widgets.Layout(width="150px", height="50px"),
    )

    done_button.style.font_size = "18px"
    clear_button.style.font_size = "18px"
    undo_button.style.font_size = "18px"

    status_label = widgets.Label(
        value="Click on cells to highlight them", style={"font_size": "18px"}
    )

    def draw_highlight(cell_idx: tuple[int, int]):
        """Draw a highlighted cell on the image."""
        polygon = self.cell_polygon(cell_idx)
        points = np.array(list(polygon), dtype=np.int32)

        # Draw polyline on the RGB image
        cv.polylines(
            img_with_highlights,
            [points],
            True,
            (255, 0, 0),  # Red in RGB
            2,
            cv.LINE_AA,
        )

        # Draw cell index text
        cv.putText(
            img_with_highlights,
            str(cell_idx),
            (int(polygon[3][0] + 10), int(polygon[3][1] - 10)),
            cv.FONT_HERSHEY_PLAIN,
            2.0,
            (255, 255, 255),  # White in RGB
            2,
        )

    def redraw_all():
        """Redraw the image with all current highlights."""
        # Start over from the clean image, then re-apply every highlight.
        nonlocal img_with_highlights
        img_with_highlights = np.copy(display_img)

        for cell_idx in session._cells:
            draw_highlight(cell_idx)

        im_display.set_data(img_with_highlights)
        ax.set_title(
            f"Click cells to highlight them. Cells clicked: {len(session._cells)}"
        )
        fig.canvas.draw_idle()

    def on_click(event):
        if event.inaxes != ax or event.xdata is None:
            return

        x, y = int(event.xdata), int(event.ydata)

        if event.button == 1:  # Left click
            cell_idx = self.cell((x, y))
            if cell_idx[0] >= 0:
                session._cells.append(cell_idx)
                draw_highlight(cell_idx)
                im_display.set_data(img_with_highlights)
                ax.set_title(
                    f"Click cells to highlight them. Cells clicked: {len(session._cells)}"
                )
                status_label.value = (
                    f"Cell {cell_idx} highlighted. Total: {len(session._cells)}"
                )
                fig.canvas.draw_idle()
            else:
                status_label.value = f"Click at ({x}, {y}) is outside table bounds"

    def on_clear(_):
        session._cells.clear()
        redraw_all()
        status_label.value = "All highlights cleared"

    def on_undo(_):
        if session._cells:
            removed = session._cells.pop()
            redraw_all()
            status_label.value = (
                f"Removed cell {removed}. Remaining: {len(session._cells)}"
            )
        else:
            status_label.value = "No cells to undo"

    def on_done(_):
        # Detach the click handler and freeze the UI; session.cells stays usable.
        fig.canvas.mpl_disconnect(cid)
        done_button.disabled = True
        clear_button.disabled = True
        undo_button.disabled = True
        ax.set_title(f"Done! {len(session._cells)} cells highlighted.")
        status_label.value = "Complete! Access clicked cells via session.cells"
        fig.canvas.draw_idle()

    done_button.on_click(on_done)
    clear_button.on_click(on_clear)
    undo_button.on_click(on_undo)

    cid = fig.canvas.mpl_connect("button_press_event", on_click)

    plt.tight_layout(pad=0)
    plt.show()
    display(widgets.HBox([done_button, clear_button, undo_button, status_label]))

    return session
Notebook-compatible version of show_cells using matplotlib.
Returns a ShowCellsSession immediately. Click on cells to highlight them. Access clicked cells via session.cells.
Arguments:
- image: Source image (path or array).
Returns:
ShowCellsSession: Access .cells to get list of clicked cell indices.
Example:
>>> session = grid.show_cells_notebook("table.png") >>> # Click cells in the interactive plot >>> print(session.cells) # [(0, 0), (1, 2), ...]
@abstractmethod
def region(
    self,
    start: tuple[int, int],
    end: tuple[int, int],
) -> tuple[Point, Point, Point, Point]:
    """
    Bounding polygon of the rectangular cell range ``start``..``end``
    (both endpoint cells inclusive).

    Args:
        start: Top-left cell as ``(row, col)``.
        end: Bottom-right cell as ``(row, col)``.

    Returns:
        Four points (lt, rt, rb, lb), each as ``(x, y)``.
    """
    ...
Get the bounding polygon for the rectangular region from start to
end (both cells inclusive).
Arguments:
- start: Top-left cell as `(row, col)`.
- end: Bottom-right cell as `(row, col)`.
Returns:
Four points (lt, rt, rb, lb), each as `(x, y)`.
def crop_region(
    self,
    image: MatLike,
    start: tuple[int, int],
    end: tuple[int, int],
    margin: int = 0,
    margin_top: int | None = None,
    margin_bottom: int | None = None,
    margin_left: int | None = None,
    margin_right: int | None = None,
    margin_y: int | None = None,
    margin_x: int | None = None,
) -> MatLike:
    """
    Extract a multi-cell region from the image with perspective correction.

    Crops the image to include all cells from start to end (inclusive),
    applying a perspective transform to produce a rectangular output.

    Args:
        image: Source image (BGR or grayscale).
        start: Top-left cell as (row, col).
        end: Bottom-right cell as (row, col).
        margin: Uniform margin in pixels (default 0).
        margin_top: Override top margin.
        margin_bottom: Override bottom margin.
        margin_left: Override left margin.
        margin_right: Override right margin.
        margin_y: Override vertical margins (top and bottom).
        margin_x: Override horizontal margins (left and right).

    Returns:
        Cropped and perspective-corrected image.

    Example:
        >>> # Extract a 3x2 region starting at cell (1, 0)
        >>> region_img = grid.crop_region(image, (1, 0), (3, 1))
    """
    # Margin precedence: the most specific override wins
    # (margin_top > margin_y > margin, and likewise per side).
    lt, rt, rb, lb = _apply_margin(
        *self.region(start, end),
        margin=margin,
        margin_top=margin_top,
        margin_bottom=margin_bottom,
        margin_left=margin_left,
        margin_right=margin_right,
        margin_y=margin_y,
        margin_x=margin_x,
    )

    # Output size: average the lengths of the two opposite edges of the quad.
    width = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
    height = (rb[1] - rt[1] + lb[1] - lt[1]) / 2

    # Crop by warping the (possibly skewed) quad onto an axis-aligned rectangle.
    source_quad = np.array([lt, rt, rb, lb], dtype="float32")
    target_quad = np.array(
        [[0, 0], [width, 0], [width, height], [0, height]], dtype="float32"
    )
    transform = cv.getPerspectiveTransform(source_quad, target_quad)
    return cv.warpPerspective(image, transform, (int(width), int(height)))
Extract a multi-cell region from the image with perspective correction.
Crops the image to include all cells from start to end (inclusive), applying a perspective transform to produce a rectangular output.
Arguments:
- image: Source image (BGR or grayscale).
- start: Top-left cell as (row, col).
- end: Bottom-right cell as (row, col).
- margin: Uniform margin in pixels (default 0).
- margin_top: Override top margin.
- margin_bottom: Override bottom margin.
- margin_left: Override left margin.
- margin_right: Override right margin.
- margin_y: Override vertical margins (top and bottom).
- margin_x: Override horizontal margins (left and right).
Returns:
Cropped and perspective-corrected image.
Example:
>>> # Extract a 3x2 region starting at cell (1, 0) >>> region_img = grid.crop_region(image, (1, 0), (3, 1))
@abstractmethod
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """
    Find spans of continuous text in ``row`` that cross column rules.

    Args:
        img: Source table image.
        row: Row index to scan.
        margin_x: Horizontal margin around each rule crop, in pixels.
        margin_y: Vertical margin around each rule crop, in pixels.

    Returns:
        List of ``((row, start_col), (row, end_col))`` spans (inclusive).
    """
    ...
Split a row into spans of continuous text crossing column rules.
Arguments:
- img: Source table image.
- row: Row index to scan.
- margin_x: Horizontal margin around each rule crop, in pixels.
- margin_y: Vertical margin around each rule crop, in pixels.
Returns:
List of `((row, start_col), (row, end_col))` spans (inclusive).
def crop_cell(
    self,
    image,
    cell: tuple[int, int],
    margin: int = 0,
    margin_top: int | None = None,
    margin_bottom: int | None = None,
    margin_left: int | None = None,
    margin_right: int | None = None,
    margin_y: int | None = None,
    margin_x: int | None = None,
) -> MatLike:
    """
    Extract a single cell from the image with perspective correction.

    Convenience method equivalent to `crop_region(image, cell, cell, margin)`.

    Args:
        image: Source image (BGR or grayscale).
        cell: Cell indices as (row, col).
        margin: Padding in pixels around the cell (default 0).

    Returns:
        Cropped and perspective-corrected cell image.

    Example:
        >>> cell_img = grid.crop_cell(image, (0, 0))
        >>> cv2.imwrite("cell_0_0.png", cell_img)
    """
    # A single cell is a 1x1 region; forward every margin override by name.
    return self.crop_region(
        image,
        cell,
        cell,
        margin=margin,
        margin_top=margin_top,
        margin_bottom=margin_bottom,
        margin_left=margin_left,
        margin_right=margin_right,
        margin_y=margin_y,
        margin_x=margin_x,
    )
Extract a single cell from the image with perspective correction.
Convenience method equivalent to crop_region(image, cell, cell, margin).
Arguments:
- image: Source image (BGR or grayscale).
- cell: Cell indices as (row, col).
- margin: Padding in pixels around the cell (default 0).
Returns:
Cropped and perspective-corrected cell image.
Example:
>>> cell_img = grid.crop_cell(image, (0, 0)) >>> cv2.imwrite("cell_0_0.png", cell_img)
188class TableTemplate(TableIndexer): 189 """ 190 Defines the structure of a table header as a set of rules (lines). 191 192 Created via `TableTemplate.from_saved` (loading a JSON annotation) or 193 `AnnotationSession` (interactive annotation). Provides cell position 194 lookups and expected row heights for the grid-growing algorithm. 195 """ 196 197 def __init__(self, rules: Iterable[Iterable[int]]): 198 """ 199 Args: 200 rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1] 201 """ 202 203 super().__init__() 204 self._rules = [_Rule(*rule) for rule in rules] 205 self._h_rules = sorted( 206 [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y 207 ) 208 self._v_rules = sorted( 209 [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x 210 ) 211 212 @log_calls(level=logging.DEBUG) 213 def save(self, path: PathLike[str]): 214 """ 215 Save the TableTemplate as JSON. 216 217 Args: 218 path: Destination JSON file path. 219 """ 220 221 data = {"rules": [r.to_dict() for r in self._rules]} 222 223 with open(path, "w") as f: 224 json.dump(data, f) 225 226 @staticmethod 227 @log_calls(level=logging.DEBUG) 228 def from_saved(path: PathLike[str] | str) -> "TableTemplate": 229 """ 230 Load a TableTemplate from a JSON file produced by `save`. 231 232 Args: 233 path: Path to the JSON file. 234 235 Returns: 236 TableTemplate: the deserialized template. 
237 """ 238 with open(path) as f: 239 data = json.load(f) 240 rules = data["rules"] 241 rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in rules] 242 243 return TableTemplate(rules) 244 245 @property 246 def cols(self) -> int: 247 return len(self._v_rules) - 1 248 249 @property 250 def rows(self) -> int: 251 return len(self._h_rules) - 1 252 253 @staticmethod 254 @log_calls(level=logging.DEBUG) 255 def annotate_image( 256 template: MatLike | str, 257 crop: PathLike[str] | str | None = None, 258 margin: int = 10, 259 ) -> "TableTemplate": 260 """ 261 Utility method that allows users to create a template from a template image. 262 263 The user is asked to click to annotate lines (two clicks per line). 264 265 Args: 266 template: the image on which to annotate the header lines 267 crop (str | None): if a path, crop the template image first then 268 do the annotation; the cropped image is written to this path. 269 margin (int): margin to add around the cropping of the header 270 271 Returns: 272 TableTemplate: a new template built from the annotated lines. 
273 """ 274 275 if type(template) is str: 276 value = cv.imread(template) 277 assert value is not None 278 template = value 279 template = cast(MatLike, template) 280 281 if crop is not None: 282 cropped = TableTemplate._crop(template, margin) 283 cv.imwrite(os.fspath(crop), cropped) 284 template = cropped 285 286 start_point = None 287 lines: list[list[int]] = [] 288 289 anno_template = np.copy(template) 290 291 def get_point(event, x, y, flags, params): 292 nonlocal lines, start_point, anno_template 293 _ = flags 294 _ = params 295 if event == cv.EVENT_LBUTTONDOWN: 296 if start_point is not None: 297 line: list[int] = [start_point[1], start_point[0], x, y] 298 299 cv.line( 300 anno_template, 301 (start_point[1], start_point[0]), 302 (x, y), 303 (0, 255, 0), 304 2, 305 cv.LINE_AA, 306 ) 307 cv.imshow(constants.WINDOW, anno_template) 308 309 lines.append(line) 310 start_point = None 311 else: 312 start_point = (y, x) 313 elif event == cv.EVENT_RBUTTONDOWN: 314 start_point = None 315 316 # remove the last annotation 317 lines = lines[:-1] 318 319 anno_template = np.copy(anno_template) 320 321 for line in lines: 322 cv.line( 323 template, 324 (line[0], line[1]), 325 (line[2], line[3]), 326 (0, 255, 0), 327 2, 328 cv.LINE_AA, 329 ) 330 331 cv.imshow(constants.WINDOW, template) 332 333 print(ANNO_HELP) 334 335 imu.show(anno_template, get_point, title="annotate the header") 336 337 return TableTemplate(lines) 338 339 @staticmethod 340 @log_calls(level=logging.DEBUG) 341 def annotate_image_notebook( 342 template: MatLike | str, 343 crop: PathLike[str] | str | None = None, 344 margin: int = 10, 345 ) -> "AnnotationSession": 346 """ 347 Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately. 348 Interact with the widget and click Done to finalize. 349 Access the result via session.result after clicking Done. 
350 351 Args: 352 template: the image on which to annotate the header lines 353 crop (str | None): if str, crop the template image first, then do the annotation. 354 The cropped image will be stored at the supplied path 355 margin (int): margin to add around the cropping of the header 356 357 Returns: 358 AnnotationSession: access .result after clicking Done to get the TableTemplate. 359 """ 360 if isinstance(template, str): 361 tmp = cv.imread(template) 362 assert tmp is not None 363 template = tmp 364 365 session = AnnotationSession(crop) 366 367 if crop is not None: 368 # First show crop UI, then annotation UI 369 TableTemplate._crop_notebook(template, margin, session) 370 else: 371 # Go directly to annotation 372 TableTemplate._show_annotation_ui(template, session) 373 374 return session 375 376 @staticmethod 377 def _crop_notebook(template: MatLike, margin: int, session: "AnnotationSession"): 378 """Notebook-compatible crop UI using matplotlib + ipywidgets.""" 379 import ipywidgets as widgets 380 from IPython.display import display 381 382 display_img = cv.cvtColor(template, cv.COLOR_BGR2RGB) 383 384 points: list[tuple[int, int]] = [] 385 drawn_points: list = [] 386 387 fig, ax = plt.subplots(figsize=(15, 15)) 388 389 fig.canvas.toolbar_visible = False # ty:ignore[unresolved-attribute] 390 fig.canvas.header_visible = False # ty:ignore[unresolved-attribute] 391 392 ax.imshow(display_img, origin="upper") 393 ax.set_title( 394 "Annotate the header: \nClick 4 corners of the header region such that the entire header is contained within the rectangle." 
395 ) 396 ax.set_axis_off() 397 398 # Create ipywidgets buttons 399 done_button = widgets.Button( 400 description="Done Cropping", 401 button_style="success", 402 layout=widgets.Layout(width="200px", height="50px"), 403 ) 404 405 undo_button = widgets.Button( 406 description="Undo Last Point", 407 button_style="warning", 408 layout=widgets.Layout(width="200px", height="50px"), 409 ) 410 411 done_button.style.font_size = "18px" 412 undo_button.style.font_size = "18px" 413 414 status_label = widgets.Label( 415 value="Press 'Done' when finished. Press 'Undo Last Point' to remove the last point.", 416 style={"font_size": "18px"}, 417 ) 418 419 def on_click(event): 420 if event.inaxes != ax or event.xdata is None or event.ydata is None: 421 return 422 423 # Round coordinates to integers for pixel-perfect annotation 424 x, y = round(event.xdata), round(event.ydata) 425 426 # Validate coordinates are within image bounds 427 img_h, img_w = template.shape[:2] 428 x = max(0, min(x, img_w - 1)) 429 y = max(0, min(y, img_h - 1)) 430 431 if event.button == 1: # Left click - add point 432 points.append((x, y)) 433 (point_marker,) = ax.plot(x, y, "go", markersize=10) 434 drawn_points.append(point_marker) 435 status_label.value = f"Points: {len(points)}/4" 436 fig.canvas.draw_idle() 437 438 def on_undo(_): 439 if points: 440 points.pop() 441 drawn_points.pop().remove() 442 status_label.value = f"Points: {len(points)}/4" 443 fig.canvas.draw_idle() 444 445 def on_done(_): 446 nonlocal cid 447 448 if len(points) != 4: 449 status_label.value = ( 450 f"Error: Need exactly 4 points! 
Currently have {len(points)}" 451 ) 452 return 453 454 fig.canvas.mpl_disconnect(cid) 455 456 # Crop the image 457 points_np = np.array(points) 458 img_h, img_w = template.shape[:2] 459 x_min = max(int(np.min(points_np[:, 0])) - margin, 0) 460 y_min = max(int(np.min(points_np[:, 1])) - margin, 0) 461 x_max = min(int(np.max(points_np[:, 0])) + margin, img_w) 462 y_max = min(int(np.max(points_np[:, 1])) + margin, img_h) 463 464 cropped = template[y_min:y_max, x_min:x_max] 465 466 # Save cropped image if path provided 467 if session._crop_path is not None: 468 cv.imwrite(os.fspath(session._crop_path), cropped) 469 470 plt.close(fig) 471 container.clear_output() 472 with container: 473 TableTemplate._show_annotation_ui(cropped, session) 474 475 done_button.on_click(on_done) 476 undo_button.on_click(on_undo) 477 478 cid = fig.canvas.mpl_connect("button_press_event", on_click) 479 480 # Anchor an Output widget to the cell, then render inside it so that 481 # on_done can clear and re-populate it without leaving the cell context. 
482 container = widgets.Output() 483 display(container) 484 with container: 485 plt.tight_layout(pad=0) 486 plt.show() 487 display(widgets.HBox([done_button, undo_button, status_label])) 488 489 @staticmethod 490 def _show_annotation_ui(template: MatLike, session: "AnnotationSession"): 491 """Show the line annotation UI using matplotlib + ipywidgets.""" 492 import ipywidgets as widgets 493 from IPython.display import display 494 495 print( 496 "\x1b[32m[Taulu]: Don't forget to save annotations with annotation.save()!\x1b[0m" 497 ) 498 499 display_img = cv.cvtColor(template, cv.COLOR_BGR2RGB) 500 501 lines: list[list[int]] = [] 502 start_point: list[tuple[int, int] | None] = [None] 503 drawn_lines: list = [] 504 start_markers: list = [] 505 506 fig, ax = plt.subplots(figsize=(15, 12)) 507 fig.canvas.toolbar_visible = False # ty:ignore[unresolved-attribute] 508 fig.canvas.header_visible = False # ty:ignore[unresolved-attribute] 509 ax.imshow(display_img, origin="upper") 510 ax.set_title("Click pairs of points to draw lines. 
Lines: 0") 511 ax.set_axis_off() 512 513 # Create ipywidgets buttons 514 done_button = widgets.Button( 515 description="Done Annotating", 516 button_style="success", 517 layout=widgets.Layout(width="200px", height="50px"), 518 ) 519 undo_button = widgets.Button( 520 description="Undo Last Line", 521 button_style="warning", 522 layout=widgets.Layout(width="200px", height="50px"), 523 ) 524 status_label = widgets.Label( 525 value="Click to start a line, click again to end it", 526 style={"font_size": "18px"}, 527 ) 528 529 done_button.style.font_size = "18px" 530 undo_button.style.font_size = "18px" 531 532 def on_click(event): 533 if event.inaxes != ax or event.xdata is None or event.ydata is None: 534 return 535 536 # Round coordinates to integers for pixel-perfect annotation 537 x, y = round(event.xdata), round(event.ydata) 538 539 # Validate coordinates are within image bounds 540 img_h, img_w = template.shape[:2] 541 x = max(0, min(x, img_w - 1)) 542 y = max(0, min(y, img_h - 1)) 543 544 if event.button == 1: # Left click 545 if start_point[0] is not None: 546 x0, y0 = start_point[0] 547 lines.append([x0, y0, x, y]) 548 (ln,) = ax.plot([x0, x], [y0, y], color="lime", linewidth=2) 549 drawn_lines.append(ln) 550 # Remove the start-point marker now that the line is complete 551 if start_markers: 552 start_markers.pop().remove() 553 ax.set_title( 554 f"Click pairs of points to draw lines. Lines: {len(lines)}" 555 ) 556 status_label.value = ( 557 f"Line {len(lines)} added. Click to start next line." 558 ) 559 fig.canvas.draw_idle() 560 start_point[0] = None 561 else: 562 start_point[0] = (x, y) 563 status_label.value = ( 564 f"Start point set at ({x}, {y}). Click end point." 
565 ) 566 # Draw a temporary marker (tracked so undo can remove it) 567 (marker,) = ax.plot(x, y, "ro", markersize=5) 568 start_markers.append(marker) 569 fig.canvas.draw_idle() 570 571 def on_undo(_): 572 # Clear any pending start-point marker 573 if start_markers: 574 start_markers.pop().remove() 575 start_point[0] = None 576 if lines: 577 lines.pop() 578 drawn_lines.pop().remove() 579 ax.set_title( 580 f"Click pairs of points to draw lines. Lines: {len(lines)}" 581 ) 582 status_label.value = f"Undone. Lines: {len(lines)}" 583 fig.canvas.draw_idle() 584 585 def on_done(_): 586 session._result = TableTemplate(lines) 587 fig.canvas.mpl_disconnect(cid) 588 done_button.disabled = True 589 undo_button.disabled = True 590 ax.set_title( 591 f"Done! {len(lines)} lines annotated. Call session.save() to save." 592 ) 593 status_label.value = ( 594 "Annotation complete! Run session.save('filename.json') to save." 595 ) 596 fig.canvas.draw_idle() 597 598 done_button.on_click(on_done) 599 undo_button.on_click(on_undo) 600 601 cid = fig.canvas.mpl_connect("button_press_event", on_click) 602 603 # Display figure first, then buttons below 604 plt.tight_layout(pad=0) 605 plt.show() 606 display(widgets.HBox([done_button, undo_button, status_label])) 607 608 @staticmethod 609 @log_calls(level=logging.DEBUG, include_return=True) 610 def _crop(template: MatLike, margin: int = 10) -> MatLike: 611 """ 612 Crop the image to contain only the annotations, such that it can be used as the header image in the taulu workflow. 
613 """ 614 615 points = [] 616 anno_template = np.copy(template) 617 618 def get_point(event, x, y, flags, params): 619 nonlocal points, anno_template 620 _ = flags 621 _ = params 622 if event == cv.EVENT_LBUTTONDOWN: 623 point = (x, y) 624 625 cv.circle( 626 anno_template, 627 (x, y), 628 4, 629 (0, 255, 0), 630 2, 631 ) 632 cv.imshow(constants.WINDOW, anno_template) 633 634 points.append(point) 635 elif event == cv.EVENT_RBUTTONDOWN: 636 # remove the last annotation 637 points = points[:-1] 638 639 anno_template = np.copy(anno_template) 640 641 for p in points: 642 cv.circle( 643 anno_template, 644 p, 645 4, 646 (0, 255, 0), 647 2, 648 ) 649 650 cv.imshow(constants.WINDOW, anno_template) 651 652 print(CROP_HELP) 653 654 imu.show(anno_template, get_point, title="crop the header") 655 656 assert len(points) == 4, ( 657 "you need to annotate the four corners of the table in order to crop it" 658 ) 659 660 # crop the image to contain all of the points (just crop rectangularly, x, y, w, h) 661 # Convert points to numpy array 662 points_np = np.array(points) 663 664 # Find bounding box 665 x_min = np.min(points_np[:, 0]) 666 y_min = np.min(points_np[:, 1]) 667 x_max = np.max(points_np[:, 0]) 668 y_max = np.max(points_np[:, 1]) 669 670 # Compute width and height 671 width = x_max - x_min 672 height = y_max - y_min 673 674 # Ensure integers and within image boundaries 675 x_min = max(int(x_min), 0) 676 y_min = max(int(y_min), 0) 677 width = int(width) 678 height = int(height) 679 680 # Crop the image 681 cropped = template[ 682 y_min - margin : y_min + height + margin, 683 x_min - margin : x_min + width + margin, 684 ] 685 686 return cropped 687 688 @staticmethod 689 def from_vgg_annotation(annotation: str) -> "TableTemplate": 690 """ 691 Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool. 
692 693 Args: 694 annotation (str): the path of the annotation csv file 695 696 Returns: 697 TableTemplate: a new template built from the polyline annotations. 698 """ 699 700 rules = [] 701 with open(annotation) as csvfile: 702 reader = csv.DictReader(csvfile) 703 for row in reader: 704 shape_attributes = json.loads(row["region_shape_attributes"]) 705 if shape_attributes["name"] == "polyline": 706 x_points = shape_attributes["all_points_x"] 707 y_points = shape_attributes["all_points_y"] 708 if len(x_points) == 2 and len(y_points) == 2: 709 rules.append( 710 [x_points[0], y_points[0], x_points[1], y_points[1]] 711 ) 712 713 return TableTemplate(rules) 714 715 def cell_width(self, i: int) -> int: 716 """Width of the ``i``-th cell column, in template pixels.""" 717 self._check_col_idx(i) 718 return int(self._v_rules[i + 1]._x - self._v_rules[i]._x) 719 720 def cell_widths(self, start: int = 0) -> list[int]: 721 """Widths of every cell column starting at ``start``, in template pixels.""" 722 return [self.cell_width(i) for i in range(start, self.cols)] 723 724 def cell_height(self, header_factor: float = 0.8) -> int: 725 """Estimated body row height as ``header_factor`` times the header height.""" 726 return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor) 727 728 def cell_heights(self, header_factors: list[float] | float) -> list[int]: 729 """Body row heights, one per element in ``header_factors`` (a single 730 float is broadcast to a 1-element list).""" 731 if isinstance(header_factors, float): 732 header_factors = [header_factors] 733 header_factors = cast(list, header_factors) 734 return [ 735 int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors 736 ] 737 738 def intersection(self, index: tuple[int, int]) -> tuple[float, float]: 739 """ 740 Get the intersection point of a horizontal and vertical rule. 741 742 Args: 743 index: ``(h, v)`` indices into the horizontal and vertical rule lists. 
744 745 Returns: 746 tuple[float, float]: the ``(x, y)`` intersection in template pixels. 747 """ 748 749 ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]]) 750 assert ints is not None 751 return ints 752 753 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 754 """ 755 Get the cell index (row, col) that corresponds with the point (x, y) in the template image 756 757 Args: 758 point (tuple[float, float]): the coordinates in the template image 759 760 Returns: 761 tuple[int, int]: (row, col) 762 """ 763 764 x, y = point 765 766 row = -1 767 col = -1 768 769 for i in range(self.rows): 770 y0 = self._h_rules[i]._y_at_x(x) 771 y1 = self._h_rules[i + 1]._y_at_x(x) 772 if min(y0, y1) <= y <= max(y0, y1): 773 row = i 774 break 775 776 for i in range(self.cols): 777 x0 = self._v_rules[i]._x_at_y(y) 778 x1 = self._v_rules[i + 1]._x_at_y(y) 779 if min(x0, x1) <= x <= max(x0, x1): 780 col = i 781 break 782 783 if row == -1 or col == -1: 784 return (-1, -1) 785 786 return (row, col) 787 788 def cell_polygon( 789 self, cell: tuple[int, int] 790 ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]: 791 """ 792 Return the four corner points enclosing ``cell`` in the order 793 (top-left, top-right, bottom-right, bottom-left). 794 795 Args: 796 cell: Cell indices as ``(row, col)``. 797 798 Returns: 799 Tuple of four ``(x, y)`` points in template-pixel coordinates. 800 801 Raises: 802 TauluException: if the surrounding rules do not intersect. 
803 """ 804 805 row, col = cell 806 807 self._check_col_idx(col) 808 self._check_row_idx(row) 809 810 top_rule = self._h_rules[row] 811 bottom_rule = self._h_rules[row + 1] 812 left_rule = self._v_rules[col] 813 right_rule = self._v_rules[col + 1] 814 815 # Calculate corner points using intersections 816 top_left = top_rule.intersection(left_rule) 817 top_right = top_rule.intersection(right_rule) 818 bottom_left = bottom_rule.intersection(left_rule) 819 bottom_right = bottom_rule.intersection(right_rule) 820 821 if not all( 822 point is not None 823 for point in [top_left, top_right, bottom_left, bottom_right] 824 ): 825 raise TauluException("the lines around this cell do not intersect") 826 827 return top_left, top_right, bottom_right, bottom_left # type:ignore 828 829 def region( 830 self, start: tuple[int, int], end: tuple[int, int] 831 ) -> tuple[Point, Point, Point, Point]: 832 """ 833 Bounding polygon of the rectangular range of cells from ``start`` to 834 ``end`` (both inclusive), as ``(lt, rt, rb, lb)`` integer pixel points. 
835 """ 836 self._check_row_idx(start[0]) 837 self._check_row_idx(end[0]) 838 self._check_col_idx(start[1]) 839 self._check_col_idx(end[1]) 840 841 # the rules that surround this row 842 top_rule = self._h_rules[start[0]] 843 bottom_rule = self._h_rules[end[0] + 1] 844 left_rule = self._v_rules[start[1]] 845 right_rule = self._v_rules[end[1] + 1] 846 847 # four points that will be the bounding polygon of the result, 848 # which needs to be rectified 849 top_left = top_rule.intersection(left_rule) 850 top_right = top_rule.intersection(right_rule) 851 bottom_left = bottom_rule.intersection(left_rule) 852 bottom_right = bottom_rule.intersection(right_rule) 853 854 if ( 855 top_left is None 856 or top_right is None 857 or bottom_left is None 858 or bottom_right is None 859 ): 860 raise TauluException("the lines around this row do not intersect properly") 861 862 def to_point(pnt) -> Point: 863 return (int(pnt[0]), int(pnt[1])) 864 865 return ( 866 to_point(top_left), 867 to_point(top_right), 868 to_point(bottom_right), 869 to_point(bottom_left), 870 ) 871 872 def text_regions( 873 self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20 874 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 875 """Not supported on a `TableTemplate`; always raises `TauluException`.""" 876 raise TauluException("text_regions should not be called on a TableTemplate")
Defines the structure of a table header as a set of rules (lines).
Created via TableTemplate.from_saved (loading a JSON annotation) or
AnnotationSession (interactive annotation). Provides cell position
lookups and expected row heights for the grid-growing algorithm.
def __init__(self, rules: Iterable[Iterable[int]]):
    """
    Build a template from raw line annotations.

    Args:
        rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
    """

    super().__init__()
    parsed = [_Rule(*raw) for raw in rules]
    self._rules = parsed
    # split the annotated lines into horizontal and vertical rules,
    # ordered top-to-bottom and left-to-right respectively
    self._h_rules = sorted(
        (r for r in parsed if r._is_horizontal()), key=lambda r: r._y
    )
    self._v_rules = sorted(
        (r for r in parsed if r._is_vertical()), key=lambda r: r._x
    )
Arguments:
- rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
@log_calls(level=logging.DEBUG)
def save(self, path: PathLike[str]):
    """
    Save the TableTemplate as JSON.

    Args:
        path: Destination JSON file path.
    """

    serialized = {"rules": [rule.to_dict() for rule in self._rules]}
    with open(path, "w") as f:
        json.dump(serialized, f)
Save the TableTemplate as JSON.
Arguments:
- path: Destination JSON file path.
@staticmethod
@log_calls(level=logging.DEBUG)
def from_saved(path: PathLike[str] | str) -> "TableTemplate":
    """
    Load a TableTemplate from a JSON file produced by `save`.

    Args:
        path: Path to the JSON file.

    Returns:
        TableTemplate: the deserialized template.
    """
    with open(path) as f:
        data = json.load(f)

    # rebuild the [x0, y0, x1, y1] quadruples from the serialized rule dicts
    rules = [
        [entry["x0"], entry["y0"], entry["x1"], entry["y1"]]
        for entry in data["rules"]
    ]

    return TableTemplate(rules)
Load a TableTemplate from a JSON file produced by save.
Arguments:
- path: Path to the JSON file.
Returns:
TableTemplate: the deserialized template.
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image(
    template: MatLike | str,
    crop: PathLike[str] | str | None = None,
    margin: int = 10,
) -> "TableTemplate":
    """
    Utility method that allows users to create a template from a template image.

    The user is asked to click to annotate lines (two clicks per line).
    A right click removes the most recent annotation.

    Args:
        template: the image on which to annotate the header lines
        crop (str | None): if a path, crop the template image first then
            do the annotation; the cropped image is written to this path.
        margin (int): margin to add around the cropping of the header

    Returns:
        TableTemplate: a new template built from the annotated lines.
    """

    if isinstance(template, str):
        value = cv.imread(template)
        assert value is not None
        template = value
    template = cast(MatLike, template)

    if crop is not None:
        cropped = TableTemplate._crop(template, margin)
        cv.imwrite(os.fspath(crop), cropped)
        template = cropped

    start_point: tuple[int, int] | None = None
    lines: list[list[int]] = []

    anno_template = np.copy(template)

    def get_point(event, x, y, flags, params):
        nonlocal lines, start_point, anno_template
        _ = flags
        _ = params
        if event == cv.EVENT_LBUTTONDOWN:
            if start_point is not None:
                x0, y0 = start_point
                lines.append([x0, y0, x, y])

                cv.line(
                    anno_template,
                    (x0, y0),
                    (x, y),
                    (0, 255, 0),
                    2,
                    cv.LINE_AA,
                )
                cv.imshow(constants.WINDOW, anno_template)

                start_point = None
            else:
                start_point = (x, y)
        elif event == cv.EVENT_RBUTTONDOWN:
            start_point = None

            # remove the last annotation
            lines = lines[:-1]

            # BUGFIX: redraw from a clean copy of the original image so the
            # undone line actually disappears; previously the code copied the
            # already-annotated overlay (leaving the removed line visible)
            # and then drew onto `template` itself, mutating the clean image.
            anno_template = np.copy(template)

            for line in lines:
                cv.line(
                    anno_template,
                    (line[0], line[1]),
                    (line[2], line[3]),
                    (0, 255, 0),
                    2,
                    cv.LINE_AA,
                )

            cv.imshow(constants.WINDOW, anno_template)

    print(ANNO_HELP)

    imu.show(anno_template, get_point, title="annotate the header")

    return TableTemplate(lines)
Utility method that allows users to create a template from a template image.
The user is asked to click to annotate lines (two clicks per line).
Arguments:
- template: the image on which to annotate the header lines
- crop (str | None): if a path, crop the template image first then do the annotation; the cropped image is written to this path.
- margin (int): margin to add around the cropping of the header
Returns:
TableTemplate: a new template built from the annotated lines.
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image_notebook(
    template: MatLike | str,
    crop: PathLike[str] | str | None = None,
    margin: int = 10,
) -> "AnnotationSession":
    """
    Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately.
    Interact with the widget and click Done to finalize.
    Access the result via session.result after clicking Done.

    Args:
        template: the image on which to annotate the header lines
        crop (str | None): if str, crop the template image first, then do the annotation.
            The cropped image will be stored at the supplied path
        margin (int): margin to add around the cropping of the header

    Returns:
        AnnotationSession: access .result after clicking Done to get the TableTemplate.
    """
    if isinstance(template, str):
        loaded = cv.imread(template)
        assert loaded is not None
        template = loaded

    session = AnnotationSession(crop)

    if crop is None:
        # no cropping requested: open the annotation UI straight away
        TableTemplate._show_annotation_ui(template, session)
    else:
        # crop first; the crop UI chains into the annotation UI when done
        TableTemplate._crop_notebook(template, margin, session)

    return session
Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately. Interact with the widget and click Done to finalize. Access the result via session.result after clicking Done.
Arguments:
- template: the image on which to annotate the header lines
- crop (str | None): if str, crop the template image first, then do the annotation. The cropped image will be stored at the supplied path
- margin (int): margin to add around the cropping of the header
Returns:
AnnotationSession: access .result after clicking Done to get the TableTemplate.
@staticmethod
def from_vgg_annotation(annotation: str) -> "TableTemplate":
    """
    Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.

    Args:
        annotation (str): the path of the annotation csv file

    Returns:
        TableTemplate: a new template built from the polyline annotations.
    """

    rules: list[list[int]] = []
    with open(annotation) as csvfile:
        for row in csv.DictReader(csvfile):
            shape = json.loads(row["region_shape_attributes"])
            if shape["name"] != "polyline":
                continue
            xs = shape["all_points_x"]
            ys = shape["all_points_y"]
            # only two-point polylines describe a single straight rule
            if len(xs) == 2 and len(ys) == 2:
                rules.append([xs[0], ys[0], xs[1], ys[1]])

    return TableTemplate(rules)
Create a TableTemplate from annotations made in vgg, using the polylines tool.
Arguments:
- annotation (str): the path of the annotation csv file
Returns:
TableTemplate: a new template built from the polyline annotations.
def cell_width(self, i: int) -> int:
    """Width of the ``i``-th cell column, in template pixels."""
    self._check_col_idx(i)
    left, right = self._v_rules[i], self._v_rules[i + 1]
    return int(right._x - left._x)
Width of the i-th cell column, in template pixels.
def cell_widths(self, start: int = 0) -> list[int]:
    """Widths of every cell column starting at ``start``, in template pixels."""
    return list(map(self.cell_width, range(start, self.cols)))
Widths of every cell column starting at start, in template pixels.
724 def cell_height(self, header_factor: float = 0.8) -> int: 725 """Estimated body row height as ``header_factor`` times the header height.""" 726 return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)
Estimated body row height as header_factor times the header height.
728 def cell_heights(self, header_factors: list[float] | float) -> list[int]: 729 """Body row heights, one per element in ``header_factors`` (a single 730 float is broadcast to a 1-element list).""" 731 if isinstance(header_factors, float): 732 header_factors = [header_factors] 733 header_factors = cast(list, header_factors) 734 return [ 735 int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors 736 ]
Body row heights, one per element in header_factors (a single
float is broadcast to a 1-element list).
738 def intersection(self, index: tuple[int, int]) -> tuple[float, float]: 739 """ 740 Get the intersection point of a horizontal and vertical rule. 741 742 Args: 743 index: ``(h, v)`` indices into the horizontal and vertical rule lists. 744 745 Returns: 746 tuple[float, float]: the ``(x, y)`` intersection in template pixels. 747 """ 748 749 ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]]) 750 assert ints is not None 751 return ints
Get the intersection point of a horizontal and vertical rule.
Arguments:
- index: (h, v) indices into the horizontal and vertical rule lists.
Returns:
tuple[float, float]: the (x, y) intersection in template pixels.
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Get the cell index (row, col) that corresponds with the point (x, y) in the template image

    Args:
        point (tuple[float, float]): the coordinates in the template image

    Returns:
        tuple[int, int]: (row, col)
    """

    x, y = point

    def first_band(count, lower, upper, coord):
        # index of the first band whose two bounding rules enclose `coord`,
        # or -1 when the coordinate falls outside every band
        for idx in range(count):
            a, b = lower(idx), upper(idx)
            if min(a, b) <= coord <= max(a, b):
                return idx
        return -1

    row = first_band(
        self.rows,
        lambda i: self._h_rules[i]._y_at_x(x),
        lambda i: self._h_rules[i + 1]._y_at_x(x),
        y,
    )
    col = first_band(
        self.cols,
        lambda i: self._v_rules[i]._x_at_y(y),
        lambda i: self._v_rules[i + 1]._x_at_y(y),
        x,
    )

    # a point outside the grid in either direction maps to (-1, -1)
    if row == -1 or col == -1:
        return (-1, -1)

    return (row, col)
Get the cell index (row, col) that corresponds with the point (x, y) in the template image
Arguments:
- point (tuple[float, float]): the coordinates in the template image
Returns:
tuple[int, int]: (row, col)
def cell_polygon(
    self, cell: tuple[int, int]
) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
    """
    Return the four corner points enclosing ``cell`` in the order
    (top-left, top-right, bottom-right, bottom-left).

    Args:
        cell: Cell indices as ``(row, col)``.

    Returns:
        Tuple of four ``(x, y)`` points in template-pixel coordinates.

    Raises:
        TauluException: if the surrounding rules do not intersect.
    """

    row, col = cell

    self._check_col_idx(col)
    self._check_row_idx(row)

    top = self._h_rules[row]
    bottom = self._h_rules[row + 1]
    left = self._v_rules[col]
    right = self._v_rules[col + 1]

    # corners in (tl, tr, br, bl) order, via rule intersections
    corners = (
        top.intersection(left),
        top.intersection(right),
        bottom.intersection(right),
        bottom.intersection(left),
    )

    if any(corner is None for corner in corners):
        raise TauluException("the lines around this cell do not intersect")

    return corners  # type:ignore
Return the four corner points enclosing cell in the order
(top-left, top-right, bottom-right, bottom-left).
Arguments:
- cell: Cell indices as (row, col).
Returns:
Tuple of four (x, y) points in template-pixel coordinates.
Raises:
- TauluException: if the surrounding rules do not intersect.
def region(
    self, start: tuple[int, int], end: tuple[int, int]
) -> tuple[Point, Point, Point, Point]:
    """
    Bounding polygon of the rectangular range of cells from ``start`` to
    ``end`` (both inclusive), as ``(lt, rt, rb, lb)`` integer pixel points.
    """
    self._check_row_idx(start[0])
    self._check_row_idx(end[0])
    self._check_col_idx(start[1])
    self._check_col_idx(end[1])

    # outer rules bounding the requested cell range
    top = self._h_rules[start[0]]
    bottom = self._h_rules[end[0] + 1]
    left = self._v_rules[start[1]]
    right = self._v_rules[end[1] + 1]

    lt = top.intersection(left)
    rt = top.intersection(right)
    lb = bottom.intersection(left)
    rb = bottom.intersection(right)

    if lt is None or rt is None or lb is None or rb is None:
        raise TauluException("the lines around this row do not intersect properly")

    def as_int_point(p) -> Point:
        return (int(p[0]), int(p[1]))

    return (
        as_int_point(lt),
        as_int_point(rt),
        as_int_point(rb),
        as_int_point(lb),
    )
Bounding polygon of the rectangular range of cells from start to
end (both inclusive), as (lt, rt, rb, lb) integer pixel points.
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """`TableTemplate` does not provide text regions; this always raises `TauluException`."""
    # templates describe only the header geometry, so per-row text lookup is undefined here
    raise TauluException("text_regions should not be called on a TableTemplate")
Not supported on a TableTemplate; always raises TauluException.
42class Taulu: 43 """ 44 High-level API for table segmentation from images. 45 46 Taulu orchestrates header alignment, grid detection, and table segmentation 47 into a single workflow. 48 49 Workflow: 50 1. Create annotated header images via `Taulu.annotate()` 51 2. Initialize Taulu with header(s) and parameters 52 3. Call `segment_table()` to get a `SegmentedTable` with cell boundaries 53 54 For two-page tables, use `Split[T]` to provide different parameters for 55 left and right sides. 56 57 Example: 58 >>> import cv2 59 >>> from taulu import Taulu 60 >>> Taulu.annotate("table_image.png", "header.png") 61 >>> taulu = Taulu("header.png") 62 >>> grid = taulu.segment_table("table_page_01.png") 63 >>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0)) 64 """ 65 66 def __init__( 67 self, 68 template_path: Splittable[PathLike[str]] | Splittable[str], 69 row_height_factor: Splittable[float] | Splittable[list[float]] | None = None, 70 annotation_path: Splittable[PathLike[str]] | Splittable[str] | None = None, 71 binarization_sensitivity: Splittable[float] = 0.25, 72 search_radius: Splittable[int] = 60, 73 position_weight: Splittable[float] = 0.4, 74 line_thickness: Splittable[int] = 10, 75 line_gap_fill: Splittable[int] = 4, 76 intersection_kernel_size: Splittable[int] = 41, 77 detection_scale: Splittable[float] = 1.0, 78 pathfinding_threshold: Splittable[float] = 0.2, 79 min_rows: Splittable[int] = 5, 80 extrapolation_distance: Splittable[int] = 3, 81 detection_threshold: Splittable[float] = 0.3, 82 smooth: bool = False, 83 smooth_strength: float = 0.5, 84 smooth_iterations: int = 1, 85 smooth_degree: int = 1, 86 growing_resets: Splittable[int] = 0, 87 reset_fraction: Splittable[float] = 0.5, 88 feature_detector: Splittable[FeatureDetector] = "akaze", 89 matching_scale: float = 1.0, 90 auto_row_heights: bool = False, 91 min_row_height_factor: Splittable[float] = 0.5, 92 max_row_height_factor: Splittable[float] = 1.5, 93 row_detection_path_scale: float 
= 0.25, 94 ): 95 """ 96 Args: 97 template_path: Path to header template image(s). Use `Split` for two-page tables. 98 row_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0] 99 annotation_path: Explicit annotation JSON path. Default: inferred from image path. 100 binarization_sensitivity: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25 101 search_radius: Corner search area in pixels. Default: 60 102 position_weight: Position penalty weight [0, 1]. Default: 0.4 103 line_thickness: Cross-kernel width matching line thickness. Default: 10 104 line_gap_fill: Morphological dilation size. Default: 4 105 intersection_kernel_size: Cross-kernel size (odd). Default: 41 106 detection_scale: Image downscale factor (0, 1]. Default: 1.0 107 pathfinding_threshold: Confidence to skip A* pathfinding. Default: 0.2 108 min_rows: Minimum rows before completion. Default: 5 109 extrapolation_distance: Rows to examine for extrapolation. Default: 3 110 detection_threshold: Corner acceptance confidence [0, 1]. Default: 0.3 111 smooth: Apply grid smoothing after detection. Default: False 112 smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5 113 smooth_iterations: Number of smoothing passes. Default: 1 114 smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1 115 growing_resets: Number of grid cuts during growing. Default: 0 116 reset_fraction: Fraction of points to delete per cut. Default: 0.5 117 feature_detector: Feature matching method for header alignment. One of "orb" 118 (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust, 119 patent-free). Default: "akaze" 120 matching_scale: Downscale factor (0, 1] for header alignment only. Lower 121 values speed up feature matching. Default: 1.0 122 auto_row_heights: If True, detect variable per-row heights from the 123 cross-correlation map at runtime (overriding `row_height_factor`). 
124 Default: False 125 min_row_height_factor: Minimum row height as a fraction of the header 126 height when `auto_row_heights` is enabled. Default: 0.5 127 max_row_height_factor: Maximum row height as a fraction of the header 128 height when `auto_row_heights` is enabled. Default: 1.5 129 row_detection_path_scale: Downscale factor (0, 1] for the A* path 130 following used by `auto_row_heights`. Lower = faster, less precise. 131 Default: 0.25 132 """ 133 self._detection_scale = detection_scale 134 self._auto_row_heights = auto_row_heights 135 self._min_row_height_factor = min_row_height_factor 136 self._max_row_height_factor = max_row_height_factor 137 self._row_detection_path_scale = row_detection_path_scale 138 self._smooth = smooth 139 self._smooth_strength = smooth_strength 140 self._smooth_iterations = smooth_iterations 141 self._smooth_degree = smooth_degree 142 143 if row_height_factor is None: 144 row_height_factor = [1.0] 145 146 self._row_height_factor = row_height_factor 147 148 if isinstance(template_path, Split) or isinstance(annotation_path, Split): 149 header = Split(Path(template_path.left), Path(template_path.right)) # ty:ignore[unresolved-attribute] 150 151 if not exists(header.left.with_suffix(".png")) or not exists( 152 header.right.with_suffix(".png") 153 ): 154 raise TauluException( 155 "The header images you provided do not exist (or they aren't .png files)" 156 ) 157 158 if annotation_path is None: 159 if not exists(header.left.with_suffix(".json")) or not exists( 160 header.right.with_suffix(".json") 161 ): 162 raise TauluException( 163 "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method" 164 ) 165 166 template_left = TableTemplate.from_saved( 167 header.left.with_suffix(".json") 168 ) 169 template_right = TableTemplate.from_saved( 170 header.right.with_suffix(".json") 171 ) 172 173 else: 174 if not exists(annotation_path.left) or not exists( # ty: ignore[unresolved-attribute] 175 annotation_path.right # 
ty: ignore[unresolved-attribute] 176 ): 177 raise TauluException( 178 "The header annotation files you provided do not exist (or they aren't .json files)" 179 ) 180 181 template_left = TableTemplate.from_saved(annotation_path.left) # ty: ignore[unresolved-attribute] 182 template_right = TableTemplate.from_saved(annotation_path.right) # ty: ignore[unresolved-attribute] 183 184 self._header = Split( 185 cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right)) 186 ) 187 188 self._aligner = Split( 189 TemplateMatcher( 190 self._header.left, 191 method=get_param(feature_detector, "left"), 192 scale=matching_scale, 193 ), 194 TemplateMatcher( 195 self._header.right, 196 method=get_param(feature_detector, "right"), 197 scale=matching_scale, 198 ), 199 ) 200 201 self._template = Split(template_left, template_right) 202 203 self._cell_heights = Split( 204 self._template.left.cell_heights(get_param(row_height_factor, "left")), 205 self._template.right.cell_heights( 206 get_param(row_height_factor, "right") 207 ), 208 ) 209 210 # Create TableDetector for left and right with potentially different parameters 211 self._grid_detector = Split( 212 TableDetector( 213 intersection_kernel_size=get_param( 214 intersection_kernel_size, "left" 215 ), 216 line_thickness=get_param(line_thickness, "left"), 217 line_gap_fill=get_param(line_gap_fill, "left"), 218 search_radius=get_param(search_radius, "left"), 219 binarization_sensitivity=get_param( 220 binarization_sensitivity, "left" 221 ), 222 position_weight=get_param(position_weight, "left"), 223 detection_scale=get_param(self._detection_scale, "left"), 224 pathfinding_threshold=get_param(pathfinding_threshold, "left"), 225 min_rows=get_param(min_rows, "left"), 226 extrapolation_distance=get_param(extrapolation_distance, "left"), 227 detection_threshold=get_param(detection_threshold, "left"), 228 growing_resets=get_param(growing_resets, "left"), 229 reset_fraction=get_param(reset_fraction, "left"), 230 ), 231 
TableDetector( 232 intersection_kernel_size=get_param( 233 intersection_kernel_size, "right" 234 ), 235 line_thickness=get_param(line_thickness, "right"), 236 line_gap_fill=get_param(line_gap_fill, "right"), 237 search_radius=get_param(search_radius, "right"), 238 binarization_sensitivity=get_param( 239 binarization_sensitivity, "right" 240 ), 241 position_weight=get_param(position_weight, "right"), 242 detection_scale=get_param(self._detection_scale, "right"), 243 pathfinding_threshold=get_param(pathfinding_threshold, "right"), 244 min_rows=get_param(min_rows, "right"), 245 extrapolation_distance=get_param(extrapolation_distance, "right"), 246 detection_threshold=get_param(detection_threshold, "right"), 247 growing_resets=get_param(growing_resets, "right"), 248 reset_fraction=get_param(reset_fraction, "right"), 249 ), 250 ) 251 252 else: 253 template_path = Path(template_path) 254 self._header = cv2.imread(os.fspath(template_path)) 255 self._aligner = TemplateMatcher( 256 self._header, 257 method=cast(FeatureDetector, feature_detector), 258 scale=matching_scale, 259 ) 260 self._template = TableTemplate.from_saved( 261 template_path.with_suffix(".json") 262 ) 263 264 # For single header, parameters should not be Split objects 265 if any( 266 isinstance(param, Split) 267 for param in [ 268 binarization_sensitivity, 269 search_radius, 270 position_weight, 271 line_thickness, 272 line_gap_fill, 273 intersection_kernel_size, 274 detection_scale, 275 min_rows, 276 extrapolation_distance, 277 detection_threshold, 278 row_height_factor, 279 growing_resets, 280 reset_fraction, 281 feature_detector, 282 ] 283 ): 284 raise TauluException( 285 "Split parameters can only be used with split headers (tuple header_path)" 286 ) 287 288 self._cell_heights = self._template.cell_heights( 289 cast(list[float] | float, self._row_height_factor) 290 ) 291 292 self._grid_detector = TableDetector( 293 intersection_kernel_size=intersection_kernel_size, # ty: ignore 294 
line_thickness=line_thickness, # ty: ignore 295 line_gap_fill=line_gap_fill, # ty: ignore 296 search_radius=search_radius, # ty: ignore 297 binarization_sensitivity=binarization_sensitivity, # ty: ignore 298 position_weight=position_weight, # ty: ignore 299 detection_scale=self._detection_scale, # ty: ignore 300 pathfinding_threshold=pathfinding_threshold, # ty: ignore 301 min_rows=min_rows, # ty: ignore 302 extrapolation_distance=extrapolation_distance, # ty: ignore 303 detection_threshold=detection_threshold, # ty: ignore 304 growing_resets=growing_resets, # ty:ignore 305 reset_fraction=reset_fraction, # ty:ignore 306 ) 307 308 @classmethod 309 def from_config(cls, config: TauluConfig) -> "Taulu": 310 """ 311 Create a :class:`Taulu` instance from a :class:`~taulu.config.TauluConfig`. 312 313 Args: 314 config: A populated :class:`~taulu.config.TauluConfig` instance. 315 316 Returns: 317 A :class:`Taulu` instance configured according to ``config``. 318 319 Example: 320 >>> from taulu import Taulu 321 >>> from taulu.config import TauluConfig 322 >>> config = TauluConfig.from_toml("my_table.toml") 323 >>> taulu = Taulu.from_config(config) 324 """ 325 return cls(**{name: getattr(config, name) for name in config.model_fields}) 326 327 @staticmethod 328 def annotate( 329 image_path: PathLike[str] | str, 330 output_path: PathLike[str] | str, 331 *, 332 backend: Literal["auto", "gui", "notebook"] = "auto", 333 ): 334 """ 335 Interactive tool to create header annotations for table segmentation. 336 337 This method guides you through a two-step annotation process: 338 339 1. **Crop the header**: Click four corners to define the header region 340 2. 
**Annotate lines**: Click pairs of points to define each vertical and 341 horizontal line in the header 342 343 The annotations are saved as: 344 - A cropped header image (.png) at `output_path` 345 - A JSON file (.json) containing line coordinates 346 347 ## Annotation Guidelines 348 349 **Which lines to annotate:** 350 - All vertical lines that extend into the table body (column separators) 351 - The top horizontal line of the header 352 - The bottom horizontal line of the header (top of data rows) 353 354 **Order doesn't matter** - annotate lines in any order that's convenient. 355 356 **To annotate a line:** 357 1. Click once at one endpoint 358 2. Click again at the other endpoint 359 3. A green line appears showing your annotation 360 361 **To undo:** 362 - Right-click anywhere to remove the last line you drew 363 364 **When finished:** 365 - Press 'n' to save and exit 366 - Press 'q' to quit without saving 367 368 Args: 369 image_path (PathLike[str] | str): Path to a table image containing 370 a clear view of the header. This can be a full table image. 371 output_path (PathLike[str] | str): Where to save the cropped header 372 image. The annotation JSON will be saved with the same name but 373 .json extension. 
374 375 Raises: 376 TauluException: If image_path doesn't exist or output_path is a directory 377 378 Examples: 379 Annotate a single header: 380 381 >>> from taulu import Taulu 382 >>> Taulu.annotate("scan_page_01.png", "header.png") 383 # Interactive window opens 384 # After annotation: creates header.png and header.json 385 386 Annotate left and right headers for a split table: 387 388 >>> Taulu.annotate("scan_page_01.png", "header_left.png") 389 >>> Taulu.annotate("scan_page_01.png", "header_right.png") 390 # Creates header_left.{png,json} and header_right.{png,json} 391 392 Notes: 393 - The header image doesn't need to be perfectly cropped initially - 394 the tool will help you crop it precisely 395 - Annotation accuracy is important: misaligned lines will cause 396 segmentation errors 397 - You can re-run this method to update annotations if needed 398 """ 399 400 if not exists(image_path): 401 raise TauluException(f"Image path {image_path} does not exist") 402 403 if os.path.isdir(output_path): 404 raise TauluException("Output path should be a file") 405 406 output_path = Path(output_path) 407 408 def running_in_notebook() -> bool: 409 try: 410 from IPython import get_ipython 411 412 ip = get_ipython() 413 return ip is not None and "IPKernelApp" in ip.config 414 except Exception: 415 return False 416 417 # Decide backend 418 if backend not in ("auto", "gui", "notebook"): 419 raise TauluException("backend must be one of: 'auto', 'gui', 'notebook'") 420 if backend == "auto": 421 use_notebook = running_in_notebook() 422 else: 423 use_notebook = backend == "notebook" 424 425 if use_notebook: 426 # Notebook way 427 logger.info( 428 "\x1b[32mNotebook environment detected/selected. Using notebook annotation backend." 
429 ) 430 session = TableTemplate.annotate_image_notebook( 431 os.fspath(image_path), crop=output_path.with_suffix(".png") 432 ) 433 session._save_path = output_path.with_suffix(".json") # ty: ignore[unresolved-attribute] 434 return session 435 436 else: 437 # GUI way 438 template = TableTemplate.annotate_image( 439 os.fspath(image_path), crop=output_path.with_suffix(".png") 440 ) 441 template.save(output_path.with_suffix(".json")) 442 443 def segment_table( 444 self, 445 image: MatLike | PathLike[str] | str, 446 filtered: MatLike | PathLike[str] | str | None = None, 447 debug_view: bool = False, 448 debug_view_notebook: bool = False, 449 ) -> SegmentedTable: 450 """ 451 Segment a table image into a grid of cells. 452 453 Orchestrates header alignment, grid detection, corner growing, and 454 extrapolation to produce a complete grid structure. 455 456 Args: 457 image: Table image to segment (file path or numpy array). 458 filtered: Optional pre-filtered binary image for corner detection. 459 If provided, binarization parameters are ignored. 460 debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance, 461 'q' to quit. Default: False 462 debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook 463 using matplotlib. Default: False 464 465 Returns: 466 SegmentedTable: Grid structure with methods for cell access (`crop_cell`, 467 `cell_polygon`), visualization (`show_cells`), and persistence 468 (`save`, `from_saved`). 469 470 Raises: 471 TauluException: If image cannot be loaded or grid detection fails. 
472 """ 473 474 if not isinstance(image, MatLike): 475 image = cast(str | PathLike[str], image) 476 tmp_image = cv2.imread(os.fspath(image)) 477 assert tmp_image is not None 478 image = tmp_image 479 480 now = perf_counter() 481 h = self._aligner.align( 482 image, # ty: ignore[invalid-argument-type] 483 visual=debug_view, 484 visual_notebook=debug_view_notebook, 485 ) 486 align_time = perf_counter() - now 487 logger.info(f"Header alignment took {align_time:.2f} seconds") 488 489 # find the starting point for the table grid algorithm 490 491 def make_top_row(template: TableTemplate, aligner: TemplateMatcher, h: NDArray): 492 top_row = [] 493 for x in range(template.cols + 1): 494 on_template = template.intersection((1, x)) 495 on_template = (int(on_template[0]), int(on_template[1])) 496 497 on_img = aligner.template_to_img(h, on_template) 498 499 top_row.append(on_img) 500 501 return top_row 502 503 if isinstance(self._aligner, Split): 504 top_row = Split( 505 make_top_row(self._template.left, self._aligner.left, h.left), # ty:ignore 506 make_top_row(self._template.right, self._aligner.right, h.right), # ty:ignore 507 ) 508 else: 509 top_row = make_top_row(self._template, self._aligner, h) # ty:ignore 510 511 cell_heights = self._cell_heights 512 filtered_pre: MatLike | Split | None = None 513 514 if self._auto_row_heights: 515 now_ar = perf_counter() 516 if isinstance(self._grid_detector, Split): 517 filtered_pre = self._grid_detector.apply(image, visual=debug_view) # ty:ignore 518 assert isinstance(self._template, Split) 519 header_h = Split( 520 self._template.left.cell_height(1.0), # ty:ignore[unresolved-attribute] 521 self._template.right.cell_height(1.0), # ty:ignore[unresolved-attribute] 522 ) 523 min_h = Split( 524 int(header_h.left * get_param(self._min_row_height_factor, "left")), 525 int( 526 header_h.right * get_param(self._min_row_height_factor, "right") 527 ), 528 ) 529 max_h = Split( 530 int(header_h.left * get_param(self._max_row_height_factor, 
"left")), 531 int( 532 header_h.right * get_param(self._max_row_height_factor, "right") 533 ), 534 ) 535 detected = self._grid_detector.detect_row_heights( 536 image, 537 filtered_pre, 538 top_row, 539 min_h, 540 max_h, 541 path_scale=self._row_detection_path_scale, 542 ) 543 # detected is Split[list[int]]; fall back per side if empty. 544 cell_heights = Split( 545 detected.left or self._cell_heights.left, # ty:ignore[unresolved-attribute] 546 detected.right or self._cell_heights.right, # ty:ignore[unresolved-attribute] 547 ) 548 else: 549 filtered_pre = self._grid_detector.apply(image, visual=debug_view) # ty:ignore 550 header_h_one = self._template.cell_height(1.0) 551 min_h_one = int(header_h_one * cast(float, self._min_row_height_factor)) 552 max_h_one = int(header_h_one * cast(float, self._max_row_height_factor)) 553 detected_one = self._grid_detector.detect_row_heights( 554 image, # ty:ignore[invalid-argument-type] 555 filtered_pre, 556 top_row, # ty:ignore 557 min_h_one, 558 max_h_one, 559 path_scale=self._row_detection_path_scale, 560 ) 561 cell_heights = detected_one or self._cell_heights 562 ar_time = perf_counter() - now_ar 563 logger.info(f"Row-height detection took {ar_time:.2f} seconds") 564 565 now = perf_counter() 566 table = self._grid_detector.find_table_points( 567 image, # ty:ignore 568 top_row, # ty:ignore 569 self._template.cell_widths(0), 570 cell_heights, # ty:ignore 571 visual=debug_view, 572 visual_notebook=debug_view_notebook, 573 filtered=filtered if filtered is not None else filtered_pre, # ty:ignore 574 smooth=self._smooth, 575 smooth_strength=self._smooth_strength, 576 smooth_iterations=self._smooth_iterations, 577 smooth_degree=self._smooth_degree, 578 ) 579 grid_time = perf_counter() - now 580 logger.info(f"Grid detection took {grid_time:.2f} seconds") 581 582 if debug_view_notebook: 583 self._aligner.show_matches_notebook() 584 585 if isinstance(table, Split): 586 table = SegmentedTable.from_split(table, (0, 0)) # ty: ignore 587 
588 return table
High-level API for table segmentation from images.
Taulu orchestrates header alignment, grid detection, and table segmentation into a single workflow.
Workflow:
- Create annotated header images via
Taulu.annotate()
- Initialize Taulu with header(s) and parameters
- Call
segment_table() to get a SegmentedTable with cell boundaries
For two-page tables, use Split[T] to provide different parameters for
left and right sides.
Example:
>>> import cv2
>>> from taulu import Taulu
>>> Taulu.annotate("table_image.png", "header.png")
>>> taulu = Taulu("header.png")
>>> grid = taulu.segment_table("table_page_01.png")
>>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0))
66 def __init__( 67 self, 68 template_path: Splittable[PathLike[str]] | Splittable[str], 69 row_height_factor: Splittable[float] | Splittable[list[float]] | None = None, 70 annotation_path: Splittable[PathLike[str]] | Splittable[str] | None = None, 71 binarization_sensitivity: Splittable[float] = 0.25, 72 search_radius: Splittable[int] = 60, 73 position_weight: Splittable[float] = 0.4, 74 line_thickness: Splittable[int] = 10, 75 line_gap_fill: Splittable[int] = 4, 76 intersection_kernel_size: Splittable[int] = 41, 77 detection_scale: Splittable[float] = 1.0, 78 pathfinding_threshold: Splittable[float] = 0.2, 79 min_rows: Splittable[int] = 5, 80 extrapolation_distance: Splittable[int] = 3, 81 detection_threshold: Splittable[float] = 0.3, 82 smooth: bool = False, 83 smooth_strength: float = 0.5, 84 smooth_iterations: int = 1, 85 smooth_degree: int = 1, 86 growing_resets: Splittable[int] = 0, 87 reset_fraction: Splittable[float] = 0.5, 88 feature_detector: Splittable[FeatureDetector] = "akaze", 89 matching_scale: float = 1.0, 90 auto_row_heights: bool = False, 91 min_row_height_factor: Splittable[float] = 0.5, 92 max_row_height_factor: Splittable[float] = 1.5, 93 row_detection_path_scale: float = 0.25, 94 ): 95 """ 96 Args: 97 template_path: Path to header template image(s). Use `Split` for two-page tables. 98 row_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0] 99 annotation_path: Explicit annotation JSON path. Default: inferred from image path. 100 binarization_sensitivity: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25 101 search_radius: Corner search area in pixels. Default: 60 102 position_weight: Position penalty weight [0, 1]. Default: 0.4 103 line_thickness: Cross-kernel width matching line thickness. Default: 10 104 line_gap_fill: Morphological dilation size. Default: 4 105 intersection_kernel_size: Cross-kernel size (odd). Default: 41 106 detection_scale: Image downscale factor (0, 1]. 
Default: 1.0 107 pathfinding_threshold: Confidence to skip A* pathfinding. Default: 0.2 108 min_rows: Minimum rows before completion. Default: 5 109 extrapolation_distance: Rows to examine for extrapolation. Default: 3 110 detection_threshold: Corner acceptance confidence [0, 1]. Default: 0.3 111 smooth: Apply grid smoothing after detection. Default: False 112 smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5 113 smooth_iterations: Number of smoothing passes. Default: 1 114 smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1 115 growing_resets: Number of grid cuts during growing. Default: 0 116 reset_fraction: Fraction of points to delete per cut. Default: 0.5 117 feature_detector: Feature matching method for header alignment. One of "orb" 118 (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust, 119 patent-free). Default: "akaze" 120 matching_scale: Downscale factor (0, 1] for header alignment only. Lower 121 values speed up feature matching. Default: 1.0 122 auto_row_heights: If True, detect variable per-row heights from the 123 cross-correlation map at runtime (overriding `row_height_factor`). 124 Default: False 125 min_row_height_factor: Minimum row height as a fraction of the header 126 height when `auto_row_heights` is enabled. Default: 0.5 127 max_row_height_factor: Maximum row height as a fraction of the header 128 height when `auto_row_heights` is enabled. Default: 1.5 129 row_detection_path_scale: Downscale factor (0, 1] for the A* path 130 following used by `auto_row_heights`. Lower = faster, less precise. 
131 Default: 0.25 132 """ 133 self._detection_scale = detection_scale 134 self._auto_row_heights = auto_row_heights 135 self._min_row_height_factor = min_row_height_factor 136 self._max_row_height_factor = max_row_height_factor 137 self._row_detection_path_scale = row_detection_path_scale 138 self._smooth = smooth 139 self._smooth_strength = smooth_strength 140 self._smooth_iterations = smooth_iterations 141 self._smooth_degree = smooth_degree 142 143 if row_height_factor is None: 144 row_height_factor = [1.0] 145 146 self._row_height_factor = row_height_factor 147 148 if isinstance(template_path, Split) or isinstance(annotation_path, Split): 149 header = Split(Path(template_path.left), Path(template_path.right)) # ty:ignore[unresolved-attribute] 150 151 if not exists(header.left.with_suffix(".png")) or not exists( 152 header.right.with_suffix(".png") 153 ): 154 raise TauluException( 155 "The header images you provided do not exist (or they aren't .png files)" 156 ) 157 158 if annotation_path is None: 159 if not exists(header.left.with_suffix(".json")) or not exists( 160 header.right.with_suffix(".json") 161 ): 162 raise TauluException( 163 "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method" 164 ) 165 166 template_left = TableTemplate.from_saved( 167 header.left.with_suffix(".json") 168 ) 169 template_right = TableTemplate.from_saved( 170 header.right.with_suffix(".json") 171 ) 172 173 else: 174 if not exists(annotation_path.left) or not exists( # ty: ignore[unresolved-attribute] 175 annotation_path.right # ty: ignore[unresolved-attribute] 176 ): 177 raise TauluException( 178 "The header annotation files you provided do not exist (or they aren't .json files)" 179 ) 180 181 template_left = TableTemplate.from_saved(annotation_path.left) # ty: ignore[unresolved-attribute] 182 template_right = TableTemplate.from_saved(annotation_path.right) # ty: ignore[unresolved-attribute] 183 184 self._header = Split( 185 
cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right)) 186 ) 187 188 self._aligner = Split( 189 TemplateMatcher( 190 self._header.left, 191 method=get_param(feature_detector, "left"), 192 scale=matching_scale, 193 ), 194 TemplateMatcher( 195 self._header.right, 196 method=get_param(feature_detector, "right"), 197 scale=matching_scale, 198 ), 199 ) 200 201 self._template = Split(template_left, template_right) 202 203 self._cell_heights = Split( 204 self._template.left.cell_heights(get_param(row_height_factor, "left")), 205 self._template.right.cell_heights( 206 get_param(row_height_factor, "right") 207 ), 208 ) 209 210 # Create TableDetector for left and right with potentially different parameters 211 self._grid_detector = Split( 212 TableDetector( 213 intersection_kernel_size=get_param( 214 intersection_kernel_size, "left" 215 ), 216 line_thickness=get_param(line_thickness, "left"), 217 line_gap_fill=get_param(line_gap_fill, "left"), 218 search_radius=get_param(search_radius, "left"), 219 binarization_sensitivity=get_param( 220 binarization_sensitivity, "left" 221 ), 222 position_weight=get_param(position_weight, "left"), 223 detection_scale=get_param(self._detection_scale, "left"), 224 pathfinding_threshold=get_param(pathfinding_threshold, "left"), 225 min_rows=get_param(min_rows, "left"), 226 extrapolation_distance=get_param(extrapolation_distance, "left"), 227 detection_threshold=get_param(detection_threshold, "left"), 228 growing_resets=get_param(growing_resets, "left"), 229 reset_fraction=get_param(reset_fraction, "left"), 230 ), 231 TableDetector( 232 intersection_kernel_size=get_param( 233 intersection_kernel_size, "right" 234 ), 235 line_thickness=get_param(line_thickness, "right"), 236 line_gap_fill=get_param(line_gap_fill, "right"), 237 search_radius=get_param(search_radius, "right"), 238 binarization_sensitivity=get_param( 239 binarization_sensitivity, "right" 240 ), 241 position_weight=get_param(position_weight, "right"), 242 
detection_scale=get_param(self._detection_scale, "right"), 243 pathfinding_threshold=get_param(pathfinding_threshold, "right"), 244 min_rows=get_param(min_rows, "right"), 245 extrapolation_distance=get_param(extrapolation_distance, "right"), 246 detection_threshold=get_param(detection_threshold, "right"), 247 growing_resets=get_param(growing_resets, "right"), 248 reset_fraction=get_param(reset_fraction, "right"), 249 ), 250 ) 251 252 else: 253 template_path = Path(template_path) 254 self._header = cv2.imread(os.fspath(template_path)) 255 self._aligner = TemplateMatcher( 256 self._header, 257 method=cast(FeatureDetector, feature_detector), 258 scale=matching_scale, 259 ) 260 self._template = TableTemplate.from_saved( 261 template_path.with_suffix(".json") 262 ) 263 264 # For single header, parameters should not be Split objects 265 if any( 266 isinstance(param, Split) 267 for param in [ 268 binarization_sensitivity, 269 search_radius, 270 position_weight, 271 line_thickness, 272 line_gap_fill, 273 intersection_kernel_size, 274 detection_scale, 275 min_rows, 276 extrapolation_distance, 277 detection_threshold, 278 row_height_factor, 279 growing_resets, 280 reset_fraction, 281 feature_detector, 282 ] 283 ): 284 raise TauluException( 285 "Split parameters can only be used with split headers (tuple header_path)" 286 ) 287 288 self._cell_heights = self._template.cell_heights( 289 cast(list[float] | float, self._row_height_factor) 290 ) 291 292 self._grid_detector = TableDetector( 293 intersection_kernel_size=intersection_kernel_size, # ty: ignore 294 line_thickness=line_thickness, # ty: ignore 295 line_gap_fill=line_gap_fill, # ty: ignore 296 search_radius=search_radius, # ty: ignore 297 binarization_sensitivity=binarization_sensitivity, # ty: ignore 298 position_weight=position_weight, # ty: ignore 299 detection_scale=self._detection_scale, # ty: ignore 300 pathfinding_threshold=pathfinding_threshold, # ty: ignore 301 min_rows=min_rows, # ty: ignore 302 
extrapolation_distance=extrapolation_distance, # ty: ignore 303 detection_threshold=detection_threshold, # ty: ignore 304 growing_resets=growing_resets, # ty:ignore 305 reset_fraction=reset_fraction, # ty:ignore 306 )
Arguments:
- template_path: Path to header template image(s). Use
Split for two-page tables.
- row_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
- annotation_path: Explicit annotation JSON path. Default: inferred from image path.
- binarization_sensitivity: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
- search_radius: Corner search area in pixels. Default: 60
- position_weight: Position penalty weight [0, 1]. Default: 0.4
- line_thickness: Cross-kernel width matching line thickness. Default: 10
- line_gap_fill: Morphological dilation size. Default: 4
- intersection_kernel_size: Cross-kernel size (odd). Default: 41
- detection_scale: Image downscale factor (0, 1]. Default: 1.0
- pathfinding_threshold: Confidence to skip A* pathfinding. Default: 0.2
- min_rows: Minimum rows before completion. Default: 5
- extrapolation_distance: Rows to examine for extrapolation. Default: 3
- detection_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
- smooth: Apply grid smoothing after detection. Default: False
- smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5
- smooth_iterations: Number of smoothing passes. Default: 1
- smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1
- growing_resets: Number of grid cuts during growing. Default: 0
- reset_fraction: Fraction of points to delete per cut. Default: 0.5
- feature_detector: Feature matching method for header alignment. One of "orb" (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust, patent-free). Default: "akaze"
- matching_scale: Downscale factor (0, 1] for header alignment only. Lower values speed up feature matching. Default: 1.0
- auto_row_heights: If True, detect variable per-row heights from the
cross-correlation map at runtime (overriding
row_height_factor). Default: False
- min_row_height_factor: Minimum row height as a fraction of the header
height when
auto_row_heights is enabled. Default: 0.5
- max_row_height_factor: Maximum row height as a fraction of the header
height when
auto_row_heights is enabled. Default: 1.5
- row_detection_path_scale: Downscale factor (0, 1] for the A* path
following used by
auto_row_heights. Lower = faster, less precise. Default: 0.25
308 @classmethod 309 def from_config(cls, config: TauluConfig) -> "Taulu": 310 """ 311 Create a :class:`Taulu` instance from a :class:`~taulu.config.TauluConfig`. 312 313 Args: 314 config: A populated :class:`~taulu.config.TauluConfig` instance. 315 316 Returns: 317 A :class:`Taulu` instance configured according to ``config``. 318 319 Example: 320 >>> from taulu import Taulu 321 >>> from taulu.config import TauluConfig 322 >>> config = TauluConfig.from_toml("my_table.toml") 323 >>> taulu = Taulu.from_config(config) 324 """ 325 return cls(**{name: getattr(config, name) for name in config.model_fields})
Create a Taulu instance from a ~taulu.config.TauluConfig.
Arguments:
- config: A populated
~taulu.config.TauluConfig instance.
Returns:
A
Taulu instance configured according to config.
Example:
>>> from taulu import Taulu
>>> from taulu.config import TauluConfig
>>> config = TauluConfig.from_toml("my_table.toml")
>>> taulu = Taulu.from_config(config)
327 @staticmethod 328 def annotate( 329 image_path: PathLike[str] | str, 330 output_path: PathLike[str] | str, 331 *, 332 backend: Literal["auto", "gui", "notebook"] = "auto", 333 ): 334 """ 335 Interactive tool to create header annotations for table segmentation. 336 337 This method guides you through a two-step annotation process: 338 339 1. **Crop the header**: Click four corners to define the header region 340 2. **Annotate lines**: Click pairs of points to define each vertical and 341 horizontal line in the header 342 343 The annotations are saved as: 344 - A cropped header image (.png) at `output_path` 345 - A JSON file (.json) containing line coordinates 346 347 ## Annotation Guidelines 348 349 **Which lines to annotate:** 350 - All vertical lines that extend into the table body (column separators) 351 - The top horizontal line of the header 352 - The bottom horizontal line of the header (top of data rows) 353 354 **Order doesn't matter** - annotate lines in any order that's convenient. 355 356 **To annotate a line:** 357 1. Click once at one endpoint 358 2. Click again at the other endpoint 359 3. A green line appears showing your annotation 360 361 **To undo:** 362 - Right-click anywhere to remove the last line you drew 363 364 **When finished:** 365 - Press 'n' to save and exit 366 - Press 'q' to quit without saving 367 368 Args: 369 image_path (PathLike[str] | str): Path to a table image containing 370 a clear view of the header. This can be a full table image. 371 output_path (PathLike[str] | str): Where to save the cropped header 372 image. The annotation JSON will be saved with the same name but 373 .json extension. 
374 375 Raises: 376 TauluException: If image_path doesn't exist or output_path is a directory 377 378 Examples: 379 Annotate a single header: 380 381 >>> from taulu import Taulu 382 >>> Taulu.annotate("scan_page_01.png", "header.png") 383 # Interactive window opens 384 # After annotation: creates header.png and header.json 385 386 Annotate left and right headers for a split table: 387 388 >>> Taulu.annotate("scan_page_01.png", "header_left.png") 389 >>> Taulu.annotate("scan_page_01.png", "header_right.png") 390 # Creates header_left.{png,json} and header_right.{png,json} 391 392 Notes: 393 - The header image doesn't need to be perfectly cropped initially - 394 the tool will help you crop it precisely 395 - Annotation accuracy is important: misaligned lines will cause 396 segmentation errors 397 - You can re-run this method to update annotations if needed 398 """ 399 400 if not exists(image_path): 401 raise TauluException(f"Image path {image_path} does not exist") 402 403 if os.path.isdir(output_path): 404 raise TauluException("Output path should be a file") 405 406 output_path = Path(output_path) 407 408 def running_in_notebook() -> bool: 409 try: 410 from IPython import get_ipython 411 412 ip = get_ipython() 413 return ip is not None and "IPKernelApp" in ip.config 414 except Exception: 415 return False 416 417 # Decide backend 418 if backend not in ("auto", "gui", "notebook"): 419 raise TauluException("backend must be one of: 'auto', 'gui', 'notebook'") 420 if backend == "auto": 421 use_notebook = running_in_notebook() 422 else: 423 use_notebook = backend == "notebook" 424 425 if use_notebook: 426 # Notebook way 427 logger.info( 428 "\x1b[32mNotebook environment detected/selected. Using notebook annotation backend." 
429 ) 430 session = TableTemplate.annotate_image_notebook( 431 os.fspath(image_path), crop=output_path.with_suffix(".png") 432 ) 433 session._save_path = output_path.with_suffix(".json") # ty: ignore[unresolved-attribute] 434 return session 435 436 else: 437 # GUI way 438 template = TableTemplate.annotate_image( 439 os.fspath(image_path), crop=output_path.with_suffix(".png") 440 ) 441 template.save(output_path.with_suffix(".json"))
Interactive tool to create header annotations for table segmentation.
This method guides you through a two-step annotation process:
- Crop the header: Click four corners to define the header region
- Annotate lines: Click pairs of points to define each vertical and horizontal line in the header
The annotations are saved as:
- A cropped header image (.png) at
output_path
- A JSON file (.json) containing line coordinates
Annotation Guidelines
Which lines to annotate:
- All vertical lines that extend into the table body (column separators)
- The top horizontal line of the header
- The bottom horizontal line of the header (top of data rows)
Order doesn't matter - annotate lines in any order that's convenient.
To annotate a line:
- Click once at one endpoint
- Click again at the other endpoint
- A green line appears showing your annotation
To undo:
- Right-click anywhere to remove the last line you drew
When finished:
- Press 'n' to save and exit
- Press 'q' to quit without saving
Arguments:
- image_path (PathLike[str] | str): Path to a table image containing a clear view of the header. This can be a full table image.
- output_path (PathLike[str] | str): Where to save the cropped header image. The annotation JSON will be saved with the same name but .json extension.
Raises:
- TauluException: If image_path doesn't exist or output_path is a directory
Examples:
Annotate a single header:
>>> from taulu import Taulu
>>> Taulu.annotate("scan_page_01.png", "header.png")
# Interactive window opens
# After annotation: creates header.png and header.json
Annotate left and right headers for a split table:
>>> Taulu.annotate("scan_page_01.png", "header_left.png")
>>> Taulu.annotate("scan_page_01.png", "header_right.png")
# Creates header_left.{png,json} and header_right.{png,json}
Notes:
- The header image doesn't need to be perfectly cropped initially - the tool will help you crop it precisely
- Annotation accuracy is important: misaligned lines will cause segmentation errors
- You can re-run this method to update annotations if needed
    def segment_table(
        self,
        image: MatLike | PathLike[str] | str,
        filtered: MatLike | PathLike[str] | str | None = None,
        debug_view: bool = False,
        debug_view_notebook: bool = False,
    ) -> SegmentedTable:
        """
        Segment a table image into a grid of cells.

        Orchestrates header alignment, grid detection, corner growing, and
        extrapolation to produce a complete grid structure.

        Args:
            image: Table image to segment (file path or numpy array).
            filtered: Optional pre-filtered binary image for corner detection.
                If provided, binarization parameters are ignored.
            debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance,
                'q' to quit. Default: False
            debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook
                using matplotlib. Default: False

        Returns:
            SegmentedTable: Grid structure with methods for cell access (`crop_cell`,
                `cell_polygon`), visualization (`show_cells`), and persistence
                (`save`, `from_saved`).

        Raises:
            TauluException: If image cannot be loaded or grid detection fails.
        """

        # Load from disk when a path was supplied instead of an array.
        # NOTE(review): a failed read trips the assert below rather than raising
        # TauluException as the docstring states — confirm intended behavior.
        if not isinstance(image, MatLike):
            image = cast(str | PathLike[str], image)
            tmp_image = cv2.imread(os.fspath(image))
            assert tmp_image is not None
            image = tmp_image

        # Step 1: align the header template(s) onto the image; `h` is the
        # homography (or Split of homographies) from template to image space.
        now = perf_counter()
        h = self._aligner.align(
            image,  # ty: ignore[invalid-argument-type]
            visual=debug_view,
            visual_notebook=debug_view_notebook,
        )
        align_time = perf_counter() - now
        logger.info(f"Header alignment took {align_time:.2f} seconds")

        # find the starting point for the table grid algorithm

        def make_top_row(template: TableTemplate, aligner: TemplateMatcher, h: NDArray):
            # Project every column intersection on template row 1 (bottom edge
            # of the header) into image coordinates.
            top_row = []
            for x in range(template.cols + 1):
                on_template = template.intersection((1, x))
                on_template = (int(on_template[0]), int(on_template[1]))

                on_img = aligner.template_to_img(h, on_template)

                top_row.append(on_img)

            return top_row

        # Split (two-page) tables get one top row per side.
        if isinstance(self._aligner, Split):
            top_row = Split(
                make_top_row(self._template.left, self._aligner.left, h.left),  # ty:ignore
                make_top_row(self._template.right, self._aligner.right, h.right),  # ty:ignore
            )
        else:
            top_row = make_top_row(self._template, self._aligner, h)  # ty:ignore

        cell_heights = self._cell_heights
        filtered_pre: MatLike | Split | None = None

        # Step 2 (optional): detect per-row heights from the image instead of
        # using the configured row_height_factor values.
        if self._auto_row_heights:
            now_ar = perf_counter()
            if isinstance(self._grid_detector, Split):
                filtered_pre = self._grid_detector.apply(image, visual=debug_view)  # ty:ignore
                assert isinstance(self._template, Split)
                header_h = Split(
                    self._template.left.cell_height(1.0),  # ty:ignore[unresolved-attribute]
                    self._template.right.cell_height(1.0),  # ty:ignore[unresolved-attribute]
                )
                # Row-height search bounds per side, as fractions of the
                # header cell height.
                min_h = Split(
                    int(header_h.left * get_param(self._min_row_height_factor, "left")),
                    int(
                        header_h.right * get_param(self._min_row_height_factor, "right")
                    ),
                )
                max_h = Split(
                    int(header_h.left * get_param(self._max_row_height_factor, "left")),
                    int(
                        header_h.right * get_param(self._max_row_height_factor, "right")
                    ),
                )
                detected = self._grid_detector.detect_row_heights(
                    image,
                    filtered_pre,
                    top_row,
                    min_h,
                    max_h,
                    path_scale=self._row_detection_path_scale,
                )
                # detected is Split[list[int]]; fall back per side if empty.
                cell_heights = Split(
                    detected.left or self._cell_heights.left,  # ty:ignore[unresolved-attribute]
                    detected.right or self._cell_heights.right,  # ty:ignore[unresolved-attribute]
                )
            else:
                filtered_pre = self._grid_detector.apply(image, visual=debug_view)  # ty:ignore
                header_h_one = self._template.cell_height(1.0)
                min_h_one = int(header_h_one * cast(float, self._min_row_height_factor))
                max_h_one = int(header_h_one * cast(float, self._max_row_height_factor))
                detected_one = self._grid_detector.detect_row_heights(
                    image,  # ty:ignore[invalid-argument-type]
                    filtered_pre,
                    top_row,  # ty:ignore
                    min_h_one,
                    max_h_one,
                    path_scale=self._row_detection_path_scale,
                )
                # Empty detection result falls back to the configured heights.
                cell_heights = detected_one or self._cell_heights
            ar_time = perf_counter() - now_ar
            logger.info(f"Row-height detection took {ar_time:.2f} seconds")

        # Step 3: grow the full grid of table points from the top row.  Reuse
        # the pre-filtered image from step 2 unless the caller supplied one.
        now = perf_counter()
        table = self._grid_detector.find_table_points(
            image,  # ty:ignore
            top_row,  # ty:ignore
            self._template.cell_widths(0),
            cell_heights,  # ty:ignore
            visual=debug_view,
            visual_notebook=debug_view_notebook,
            filtered=filtered if filtered is not None else filtered_pre,  # ty:ignore
            smooth=self._smooth,
            smooth_strength=self._smooth_strength,
            smooth_iterations=self._smooth_iterations,
            smooth_degree=self._smooth_degree,
        )
        grid_time = perf_counter() - now
        logger.info(f"Grid detection took {grid_time:.2f} seconds")

        if debug_view_notebook:
            self._aligner.show_matches_notebook()

        # Merge a Split result into one SegmentedTable at offset (0, 0).
        if isinstance(table, Split):
            table = SegmentedTable.from_split(table, (0, 0))  # ty: ignore

        return table
Segment a table image into a grid of cells.
Orchestrates header alignment, grid detection, corner growing, and extrapolation to produce a complete grid structure.
Arguments:
- image: Table image to segment (file path or numpy array).
- filtered: Optional pre-filtered binary image for corner detection. If provided, binarization parameters are ignored.
- debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance, 'q' to quit. Default: False
- debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook using matplotlib. Default: False
Returns:
SegmentedTable: Grid structure with methods for cell access (`crop_cell`, `cell_polygon`), visualization (`show_cells`), and persistence (`save`, `from_saved`).
Raises:
- TauluException: If image cannot be loaded or grid detection fails.
class TauluConfig(BaseModel):
    """
    Configuration for :class:`~taulu.Taulu`.

    All parameters mirror the ``Taulu.__init__`` signature. Any parameter that
    accepts a ``Split[T]`` can be given as a ``Split`` instance or as a plain
    scalar (applied to both sides).

    Use :meth:`from_toml` to load from a ``.toml`` file, then pass to
    :meth:`Taulu.from_config <taulu.Taulu.from_config>`.
    """

    # Split instances (and similar non-pydantic types) require this.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    template_path: Splittable[str] = Field(
        description="Path to header template image(s). Use left/right split for two-page tables.",
    )
    row_height_factor: Splittable[float] | Splittable[list[float]] | None = Field(
        default=None,
        description="Row height relative to header (e.g. 0.8 for 80%). Default: [1.0]",
    )
    annotation_path: Splittable[str] | None = Field(
        default=None,
        description="Explicit annotation JSON path. Default: inferred from template_path.",
    )
    binarization_sensitivity: Splittable[float] = Field(
        default=0.25,
        description="Binarization threshold (0.0-1.0). Higher = less noise.",
    )
    search_radius: Splittable[int] = Field(
        default=60,
        description="Corner search area in pixels.",
    )
    position_weight: Splittable[float] = Field(
        default=0.4,
        description="Position penalty weight [0, 1].",
    )
    line_thickness: Splittable[int] = Field(
        default=10,
        description="Cross-kernel width matching line thickness.",
    )
    line_gap_fill: Splittable[int] = Field(
        default=4,
        description="Morphological dilation size for gap filling.",
    )
    intersection_kernel_size: Splittable[int] = Field(
        default=41,
        description="Cross-kernel size (must be odd).",
    )
    detection_scale: Splittable[float] = Field(
        default=1.0,
        description="Image downscale factor (0, 1].",
    )
    pathfinding_threshold: Splittable[float] = Field(
        default=0.2,
        description="Confidence threshold to skip A* pathfinding.",
    )
    min_rows: Splittable[int] = Field(
        default=5,
        description="Minimum rows before completion.",
    )
    extrapolation_distance: Splittable[int] = Field(
        default=3,
        description="Rows to examine for extrapolation.",
    )
    detection_threshold: Splittable[float] = Field(
        default=0.3,
        description="Corner acceptance confidence [0, 1].",
    )
    smooth: bool = Field(
        default=False,
        description="Apply grid smoothing after detection.",
    )
    smooth_strength: float = Field(
        default=0.5,
        description="Blend factor per smoothing iteration (0.0-1.0).",
    )
    smooth_iterations: int = Field(
        default=1,
        description="Number of smoothing passes.",
    )
    smooth_degree: int = Field(
        default=1,
        description="Polynomial degree for smoothing regression (1 or 2).",
    )
    growing_resets: Splittable[int] = Field(
        default=0,
        description="Number of grid resets during growing.",
    )
    reset_fraction: Splittable[float] = Field(
        default=0.5,
        description="Fraction of points to delete per reset.",
    )
    feature_detector: Splittable[Literal["orb", "sift", "akaze"]] = Field(
        default="akaze",
        description="Feature matching method: 'orb' (fast), 'sift' (robust), 'akaze'.",
    )
    matching_scale: float = Field(
        default=1.0,
        description="Downscale factor (0, 1] for header alignment only.",
    )
    auto_row_heights: bool = Field(
        default=False,
        description="If True, detect variable per-row heights from the cross-correlation map (overrides row_height_factor).",
    )
    min_row_height_factor: Splittable[float] = Field(
        default=0.5,
        description="Minimum row height as a fraction of header height when auto_row_heights is enabled.",
    )
    max_row_height_factor: Splittable[float] = Field(
        default=1.5,
        description="Maximum row height as a fraction of header height when auto_row_heights is enabled.",
    )
    row_detection_path_scale: float = Field(
        default=0.25,
        description="Downscale factor (0, 1] for the A* path following used by auto row height detection.",
    )

    @classmethod
    def from_toml(cls, *paths: PathLike[str] | str) -> "TauluConfig":
        """
        Load a :class:`TauluConfig` from one or more TOML files.

        When multiple paths are given, files are merged in order: later files
        override keys from earlier ones. Use this to share a common base config
        and override only the fields that differ::

            config = TauluConfig.from_toml("common.toml", "left.toml")

        Args:
            *paths: One or more paths to ``.toml`` configuration files.

        Returns:
            A fully populated :class:`TauluConfig` instance.

        Raises:
            KeyError: If a required field (``template_path``) is missing.
            TypeError: If a field value has an unexpected type.
        """
        merged: dict = {}
        for path in paths:
            with open(path, "rb") as f:
                data = tomllib.load(f)
            # Later files win on key conflicts (shallow, top-level merge).
            merged.update(data)

        # Keys beginning with "$" (presumably metadata such as "$schema")
        # are not model fields and are skipped — TODO confirm convention.
        parsed = {
            key: _parse_value(value)
            for key, value in merged.items()
            if not key.startswith("$")
        }
        return cls(**parsed)
Configuration for `taulu.Taulu`.
All parameters mirror the `Taulu.__init__` signature. Any parameter that
accepts a `Split[T]` can be given as a `Split` instance or as a plain
scalar (applied to both sides).
Use `from_toml()` to load from a `.toml` file, then pass to
`Taulu.from_config()`.
Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
165 @classmethod 166 def from_toml(cls, *paths: PathLike[str] | str) -> "TauluConfig": 167 """ 168 Load a :class:`TauluConfig` from one or more TOML files. 169 170 When multiple paths are given, files are merged in order: later files 171 override keys from earlier ones. Use this to share a common base config 172 and override only the fields that differ:: 173 174 config = TauluConfig.from_toml("common.toml", "left.toml") 175 176 Args: 177 *paths: One or more paths to ``.toml`` configuration files. 178 179 Returns: 180 A fully populated :class:`TauluConfig` instance. 181 182 Raises: 183 KeyError: If a required field (``template_path``) is missing. 184 TypeError: If a field value has an unexpected type. 185 """ 186 merged: dict = {} 187 for path in paths: 188 with open(path, "rb") as f: 189 data = tomllib.load(f) 190 merged.update(data) 191 192 parsed = { 193 key: _parse_value(value) 194 for key, value in merged.items() 195 if not key.startswith("$") 196 } 197 return cls(**parsed)
Load a TauluConfig from one or more TOML files.
When multiple paths are given, files are merged in order: later files override keys from earlier ones. Use this to share a common base config and override only the fields that differ::
config = TauluConfig.from_toml("common.toml", "left.toml")
Arguments:
- *paths: One or more paths to `.toml` configuration files.
Returns:
A fully populated `TauluConfig` instance.
Raises:
- KeyError: If a required field (`template_path`) is missing.
- TypeError: If a field value has an unexpected type.
class TemplateMatcher:
    """
    Aligns table header templates to subject images using feature-based registration.

    This class supports multiple feature detection and matching methods to compute
    a homography transformation that maps points from a header template image to
    their corresponding locations in full table images.

    ## How it Works

    1. **Feature Detection**: Extracts keypoints from both template and subject
    2. **Feature Matching**: Finds correspondences using the selected matcher
    3. **Filtering**: Keeps top matches and prunes based on spatial consistency
    4. **Homography Estimation**: Computes perspective transform using RANSAC

    The computed homography can then transform any point from template space to
    image space, allowing you to locate table structures based on your annotation.

    ## Available Methods

    - **orb** (default): ORB features with BFMatcher (Hamming distance). Fast and
      patent-free. Good for most use cases.
    - **sift**: SIFT features with BFMatcher (L2 norm). More robust to scale and
      rotation changes. Slower but often more accurate.
    - **surf**: SURF features with BFMatcher (L2 norm). Requires opencv-contrib-python
      with non-free modules enabled. Fast and robust.
    - **akaze**: AKAZE features with BFMatcher (Hamming distance). Patent-free,
      handles scale/rotation well, and often more robust than ORB on documents.

    ## Preprocessing Options

    - Set `k` parameter to apply Sauvola thresholding before feature detection.
      This can improve matching on documents with variable lighting.
    - Set `k=None` to use raw images (just extract blue channel for BGR images)

    ## Tuning Guidelines

    - **max_features**: Increase if matching fails on complex templates
    - **match_fraction**: Decrease if you get many incorrect matches
    - **max_dist**: Increase for documents with more warping/distortion
    - **scale**: Decrease (<1.0) to speed up on high-resolution images

    Args:
        template (MatLike | PathLike[str] | str | None): Header template image or path.
            This should contain a clear, representative view of the table header.
        method (FeatureDetector): Feature detection/matching method. One of "orb", "sift",
            or "surf". Default is "orb".
        max_features (int): Maximum features to detect. More features = slower
            but potentially more robust matching.
        patch_size (int): ORB patch size for feature extraction (only used with "orb").
        match_fraction (float): Fraction [0, 1] of matches to keep after sorting by
            quality. Higher = more matches but potentially more outliers.
        scale (float): Image downscaling factor (0, 1] for processing speed.
        max_dist (float): Maximum allowed distance (relative to image size) between
            matched keypoints. Filters out spatially inconsistent matches.
        k (float | None): Sauvola threshold parameter for preprocessing. If None,
            no thresholding is applied. Typical range: 0.03-0.15.
    """

    def __init__(
        self,
        template: None | MatLike | PathLike[str] | str = None,
        method: FeatureDetector = "orb",
        max_features: int = 100_000,
        patch_size: int = 31,
        match_fraction: float = 0.3,
        scale: float = 1.0,
        max_dist: float = 1.00,
        k: float | None = None,
    ):
        """
        Args:
            template (MatLike | str): (path of) template image, with the table template clearly visible
            method (FeatureDetector): feature detection/matching method ("orb", "sift", or "surf")
            max_features (int): maximal number of features that will be extracted
            patch_size (int): for ORB feature extractor (only used with method="orb")
            match_fraction (float): best fraction of matches that are kept
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            max_dist (float): maximum distance (relative to image size) of matched features.
                Increase this value if the warping between image and template needs to be more agressive
            k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done

        Raises:
            TauluException: if ``scale`` is not in ``(0, 1]``.
        """

        # NOTE(review): ``type(template) is PathLike`` is never True for
        # concrete path objects (PathLike is an ABC; pathlib.Path instances
        # have a different concrete type) — so Path inputs skip the imread
        # branch. isinstance(template, (str, PathLike)) looks intended; confirm.
        if type(template) is str or type(template) is PathLike:
            value = cv.imread(fspath(template))
            template = value

        self._method = method
        self._k = k
        if scale > 1.0:
            raise TauluException(
                "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
            )
        if scale == 0:
            raise TauluException("Use 0 < scale <= 1.0")

        self._scale = scale
        # The template is stored already downscaled and preprocessed.
        self._template = self._scale_img(cast(MatLike, template))
        self._template_orig: None | MatLike = None
        self._preprocess_template()
        self._max_features = max_features
        self._patch_size = patch_size
        self._match_fraction = match_fraction
        self._max_dist = max_dist
        self._validate_method()
        self._matches_notebook_img = None

    def _scale_img(self, img: MatLike) -> MatLike:
        """Downscale ``img`` by the configured factor (no-op when scale == 1)."""
        if self._scale == 1.0:
            return img

        return cv.resize(img, None, fx=self._scale, fy=self._scale)

    def _unscale_img(self, img: MatLike) -> MatLike:
        """Invert `_scale_img`: resize back to original resolution."""
        if self._scale == 1.0:
            return img

        return cv.resize(img, None, fx=1 / self._scale, fy=1 / self._scale)

    def _unscale_homography(self, h: np.ndarray) -> np.ndarray:
        """
        Convert a homography computed in downscaled coordinates to one that
        operates in full-resolution coordinates (conjugation by the scale map).
        """
        if self._scale == 1.0:
            return h

        scale_matrix = np.diag([self._scale, self._scale, 1.0])
        inv_scale_matrix = np.diag([1.0 / self._scale, 1.0 / self._scale, 1.0])
        return inv_scale_matrix @ h @ scale_matrix

    @property
    def method(self) -> FeatureDetector:
        """The feature detection/matching method being used."""
        return self._method

    @property
    def template(self):
        """The template image that subject images are aligned to"""
        return self._template

    @template.setter
    def template(self, value: MatLike | str):
        """Set the template image as a path or an image"""

        if type(value) is str:
            tmp_value = cv.imread(value)
            assert tmp_value is not None
            value = tmp_value
        self._template = value

        # TODO: check if the image has the right properties (dimensions etc.)
        # NOTE(review): this second assignment is redundant with the one above
        # (same value, only a cast) — harmless but could be collapsed.
        self._template = cast(MatLike, value)

        self._preprocess_template()

    def _preprocess_template(self):
        """Cache a grayscale copy, then binarize (Sauvola) or take the blue channel."""
        self._template_orig = cv.cvtColor(self._template, cv.COLOR_BGR2GRAY)
        if self._k is not None:
            self._template = imu.sauvola(self._template, self._k)
            self._template = cv.bitwise_not(self._template)
        else:
            # cv.split returns (B, G, R); keep only the blue channel.
            _, _, self._template = cv.split(self._template)

    def _preprocess_image(self, img: MatLike):
        """Apply the same preprocessing to a subject image as to the template."""
        if self._template_orig is None:
            raise TauluException("process the template first")

        if self._k is not None:
            img = imu.sauvola(img, self._k)
            img = cv.bitwise_not(img)
        else:
            _, _, img = cv.split(img)

        return img

    def _validate_method(self):
        """Validate that the selected method is available."""
        if self._method == "surf":
            if not hasattr(cv, "xfeatures2d"):
                raise TauluException(
                    "SURF requires opencv-contrib-python with non-free modules. "
                    "Install with: pip install opencv-contrib-python"
                )

    def _create_detector(self):
        """Create the feature detector based on the selected method."""
        if self._method == "orb":
            return cv.ORB_create(  # type:ignore
                self._max_features,
                patchSize=self._patch_size,
            )
        elif self._method == "sift":
            return cv.SIFT_create(  # type:ignore
                nfeatures=self._max_features, sigma=2.5, edgeThreshold=10
            )
        elif self._method == "akaze":
            return cv.AKAZE_create()  # type:ignore
        elif self._method == "surf":
            # SURF is in xfeatures2d (requires opencv-contrib-python)
            return cv.xfeatures2d.SURF_create(hessianThreshold=400)  # ty:ignore[unresolved-attribute]
        else:
            raise TauluException(f"Unknown method: {self._method}")

    def _create_matcher(self):
        """Create the feature matcher based on the selected method."""
        if self._method == "orb":
            # ORB uses binary descriptors -> Hamming distance
            return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
        elif self._method == "sift":
            # SIFT uses float descriptors -> L2 norm with crossCheck
            return cv.BFMatcher(cv.NORM_L2, crossCheck=True)
        elif self._method == "akaze":
            # AKAZE uses binary descriptors -> Hamming distance
            return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
        elif self._method == "surf":
            # SURF uses float descriptors -> L2 norm
            return cv.BFMatcher(cv.NORM_L2, crossCheck=True)
        else:
            raise TauluException(f"Unknown method: {self._method}")

    def _match_features(self, matcher, descriptors_im, descriptors_tg):
        """Match features using BFMatcher with crossCheck for all methods."""
        return list(matcher.match(descriptors_im, descriptors_tg))

    @log_calls(level=logging.DEBUG, include_return=True)
    def _find_transform_of_template_on(
        self,
        im: MatLike,
        visual: bool = False,
        visual_notebook: bool = False,
        window: str = WINDOW,
    ):
        """
        Detect, match, and filter features between ``im`` and the template,
        then estimate the template->image homography with RANSAC.

        Raises:
            TauluException: if no features were detected in either image.
        """
        im = self._scale_img(im)

        # Create detector and matcher based on selected method
        detector = self._create_detector()
        matcher = self._create_matcher()

        # Detect features and compute descriptors
        keypoints_im, descriptors_im = detector.detectAndCompute(im, None)
        keypoints_tg, descriptors_tg = detector.detectAndCompute(self._template, None)

        if descriptors_im is None or descriptors_tg is None:
            raise TauluException("No features detected in one or both images")

        # Match features
        matches = self._match_features(matcher, descriptors_im, descriptors_tg)

        # Sort matches by score
        matches = sorted(matches, key=lambda x: x.distance)

        # Remove not so good matches
        num_good_matches = int(len(matches) * self._match_fraction)
        matches = matches[:num_good_matches]

        if visual or visual_notebook:
            final_img_filtered = cv.drawMatches(
                im,
                keypoints_im,
                self._template,
                keypoints_tg,
                matches[:10],
                None,
                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
            )
            if visual:
                imu.show(final_img_filtered, title="matches", window=window)
            if visual_notebook:
                self._matches_notebook_img = final_img_filtered

        # Extract location of good matches
        points1 = np.zeros((len(matches), 2), dtype=np.float32)
        points2 = np.zeros((len(matches), 2), dtype=np.float32)

        for i, match in enumerate(matches):
            points1[i, :] = keypoints_tg[match.trainIdx].pt
            points2[i, :] = keypoints_im[match.queryIdx].pt

        # Prune reference points based upon distance between
        # key points. This assumes a fairly good alignment to start with
        # due to the protocol used (location of the sheets)
        p1 = pd.DataFrame(data=points1)
        p2 = pd.DataFrame(data=points2)
        refdist = abs(p1 - p2)

        # NOTE(review): column 0 is the x-distance but is compared against
        # im.shape[0] (height), and column 1 (y) against im.shape[1] (width);
        # the axes look swapped — harmless when max_dist is large, but confirm.
        mask_x = refdist.loc[:, 0] < (im.shape[0] * self._max_dist)
        mask_y = refdist.loc[:, 1] < (im.shape[1] * self._max_dist)
        mask = mask_x & mask_y
        mask_array = mask.to_numpy()
        points1 = points1[mask_array]
        points2 = points2[mask_array]

        # Filter matches for visualization
        filtered_matches = [
            m for m, keep in zip(matches, mask_array, strict=False) if keep
        ]

        if visual:
            final_img_filtered = cv.drawMatches(
                im,
                keypoints_im,
                self._template,
                keypoints_tg,
                filtered_matches[:100],
                None,
                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
            )
            imu.show(final_img_filtered, title="matches", window=window)

        # Find homography
        h, _ = cv.findHomography(points1, points2, cv.RANSAC)

        return self._unscale_homography(h)

    def show_matches_notebook(self):
        """Display the stored feature matches image in the notebook (call after grid detection)."""
        if self._matches_notebook_img is not None:
            imu.show_notebook(self._matches_notebook_img, title="matches")
            self._matches_notebook_img = None

    def view_alignment(self, img: MatLike, h: NDArray):
        """
        Show the alignment of the template on the given image by transforming
        it with ``h`` and overlaying both on separate color channels.

        Args:
            img (MatLike): the image on which the template is overlaid
            h (NDArray): the homography matrix from `align`

        Returns:
            int | None: the key code returned by the OpenCV window, if any.
        """

        im = imu.ensure_gray(img)
        header = imu.ensure_gray(self._unscale_img(self._template))
        height, width = im.shape

        header_warped = cv.warpPerspective(header, h, (width, height))

        # Image on the green channel, warped template on red, white elsewhere.
        merged = np.full((height, width, 3), 255, dtype=np.uint8)

        merged[..., 1] = im
        merged[..., 2] = header_warped

        return imu.show(merged)

    @log_calls(level=logging.DEBUG, include_return=True)
    def align(
        self,
        img: MatLike | str,
        visual: bool = False,
        visual_notebook: bool = False,
        window: str = WINDOW,
    ) -> NDArray:
        """
        Compute a homography that maps template pixels onto ``img``.

        Args:
            img: Subject image (path or array).
            visual: Show match visualization in an OpenCV window.
            visual_notebook: Store the match visualization for later display
                via `show_matches_notebook`.
            window: OpenCV window name when ``visual=True``.

        Returns:
            NDArray: the ``(3, 3)`` homography from template to image space.
        """

        logger.info("Aligning header with supplied table image")

        if type(img) is str:
            tmp_img = cv.imread(img)
            assert tmp_img is not None
            img = tmp_img
        img = cast(MatLike, img)

        img = self._preprocess_image(img)

        h = self._find_transform_of_template_on(img, visual, visual_notebook, window)

        if visual:
            self.view_alignment(img, h)

        return h

    def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
        """
        Transform a template-space point through the homography ``h``
        (obtained from `align`).

        Args:
            h (NDArray): transformation matrix of shape ``(3, 3)``
            point (Iterable[int]): the to-be-transformed point as ``(x, y)``

        Returns:
            tuple[int, int]: the transformed point in image space.
        """

        # Homogeneous coordinates: append 1, transform, then de-homogenize.
        point = np.array([[point[0], point[1], 1]])  # type:ignore
        transformed = np.dot(h, point.T)

        transformed /= transformed[2]

        return int(transformed[0][0]), int(transformed[1][0])
Aligns table header templates to subject images using feature-based registration.
This class supports multiple feature detection and matching methods to compute a homography transformation that maps points from a header template image to their corresponding locations in full table images.
How it Works
- Feature Detection: Extracts keypoints from both template and subject
- Feature Matching: Finds correspondences using the selected matcher
- Filtering: Keeps top matches and prunes based on spatial consistency
- Homography Estimation: Computes perspective transform using RANSAC
The computed homography can then transform any point from template space to image space, allowing you to locate table structures based on your annotation.
Available Methods
- orb (default): ORB features with BFMatcher (Hamming distance). Fast and patent-free. Good for most use cases.
- sift: SIFT features with FLANN-based matcher. More robust to scale and rotation changes. Slower but often more accurate.
- surf: SURF features with BFMatcher (L2 norm). Requires opencv-contrib-python with non-free modules enabled. Fast and robust.
- akaze: AKAZE features with BFMatcher (Hamming distance). Patent-free, handles scale/rotation well, and often more robust than ORB on documents.
Preprocessing Options
- Set the `k` parameter to apply Sauvola thresholding before feature detection. This can improve matching on documents with variable lighting.
- Set `k=None` to use raw images (just extract the blue channel for BGR images).
Tuning Guidelines
- max_features: Increase if matching fails on complex templates
- match_fraction: Decrease if you get many incorrect matches
- max_dist: Increase for documents with more warping/distortion
- scale: Decrease (<1.0) to speed up on high-resolution images
Arguments:
- template (MatLike | PathLike[str] | str | None): Header template image or path. This should contain a clear, representative view of the table header.
- method (FeatureDetector): Feature detection/matching method. One of "orb", "sift", or "surf". Default is "orb".
- max_features (int): Maximum features to detect. More features = slower but potentially more robust matching.
- patch_size (int): ORB patch size for feature extraction (only used with "orb").
- match_fraction (float): Fraction [0, 1] of matches to keep after sorting by quality. Higher = more matches but potentially more outliers.
- scale (float): Image downscaling factor (0, 1] for processing speed.
- max_dist (float): Maximum allowed distance (relative to image size) between matched keypoints. Filters out spatially inconsistent matches.
- k (float | None): Sauvola threshold parameter for preprocessing. If None, no thresholding is applied. Typical range: 0.03-0.15.
86 def __init__( 87 self, 88 template: None | MatLike | PathLike[str] | str = None, 89 method: FeatureDetector = "orb", 90 max_features: int = 100_000, 91 patch_size: int = 31, 92 match_fraction: float = 0.3, 93 scale: float = 1.0, 94 max_dist: float = 1.00, 95 k: float | None = None, 96 ): 97 """ 98 Args: 99 template (MatLike | str): (path of) template image, with the table template clearly visible 100 method (FeatureDetector): feature detection/matching method ("orb", "sift", or "surf") 101 max_features (int): maximal number of features that will be extracted 102 patch_size (int): for ORB feature extractor (only used with method="orb") 103 match_fraction (float): best fraction of matches that are kept 104 scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly) 105 max_dist (float): maximum distance (relative to image size) of matched features. 106 Increase this value if the warping between image and template needs to be more agressive 107 k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done 108 """ 109 110 if type(template) is str or type(template) is PathLike: 111 value = cv.imread(fspath(template)) 112 template = value 113 114 self._method = method 115 self._k = k 116 if scale > 1.0: 117 raise TauluException( 118 "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0" 119 ) 120 if scale == 0: 121 raise TauluException("Use 0 < scale <= 1.0") 122 123 self._scale = scale 124 self._template = self._scale_img(cast(MatLike, template)) 125 self._template_orig: None | MatLike = None 126 self._preprocess_template() 127 self._max_features = max_features 128 self._patch_size = patch_size 129 self._match_fraction = match_fraction 130 self._max_dist = max_dist 131 self._validate_method() 132 self._matches_notebook_img = None
Arguments:
- template (MatLike | str): (path of) template image, with the table template clearly visible
- method (FeatureDetector): feature detection/matching method ("orb", "sift", or "surf")
- max_features (int): maximal number of features that will be extracted
- patch_size (int): for ORB feature extractor (only used with method="orb")
- match_fraction (float): best fraction of matches that are kept
- scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- max_dist (float): maximum distance (relative to image size) of matched features. Increase this value if the warping between image and template needs to be more agressive
- k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
156 @property 157 def method(self) -> FeatureDetector: 158 """The feature detection/matching method being used.""" 159 return self._method
The feature detection/matching method being used.
161 @property 162 def template(self): 163 """The template image that subject images are aligned to""" 164 return self._template
The template image that subject images are aligned to
340 def show_matches_notebook(self): 341 """Display the stored feature matches image in the notebook (call after grid detection).""" 342 if self._matches_notebook_img is not None: 343 imu.show_notebook(self._matches_notebook_img, title="matches") 344 self._matches_notebook_img = None
Display the stored feature matches image in the notebook (call after grid detection).
346 def view_alignment(self, img: MatLike, h: NDArray): 347 """ 348 Show the alignment of the template on the given image by transforming 349 it with ``h`` and overlaying both on separate color channels. 350 351 Args: 352 img (MatLike): the image on which the template is overlaid 353 h (NDArray): the homography matrix from `align` 354 355 Returns: 356 int | None: the key code returned by the OpenCV window, if any. 357 """ 358 359 im = imu.ensure_gray(img) 360 header = imu.ensure_gray(self._unscale_img(self._template)) 361 height, width = im.shape 362 363 header_warped = cv.warpPerspective(header, h, (width, height)) 364 365 merged = np.full((height, width, 3), 255, dtype=np.uint8) 366 367 merged[..., 1] = im 368 merged[..., 2] = header_warped 369 370 return imu.show(merged)
Show the alignment of the template on the given image by transforming
it with h and overlaying both on separate color channels.
Arguments:
- img (MatLike): the image on which the template is overlaid
- h (NDArray): the homography matrix from `align`
Returns:
int | None: the key code returned by the OpenCV window, if any.
372 @log_calls(level=logging.DEBUG, include_return=True) 373 def align( 374 self, 375 img: MatLike | str, 376 visual: bool = False, 377 visual_notebook: bool = False, 378 window: str = WINDOW, 379 ) -> NDArray: 380 """ 381 Compute a homography that maps template pixels onto ``img``. 382 383 Args: 384 img: Subject image (path or array). 385 visual: Show match visualization in an OpenCV window. 386 visual_notebook: Store the match visualization for later display 387 via `show_matches_notebook`. 388 window: OpenCV window name when ``visual=True``. 389 390 Returns: 391 NDArray: the ``(3, 3)`` homography from template to image space. 392 """ 393 394 logger.info("Aligning header with supplied table image") 395 396 if type(img) is str: 397 tmp_img = cv.imread(img) 398 assert tmp_img is not None 399 img = tmp_img 400 img = cast(MatLike, img) 401 402 img = self._preprocess_image(img) 403 404 h = self._find_transform_of_template_on(img, visual, visual_notebook, window) 405 406 if visual: 407 self.view_alignment(img, h) 408 409 return h
Compute a homography that maps template pixels onto img.
Arguments:
- img: Subject image (path or array).
- visual: Show match visualization in an OpenCV window.
- visual_notebook: Store the match visualization for later display via show_matches_notebook.
- window: OpenCV window name when visual=True.
Returns:
NDArray: the (3, 3) homography from template to image space.
411 def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]: 412 """ 413 Transform a template-space point through the homography ``h`` 414 (obtained from `align`). 415 416 Args: 417 h (NDArray): transformation matrix of shape ``(3, 3)`` 418 point (Iterable[int]): the to-be-transformed point as ``(x, y)`` 419 420 Returns: 421 tuple[int, int]: the transformed point in image space. 422 """ 423 424 point = np.array([[point[0], point[1], 1]]) # type:ignore 425 transformed = np.dot(h, point.T) 426 427 transformed /= transformed[2] 428 429 return int(transformed[0][0]), int(transformed[1][0])
Transform a template-space point through the homography h
(obtained from align).
Arguments:
- h (NDArray): transformation matrix of shape (3, 3)
- point (Iterable[int]): the to-be-transformed point as (x, y)
Returns:
tuple[int, int]: the transformed point in image space.