Skip to content

Commit f0ff6e4

Browse files
add tests for split size
1 parent 5ec932b commit f0ff6e4

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

_test_unstructured_client/test_split_pdf_hook.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,3 +353,62 @@ def test_unit_get_starting_page_number_missing_key(self):
353353

354354
self.assertEqual(result, 1)
355355

356+
def test_small_pdf_fewer_than_max_pages_per_thread_num_threads(self):
    """A 5-page PDF shared across 3 threads should yield a split size of 2."""
    description = "Small PDF, fewer than max pages per thread * num threads"
    num_pages, num_threads = 5, 3
    expected_split_size = 2

    hook = SplitPdfHook()
    split_size = hook._get_optimal_split_size(num_pages, num_threads)

    # Build the message up front so the assertion itself stays on one line.
    failure_message = f"{description} => Expected: {expected_split_size}, Got: {split_size}"
    self.assertEqual(split_size, expected_split_size, failure_message)
367+
368+
def test_large_pdf_more_than_max_pages_per_thread_num_threads(self):
    """A 100-page PDF shared across 3 threads should yield a split size of 20."""
    description = "Large PDF, more than max pages per thread * num threads"
    num_pages, num_threads = 100, 3
    expected_split_size = 20

    hook = SplitPdfHook()
    split_size = hook._get_optimal_split_size(num_pages, num_threads)

    # Build the message up front so the assertion itself stays on one line.
    failure_message = f"{description} => Expected: {expected_split_size}, Got: {split_size}"
    self.assertEqual(split_size, expected_split_size, failure_message)
379+
380+
def test_small_pdf_fewer_than_min_pages_per_thread(self):
    """A 1-page PDF with 5 threads should still yield a split size of 2."""
    description = "Small PDF, fewer than min pages per thread"
    num_pages, num_threads = 1, 5
    expected_split_size = 2

    hook = SplitPdfHook()
    split_size = hook._get_optimal_split_size(num_pages, num_threads)

    # Build the message up front so the assertion itself stays on one line.
    failure_message = f"{description} => Expected: {expected_split_size}, Got: {split_size}"
    self.assertEqual(split_size, expected_split_size, failure_message)
391+
392+
def test_exact_multiple_of_num_threads(self):
    """A 60-page PDF shared across 4 threads should yield a split size of 15."""
    description = "Exact multiple of num threads"
    num_pages, num_threads = 60, 4
    expected_split_size = 15

    hook = SplitPdfHook()
    split_size = hook._get_optimal_split_size(num_pages, num_threads)

    # Build the message up front so the assertion itself stays on one line.
    failure_message = f"{description} => Expected: {expected_split_size}, Got: {split_size}"
    self.assertEqual(split_size, expected_split_size, failure_message)
403+
404+
def test_large_thread_count_for_small_pdf(self):
    """A 3-page PDF with 10 threads should yield a split size of 2."""
    description = "Large thread count for a small PDF"
    num_pages, num_threads = 3, 10
    expected_split_size = 2

    hook = SplitPdfHook()
    split_size = hook._get_optimal_split_size(num_pages, num_threads)

    # Build the message up front so the assertion itself stays on one line.
    failure_message = f"{description} => Expected: {expected_split_size}, Got: {split_size}"
    self.assertEqual(split_size, expected_split_size, failure_message)

0 commit comments

Comments
 (0)