7
7
import logging
8
8
import math
9
9
import os
10
- from concurrent .futures import ThreadPoolExecutor , Future
11
- from typing import Optional , Tuple , Union , Generator
10
+ from concurrent .futures import Future , ThreadPoolExecutor
11
+ from typing import Generator , Optional , Tuple , Union
12
12
13
13
import requests
14
+ from pypdf import PdfReader , PdfWriter
15
+ from pypdf .errors import PdfReadError
14
16
from requests .structures import CaseInsensitiveDict
15
17
from requests_toolbelt .multipart .decoder import MultipartDecoder
16
18
from requests_toolbelt .multipart .encoder import MultipartEncoder
17
- from pypdf import PdfReader , PdfWriter
18
- from pypdf .errors import PdfReadError
19
19
20
20
from unstructured_client ._hooks .custom .common import UNSTRUCTURED_CLIENT_LOGGER_NAME
21
21
from unstructured_client ._hooks .types import (
22
- BeforeRequestContext ,
23
- AfterSuccessContext ,
24
22
AfterErrorContext ,
25
- SDKInitHook ,
26
- BeforeRequestHook ,
27
- AfterSuccessHook ,
28
23
AfterErrorHook ,
24
+ AfterSuccessContext ,
25
+ AfterSuccessHook ,
26
+ BeforeRequestContext ,
27
+ BeforeRequestHook ,
28
+ SDKInitHook ,
29
29
)
30
30
from unstructured_client .models import shared
31
31
@@ -61,9 +61,7 @@ def __init__(self) -> None:
61
61
self .partition_responses : dict [str , list [requests .Response ]] = {}
62
62
self .partition_requests : dict [str , list [Future [requests .Response ]]] = {}
63
63
64
- def sdk_init (
65
- self , base_url : str , client : requests .Session
66
- ) -> Tuple [str , requests .Session ]:
64
+ def sdk_init (self , base_url : str , client : requests .Session ) -> Tuple [str , requests .Session ]:
67
65
"""Initializes Split PDF Hook.
68
66
69
67
Args:
@@ -314,9 +312,7 @@ def _parse_form_data(self, decoded_data: MultipartDecoder) -> FormData:
314
312
for part in decoded_data .parts :
315
313
content_disposition = part .headers .get (b"Content-Disposition" )
316
314
if content_disposition is None :
317
- raise RuntimeError (
318
- "Content-Disposition header not found. Can't split pdf file."
319
- )
315
+ raise RuntimeError ("Content-Disposition header not found. Can't split pdf file." )
320
316
part_params = self ._decode_content_disposition (content_disposition )
321
317
name = part_params .get ("name" )
322
318
@@ -327,9 +323,7 @@ def _parse_form_data(self, decoded_data: MultipartDecoder) -> FormData:
327
323
filename = part_params .get ("filename" )
328
324
if filename is None or not filename .strip ():
329
325
raise ValueError ("Filename can't be an empty string." )
330
- form_data [PARTITION_FORM_FILES_KEY ] = shared .Files (
331
- part .content , filename
332
- )
326
+ form_data [PARTITION_FORM_FILES_KEY ] = shared .Files (part .content , filename )
333
327
else :
334
328
form_data [name ] = part .content .decode ()
335
329
@@ -377,9 +371,7 @@ def _call_api(
377
371
raise RuntimeError ("HTTP client not accessible!" )
378
372
page_content , page_number = page
379
373
380
- new_request = self ._create_request (
381
- request , form_data , page_content , filename , page_number
382
- )
374
+ new_request = self ._create_request (request , form_data , page_content , filename , page_number )
383
375
prepared_request = self .client .prepare_request (new_request )
384
376
385
377
try :
@@ -469,9 +461,7 @@ def _prepare_request_payload(self, form_data: FormData) -> FormData:
469
461
payload .update (updated_parameters )
470
462
return payload
471
463
472
- def _create_response (
473
- self , response : requests .Response , elements : list
474
- ) -> requests .Response :
464
+ def _create_response (self , response : requests .Response , elements : list ) -> requests .Response :
475
465
"""
476
466
Creates a modified response object with updated content.
477
467
@@ -490,9 +480,7 @@ def _create_response(
490
480
setattr (response_copy , "_content" , content )
491
481
return response_copy
492
482
493
- def _await_elements (
494
- self , operation_id : str , response : requests .Response
495
- ) -> Optional [list ]:
483
+ def _await_elements (self , operation_id : str , response : requests .Response ) -> Optional [list ]:
496
484
"""
497
485
Waits for the partition requests to complete and returns the flattened
498
486
elements.
@@ -525,7 +513,6 @@ def _await_elements(
525
513
flattened_elements = [element for sublist in elements for element in sublist ]
526
514
return flattened_elements
527
515
528
-
529
516
def _clear_operation (self , operation_id : str ) -> None :
530
517
"""
531
518
Clears the operation data associated with the given operation ID.
0 commit comments