4343 elements_from_json ,
4444)
4545from unstructured_inference .models .base import UnknownModelException
46- from unstructured_inference .models .chipper import MODEL_TYPES as CHIPPER_MODEL_TYPES
4746
4847app = FastAPI ()
4948router = APIRouter ()
@@ -214,37 +213,6 @@ def partition_pdf_splits(
214213 return results
215214
216215
# Imported next to its only user so this guard is self-contained.
import threading

# True while a Chipper request is being processed by this server process.
is_chipper_processing = False

# Makes the test-and-set of `is_chipper_processing` atomic. Without it, two
# threads could both read False and both proceed, defeating the guard.
_chipper_state_lock = threading.Lock()


class ChipperMemoryProtection:
    """Context manager that admits only one Chipper call at a time.

    Chipper calls are expensive, and right now we can only do one call at a time.

    If the model is in use, return a 503 error. The API should scale up and the user can try again
    on a different server.

    Raises:
        HTTPException: with status 503 from `__enter__` when another Chipper call is already
            running. In that case `__exit__` is not invoked, so the running call keeps the flag.
    """

    def __enter__(self):
        """Claim the Chipper slot, or raise a 503 if it is already claimed."""
        global is_chipper_processing

        # Read the old value and set the flag in one critical section. Setting it to True
        # when it already was True is a no-op, so the current holder is unaffected.
        with _chipper_state_lock:
            already_processing = is_chipper_processing
            is_chipper_processing = True

        if already_processing:
            # Log here so we can track how often it happens
            logger.error("Chipper is already in use")
            raise HTTPException(
                status_code=503, detail="Server is under heavy load. Please try again later."
            )

    def __exit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc_value: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ):
        """Release the Chipper slot; any exception from the `with` body propagates."""
        global is_chipper_processing
        with _chipper_state_lock:
            is_chipper_processing = False
246-
247-
248216def pipeline_api (
249217 file : IO [bytes ],
250218 request : Request ,
@@ -331,7 +299,6 @@ def pipeline_api(
331299 if file_content_type == "application/pdf" :
332300 _check_pdf (file )
333301
334- hi_res_model_name = _validate_hi_res_model_name (hi_res_model_name , coordinates )
335302 strategy = _validate_strategy (strategy )
336303 pdf_infer_table_structure = _set_pdf_infer_table_structure (
337304 pdf_infer_table_structure ,
@@ -417,9 +384,6 @@ def pipeline_api(
417384 coordinates = coordinates ,
418385 ** partition_kwargs , # type: ignore # pyright: ignore[reportGeneralTypeIssues]
419386 )
420- elif hi_res_model_name and hi_res_model_name in CHIPPER_MODEL_TYPES :
421- with ChipperMemoryProtection ():
422- elements = partition (** partition_kwargs ) # type: ignore # pyright: ignore[reportGeneralTypeIssues]
423387 else :
424388 elements = partition (** partition_kwargs ) # type: ignore # pyright: ignore[reportGeneralTypeIssues]
425389
@@ -533,21 +497,6 @@ def _validate_strategy(strategy: str) -> str:
533497 return strategy
534498
535499
536- def _validate_hi_res_model_name (
537- hi_res_model_name : Optional [str ], show_coordinates : bool
538- ) -> Optional [str ]:
539- # Make sure chipper aliases to the latest model
540- if hi_res_model_name and hi_res_model_name == "chipper" :
541- hi_res_model_name = "chipperv2"
542-
543- if hi_res_model_name and hi_res_model_name in CHIPPER_MODEL_TYPES and show_coordinates :
544- raise HTTPException (
545- status_code = 400 ,
546- detail = f"coordinates aren't available when using the { hi_res_model_name } model type" ,
547- )
548- return hi_res_model_name
549-
550-
551500def _validate_chunking_strategy (chunking_strategy : Optional [str ]) -> Optional [str ]:
552501 """Raise when `chunking_strategy` is not a valid chunking strategy name.
553502
@@ -653,7 +602,7 @@ def return_content_type(filename: str):
653602
654603
655604@router .get ("/general/v0/general" , include_in_schema = False )
656- @router .get ("/general/v0.0.81 /general" , include_in_schema = False )
605+ @router .get ("/general/v0.0.82 /general" , include_in_schema = False )
657606async def handle_invalid_get_request ():
658607 raise HTTPException (
659608 status_code = status .HTTP_405_METHOD_NOT_ALLOWED , detail = "Only POST requests are supported."
@@ -668,7 +617,7 @@ async def handle_invalid_get_request():
668617 description = "Description" ,
669618 operation_id = "partition_parameters" ,
670619)
671- @router .post ("/general/v0.0.81 /general" , include_in_schema = False )
620+ @router .post ("/general/v0.0.82 /general" , include_in_schema = False )
672621def general_partition (
673622 request : Request ,
674623 # cannot use annotated type here because of a bug described here:
0 commit comments