
Commit 4a107f4

Adjust example instantiation of multi-stage VLM pipeline
Signed-off-by: Christoph Auer <[email protected]>
1 parent 3d07f1c commit 4a107f4

File tree

3 files changed: +8 −13 lines changed


docling/datamodel/vlm_model_specs.py

Lines changed: 0 additions & 1 deletion
@@ -229,7 +229,6 @@
     ],
     scale=2.0,
     temperature=0.0,
-    max_new_tokens=4096,
 )
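Note: with this change the spec no longer pins a token budget; the per-stage options in the pipeline diff below set their own max_new_tokens. A minimal sketch of overriding the budget on a copied spec instead (an assumption: the specs are pydantic models, as the model_copy() calls below suggest; the spec name and the 4096 value are illustrative, not taken from this hunk):

from docling.datamodel.vlm_model_specs import SMOLDOCLING_TRANSFORMERS

# Copy the shared spec and set a per-use budget rather than baking it into the spec.
opts = SMOLDOCLING_TRANSFORMERS.model_copy()
opts.max_new_tokens = 4096  # illustrative value, not part of this commit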

docling/models/vlm_models_inline/hf_transformers_model.py

Lines changed: 2 additions & 4 deletions
@@ -280,9 +280,7 @@ def process_images(
             padding=True,  # pad across batch for both text and vision
             # no truncation by default; match SmolDocling examples
         )
-        inputs = {
-            k: (v.to(self.device) if hasattr(v, "to") else v) for k, v in inputs.items()
-        }
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}

         # -- Optional stopping criteria
         stopping_criteria = None
@@ -302,7 +300,7 @@ def process_images(
             "max_new_tokens": self.max_new_tokens,
             "use_cache": self.use_cache,
             "generation_config": self.generation_config,
-            "temperature": self.temperature,
+            # "temperature": self.temperature,
             **self.vlm_options.extra_generation_config,
         }
         if stopping_criteria is not None:
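One plausible reading of this file's changes (an assumption, not stated in the commit): the simplified dict comprehension treats every processor output as a tensor, and temperature is expected to travel with the GenerationConfig object rather than being passed to generate() a second time, which avoids duplicated sampling arguments. A minimal sketch of that pattern with Hugging Face transformers (model and inputs are placeholders, not docling API):

from transformers import GenerationConfig

# Carry sampling settings in the config object instead of as separate generate() kwargs.
gen_config = GenerationConfig(do_sample=False, temperature=0.0, max_new_tokens=1024)
# output_ids = model.generate(**inputs, generation_config=gen_config)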

docling/pipeline/threaded_multistage_vlm_pipeline.py

Lines changed: 6 additions & 8 deletions
@@ -160,16 +160,14 @@ def create_default(cls) -> ThreadedMultiStageVlmPipelineOptions:
         smoldocling_model = SMOLDOCLING_TRANSFORMERS

         text_opts = base_model.model_copy()
-        # text_opts.prompt = "Convert this page to docling."
-        text_opts.prompt = "What does it say?"
-        text_opts.response_format = ResponseFormat.PLAINTEXT
-        text_opts.max_new_tokens = 4096
+        text_opts.prompt = "Convert this page to docling."
+        text_opts.response_format = ResponseFormat.DOCTAGS
+        text_opts.max_new_tokens = 1024

         formula_opts = base_model.model_copy()
-        # formula_opts.prompt = "Convert formula to latex."
-        formula_opts.prompt = "What does it say?"
-        formula_opts.response_format = ResponseFormat.PLAINTEXT
-        formula_opts.max_new_tokens = 4096
+        formula_opts.prompt = "Convert formula to latex."
+        formula_opts.response_format = ResponseFormat.DOCTAGS
+        formula_opts.max_new_tokens = 512

         code_opts = smoldocling_model.model_copy()
         code_opts.prompt = "Convert code to text."
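For context, a hedged sketch of how the adjusted defaults might be wired into a converter. The ThreadedMultiStageVlmPipeline class name and the placement of create_default() on the options class are assumptions inferred from this diff; the DocumentConverter/PdfFormatOption wiring follows docling's usual pipeline-options pattern:

from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.threaded_multistage_vlm_pipeline import (
    ThreadedMultiStageVlmPipeline,  # assumed class name, inferred from the module path
    ThreadedMultiStageVlmPipelineOptions,
)

# Build the default multi-stage options adjusted by this commit.
pipeline_options = ThreadedMultiStageVlmPipelineOptions.create_default()

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=ThreadedMultiStageVlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)
# result = converter.convert("example.pdf")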
