diff --git a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/CompositeTableParser.java b/app/common/src/main/java/stirling/software/SPDF/pdf/parser/CompositeTableParser.java
deleted file mode 100644
index 429f180f3e..0000000000
--- a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/CompositeTableParser.java
+++ /dev/null
@@ -1,73 +0,0 @@
-package stirling.software.SPDF.pdf.parser;
-
-import static stirling.software.SPDF.pdf.parser.PdfModels.*;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.springframework.context.annotation.Primary;
-import org.springframework.stereotype.Service;
-
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-
-/**
- * Chains table parsers in priority order: Tabula lattice → Tabula stream → {@link
- * LineAlignmentTableParser}. The first parser returning a result above {@link
- * #TABULA_CONFIDENCE_THRESHOLD} wins; results from different parsers are never mixed on one page.
- */
-@Service
-@Primary
-@RequiredArgsConstructor
-@Slf4j
-public class CompositeTableParser implements TableParser {
-
-    /** Min Tabula confidence to accept results; below this LineAlignment is tried instead. */
-    static final float TABULA_CONFIDENCE_THRESHOLD = 0.5f;
-
-    private final TabulaTableParser tabulaParser;
-    private final LineAlignmentTableParser lineAlignmentParser;
-
-    @Override
-    public List<TableFragment> parse(PDDocument document, RawPage rawPage) throws IOException {
-        // Step 1: Tabula lattice mode (ruled/bordered tables).
-        List<TableFragment> latticeResults = filterConfident(tabulaParser.parse(document, rawPage));
-        if (!latticeResults.isEmpty()) {
-            log.debug(
-                    "Page {}: using Tabula lattice ({} table(s))",
-                    rawPage.pageNumber(),
-                    latticeResults.size());
-            return latticeResults;
-        }
-
-        // Step 2: Tabula stream mode (borderless/whitespace-delimited tables).
-        // parseStream is not on the TableParser interface — this intentionally couples to the
-        // concrete TabulaTableParser since stream mode is a Tabula-specific concept.
-        List<TableFragment> streamResults =
-                filterConfident(tabulaParser.parseStream(document, rawPage));
-        if (!streamResults.isEmpty()) {
-            log.debug(
-                    "Page {}: using Tabula stream ({} table(s))",
-                    rawPage.pageNumber(),
-                    streamResults.size());
-            return streamResults;
-        }
-
-        // Step 3: Geometry-based line-alignment fallback.
-        List<TableFragment> lineResults = lineAlignmentParser.parse(document, rawPage);
-        if (!lineResults.isEmpty()) {
-            log.debug(
-                    "Page {}: using LineAlignment ({} table(s))",
-                    rawPage.pageNumber(),
-                    lineResults.size());
-            return lineResults;
-        }
-
-        return List.of();
-    }
-
-    private List<TableFragment> filterConfident(List<TableFragment> tables) {
-        return tables.stream().filter(t -> t.confidence() >= TABULA_CONFIDENCE_THRESHOLD).toList();
-    }
-}
diff --git a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/LineAlignmentTableParser.java b/app/common/src/main/java/stirling/software/SPDF/pdf/parser/LineAlignmentTableParser.java
deleted file mode 100644
index b2d8de5167..0000000000
--- a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/LineAlignmentTableParser.java
+++ /dev/null
@@ -1,528 +0,0 @@
-package stirling.software.SPDF.pdf.parser;
-
-import static stirling.software.SPDF.pdf.parser.PdfModels.*;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.TreeMap;
-import java.util.regex.Pattern;
-
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.springframework.stereotype.Service;
-
-import lombok.extern.slf4j.Slf4j;
-
-/**
- * Fallback {@link TableParser} for borderless financial tables using text geometry.
- *
- * <p>Identifies "anchor lines" (≥2 numeric tokens), builds a column grid from their right-edge
- * positions, groups vertically proximate anchor lines into table candidates, then scores each group
- * on column consistency and anchor density (confidence ceiling 0.85).
- */
-@Service
-@Slf4j
-public class LineAlignmentTableParser implements TableParser {
-
-    /** Width in points of each column position bucket. */
-    static final float COLUMN_BUCKET_PT = 5f;
-
-    /** Tolerance in buckets when matching a token's right-edge to a confirmed column position. */
-    private static final int COLUMN_MATCH_BUCKETS = 2;
-
-    /** Maximum gap (as a multiple of modal line spacing) before splitting a group. */
-    private static final float MAX_GAP_FACTOR = 2.5f;
-
-    /** Minimum anchor rows (numeric-heavy) to form a valid table. */
-    static final int MIN_TABLE_ROWS = 3;
-
-    /** Minimum confirmed column positions to form a valid table. */
-    static final int MIN_COLUMNS = 2;
-
-    /**
-     * Min fraction of anchor lines a column must appear on to be confirmed (permissive for N/A
-     * rows).
-     */
-    private static final double COLUMN_MIN_FREQUENCY = 0.40;
-
-    /**
-     * Matches financial numeric tokens: integers, decimals, parenthetical negatives, currency,
-     * percent, nil dashes.
-     */
-    private static final Pattern NUMERIC =
-            Pattern.compile("^[\\(\\-\\$£€¥]?\\d[\\d,\\.]*[\\)%]?$|^[-–—]$");
-
-    /**
-     * Lines within this y-distance are merged into one row (restores rows split by LineBuilder's
-     * column-gap logic).
-     */
-    static final float ROW_MERGE_TOLERANCE_PT = 2f;
-
-    // ── public API ───────────────────────────────────────────────────────────────────────────────
-
-    @Override
-    public List<TableFragment> parse(PDDocument document, RawPage rawPage) throws IOException {
-        List<RawLine> lines = rawPage.lines();
-        if (lines.size() < MIN_TABLE_ROWS) return List.of();
-
-        float modalSpacing = computeModalSpacing(lines);
-        List<TokenizedLine> tokenized =
-                mergeCoincidentLines(lines.stream().map(this::tokenize).toList());
-
-        List<TokenizedLine> anchors = tokenized.stream().filter(TokenizedLine::isAnchor).toList();
-
-        if (anchors.size() < MIN_TABLE_ROWS) return List.of();
-
-        List<Float> columnGrid = buildColumnGrid(anchors);
-        if (columnGrid.size() < MIN_COLUMNS) {
-            log.debug(
-                    "Page {}: LineAlignment — fewer than {} confirmed columns, skipping",
-                    rawPage.pageNumber(),
-                    MIN_COLUMNS);
-            return List.of();
-        }
-
-        List<List<TokenizedLine>> groups = groupRows(tokenized, columnGrid, modalSpacing);
-
-        List<TableFragment> results = new ArrayList<>();
-        for (int i = 0; i < groups.size(); i++) {
-            buildFragment(groups.get(i), columnGrid, rawPage.pageNumber(), i)
-                    .ifPresent(results::add);
-        }
-
-        log.debug(
-                "Page {}: LineAlignment detected {} table(s) ({} anchor lines, {} columns)",
-                rawPage.pageNumber(),
-                results.size(),
-                anchors.size(),
-                columnGrid.size());
-        return results;
-    }
-
-    // ── coincident-line merging ──────────────────────────────────────────────────────────────────
-
-    /**
-     * Merges tokenised lines sharing the same y-position into one row, rejoining label/value halves
-     * split by LineBuilder.
-     */
-    List<TokenizedLine> mergeCoincidentLines(List<TokenizedLine> tokenized) {
-        if (tokenized.size() < 2) return tokenized;
-
-        List<TokenizedLine> result = new ArrayList<>();
-        int i = 0;
-
-        while (i < tokenized.size()) {
-            float baseY = tokenized.get(i).line().bounds().y();
-            int j = i + 1;
-            while (j < tokenized.size()
-                    && Math.abs(tokenized.get(j).line().bounds().y() - baseY)
-                            <= ROW_MERGE_TOLERANCE_PT) {
-                j++;
-            }
-
-            if (j == i + 1) {
-                result.add(tokenized.get(i));
-            } else {
-                result.add(mergeGroup(tokenized.subList(i, j)));
-            }
-            i = j;
-        }
-
-        return result;
-    }
-
-    private TokenizedLine mergeGroup(List<TokenizedLine> group) {
-        List<TextFragment> mergedFragments =
-                group.stream()
-                        .flatMap(tl -> tl.line().fragments().stream())
-                        .sorted(Comparator.comparingDouble(f -> f.bounds().x()))
-                        .toList();
-
-        Bounds mergedBounds =
-                group.stream()
-                        .map(tl -> tl.line().bounds())
-                        .reduce(Bounds::merge)
-                        .orElse(group.get(0).line().bounds());
-
-        RawLine mergedLine =
-                new RawLine(
-                        group.get(0).line().lineId(),
-                        mergedFragments,
-                        mergedBounds,
-                        group.get(0).line().pageNumber());
-
-        return tokenize(mergedLine);
-    }
-
-    // ── tokenisation ─────────────────────────────────────────────────────────────────────────────
-
-    /**
-     * Splits fragments into word-level tokens; x-positions are estimated linearly within each
-     * fragment.
-     */
-    TokenizedLine tokenize(RawLine line) {
-        List<LineToken> tokens = new ArrayList<>();
-        for (TextFragment frag : line.fragments()) {
-            tokens.addAll(tokensFromFragment(frag));
-        }
-        List<LineToken> numeric = tokens.stream().filter(LineToken::numeric).toList();
-        return new TokenizedLine(line, tokens, numeric);
-    }
-
-    private List<LineToken> tokensFromFragment(TextFragment frag) {
-        String raw = frag.text();
-        if (raw == null || raw.isBlank()) return List.of();
-
-        float fragX = frag.bounds().x();
-        float fragWidth = frag.bounds().width();
-        int rawLen = raw.length();
-
-        List<LineToken> result = new ArrayList<>();
-        int offset = 0;
-        for (String part : raw.split("\\s+")) {
-            if (part.isEmpty()) {
-                offset++;
-                continue;
-            }
-            int idx = raw.indexOf(part, offset);
-            if (idx < 0) idx = offset;
-
-            float tokenX = rawLen > 0 ? fragX + ((float) idx / rawLen) * fragWidth : fragX;
-            float tokenRight =
-                    rawLen > 0
-                            ? fragX + ((float) (idx + part.length()) / rawLen) * fragWidth
-                            : fragX + fragWidth;
-
-            result.add(new LineToken(part, tokenX, tokenRight, NUMERIC.matcher(part).matches()));
-            offset = idx + part.length();
-        }
-        return result;
-    }
-
-    // ── column grid ──────────────────────────────────────────────────────────────────────────────
-
-    /**
-     * Returns confirmed column right-edge positions — those appearing on ≥ {@value
-     * #COLUMN_MIN_FREQUENCY} × N anchor lines.
-     */
-    private List<Float> buildColumnGrid(List<TokenizedLine> anchors) {
-        // bucket → set of line indices that contributed a numeric token to that bucket
-        Map<Integer, List<Integer>> bucketLines = new HashMap<>();
-        for (int i = 0; i < anchors.size(); i++) {
-            for (LineToken t : anchors.get(i).numeric()) {
-                int bucket = bucket(t.right());
-                bucketLines.computeIfAbsent(bucket, k -> new ArrayList<>()).add(i);
-            }
-        }
-
-        int minHits =
-                Math.max(MIN_TABLE_ROWS, (int) Math.ceil(anchors.size() * COLUMN_MIN_FREQUENCY));
-
-        // Confirmed buckets → average right-edge for that bucket
-        TreeMap<Integer, Float> confirmed = new TreeMap<>();
-        for (Map.Entry<Integer, List<Integer>> entry : bucketLines.entrySet()) {
-            // Count distinct lines
-            long distinctLines = entry.getValue().stream().distinct().count();
-            if (distinctLines >= minHits) {
-                double avg =
-                        entry.getValue().stream()
-                                .distinct() // weight each line equally regardless of token count
-                                .mapToDouble(
-                                        lineIdx ->
-                                                avgRightEdgeForBucket(
-                                                        anchors, lineIdx, entry.getKey()))
-                                .average()
-                                .orElse(entry.getKey() * (double) COLUMN_BUCKET_PT);
-                confirmed.put(entry.getKey(), (float) avg);
-            }
-        }
-
-        return new ArrayList<>(confirmed.values()); // already sorted by bucket (left to right)
-    }
-
-    /**
-     * Returns the average right-edge position of tokens in {@code line} whose bucket matches {@code
-     * targetBucket}, falling back to the bucket's nominal centre when no tokens match.
-     */
-    private double avgRightEdgeForBucket(
-            List<TokenizedLine> anchors, int lineIdx, int targetBucket) {
-        return anchors.get(lineIdx).numeric().stream()
-                .filter(t -> bucket(t.right()) == targetBucket)
-                .mapToDouble(LineToken::right)
-                .average()
-                .orElse(targetBucket * (double) COLUMN_BUCKET_PT);
-    }
-
-    // ── grouping ─────────────────────────────────────────────────────────────────────────────────
-
-    /**
-     * Groups anchor lines into table candidates, including adjacent label rows; a gap &gt;
-     * MAX_GAP_FACTOR × modal spacing splits groups.
-     */
-    private List<List<TokenizedLine>> groupRows(
-            List<TokenizedLine> all, List<Float> columnGrid, float modalSpacing) {
-        float maxGap = modalSpacing > 0 ? modalSpacing * MAX_GAP_FACTOR : 30f;
-
-        List<List<TokenizedLine>> groups = new ArrayList<>();
-        List<TokenizedLine> current = new ArrayList<>();
-
-        for (int i = 0; i < all.size(); i++) {
-            TokenizedLine tl = all.get(i);
-            boolean fits = tl.isAnchor() && matchesGrid(tl, columnGrid);
-
-            if (current.isEmpty()) {
-                if (fits) current.add(tl);
-                continue;
-            }
-
-            float gap =
-                    tl.line().bounds().y()
-                            - current.get(current.size() - 1).line().bounds().bottom();
-
-            if (gap > maxGap) {
-                groups.add(current);
-                current = new ArrayList<>();
-                if (fits) current.add(tl);
-                continue;
-            }
-
-            if (fits) {
-                current.add(tl);
-            } else if (!tl.line().text().isBlank()) {
-                // Include non-anchor lines (labels) only if they have text and are within
-                // proximity.
-                current.add(tl);
-            }
-        }
-
-        if (!current.isEmpty()) groups.add(current);
-
-        return groups.stream().filter(g -> hasEnoughAnchorRows(g, columnGrid)).toList();
-    }
-
-    private boolean hasEnoughAnchorRows(List<TokenizedLine> group, List<Float> columnGrid) {
-        return group.stream().filter(r -> r.isAnchor() && matchesGrid(r, columnGrid)).count()
-                >= MIN_TABLE_ROWS;
-    }
-
-    /** A line "matches" the grid when ≥ 60 % of its numeric tokens land in confirmed columns. */
-    private boolean matchesGrid(TokenizedLine tl, List<Float> columnGrid) {
-        if (tl.numeric().isEmpty()) return false;
-        long matches =
-                tl.numeric().stream()
-                        .filter(t -> nearestColumnIndex(t.right(), columnGrid) >= 0)
-                        .count();
-        return (double) matches / tl.numeric().size() >= 0.60;
-    }
-
-    private boolean hasInconsistentColumnMatch(TokenizedLine tl, List<Float> columnGrid) {
-        if (tl.numeric().isEmpty()) return false;
-        long hits =
-                tl.numeric().stream()
-                        .filter(t -> nearestColumnIndex(t.right(), columnGrid) >= 0)
-                        .count();
-        return (double) hits / tl.numeric().size() < 0.60;
-    }
-
-    // ── fragment assembly ────────────────────────────────────────────────────────────────────────
-
-    private Optional<TableFragment> buildFragment(
-            List<TokenizedLine> group, List<Float> columnGrid, int pageNumber, int tableIndex) {
-
-        long anchorCount =
-                group.stream().filter(r -> r.isAnchor() && matchesGrid(r, columnGrid)).count();
-        if (anchorCount < MIN_TABLE_ROWS) return Optional.empty();
-
-        List<String> warnings = new ArrayList<>();
-        List<List<String>> rawRows = new ArrayList<>();
-        List<TableRow> rows = new ArrayList<>();
-
-        for (int rowIdx = 0; rowIdx < group.size(); rowIdx++) {
-            TokenizedLine tl = group.get(rowIdx);
-            List<String> rawRow = buildRawRow(tl, columnGrid);
-            rawRows.add(Collections.unmodifiableList(rawRow));
-            rows.add(buildTableRow(rowIdx, tl, rawRow, columnGrid));
-        }
-
-        // Column count = 1 label column + confirmed numeric columns
-        int colCount = columnGrid.size() + 1;
-        Bounds bounds = computeGroupBounds(group);
-        float confidence = computeConfidence(group, columnGrid, warnings);
-
-        return Optional.of(
-                new TableFragment(
-                        "tbl-la-p" + pageNumber + "-" + tableIndex,
-                        pageNumber,
-                        bounds,
-                        List.of(),
-                        Collections.unmodifiableList(rows),
-                        Collections.unmodifiableList(rawRows),
-                        colCount,
-                        confidence,
-                        Collections.unmodifiableList(warnings),
-                        null));
-    }
-
-    /**
-     * Builds a raw row as a list of strings: index 0 = label text, indices 1..N = column values.
-     */
-    private List<String> buildRawRow(TokenizedLine tl, List<Float> columnGrid) {
-        String[] cells = new String[columnGrid.size() + 1];
-        Arrays.fill(cells, "");
-
-        // Separate label tokens (those not landing in any confirmed column) from column tokens.
-        List<String> labelParts = new ArrayList<>();
-        for (LineToken token : tl.all()) {
-            int col = nearestColumnIndex(token.right(), columnGrid);
-            if (col >= 0 && token.numeric()) {
-                int cellIdx = col + 1;
-                cells[cellIdx] =
-                        cells[cellIdx].isEmpty()
-                                ? token.text()
-                                : cells[cellIdx] + " " + token.text();
-            } else {
-                labelParts.add(token.text());
-            }
-        }
-        cells[0] = String.join(" ", labelParts).trim();
-        return Arrays.asList(cells);
-    }
-
-    private TableRow buildTableRow(
-            int rowIdx, TokenizedLine tl, List<String> rawRow, List<Float> columnGrid) {
-        List<TableCell> cells = new ArrayList<>(rawRow.size());
-
-        // Label cell: use the line's full bounds as an approximation.
-        cells.add(TableCell.of(0, rawRow.get(0), tl.line().bounds()));
-
-        for (int col = 0; col < columnGrid.size(); col++) {
-            String text = col + 1 < rawRow.size() ? rawRow.get(col + 1) : "";
-            float right = columnGrid.get(col);
-            float left = col > 0 ? columnGrid.get(col - 1) : right - 50f;
-            Bounds cellBounds =
-                    new Bounds(
-                            left,
-                            tl.line().bounds().y(),
-                            right - left,
-                            tl.line().bounds().height());
-            cells.add(TableCell.of(col + 1, text, cellBounds));
-        }
-        return new TableRow(rowIdx, Collections.unmodifiableList(cells));
-    }
-
-    // ── confidence scoring ───────────────────────────────────────────────────────────────────────
-
-    /**
-     * Heuristic score in [0.0, 0.85] (ceiling keeps results below Tabula lattice which starts at
-     * 1.0). Base 0.70; +0.05/col beyond 2 (max +0.10); +0.05 at ≥5 anchors, +0.05 at ≥8; −0.15 if
-     * &gt;30 % of anchors have inconsistent columns; −0.10 if non-anchors outnumber anchors.
-     */
-    private float computeConfidence(
-            List<TokenizedLine> group, List<Float> columnGrid, List<String> warnings) {
-        float score = 0.70f;
-
-        long anchorCount =
-                group.stream().filter(r -> r.isAnchor() && matchesGrid(r, columnGrid)).count();
-        long totalRows = group.size();
-
-        // More columns
-        int extraCols = Math.min(columnGrid.size() - MIN_COLUMNS, 2);
-        score += extraCols * 0.05f;
-
-        // More anchor rows
-        if (anchorCount >= 5) score += 0.05f;
-        if (anchorCount >= 8) score += 0.05f;
-
-        // Inconsistent column matching
-        long inconsistent =
-                group.stream()
-                        .filter(TokenizedLine::isAnchor)
-                        .filter(tl -> hasInconsistentColumnMatch(tl, columnGrid))
-                        .count();
-        if (inconsistent > anchorCount * 0.30) {
-            score -= 0.15f;
-            warnings.add(
-                    "Column match inconsistent on "
-                            + inconsistent
-                            + "/"
-                            + anchorCount
-                            + " anchor rows");
-        }
-
-        // Label-heavy
-        long nonAnchor = totalRows - anchorCount;
-        if (nonAnchor > anchorCount) {
-            score -= 0.10f;
-            warnings.add(
-                    "Non-anchor rows ("
-                            + nonAnchor
-                            + ") outnumber anchor rows ("
-                            + anchorCount
-                            + ")");
-        }
-
-        return Math.max(0f, Math.min(0.85f, score));
-    }
-
-    // ── utility ──────────────────────────────────────────────────────────────────────────────────
-
-    /**
-     * Returns the grid index nearest to {@code rightEdge}, or -1 if none is within {@value
-     * #COLUMN_MATCH_BUCKETS} buckets.
-     */
-    private int nearestColumnIndex(float rightEdge, List<Float> grid) {
-        int nearest = -1;
-        float minDist = COLUMN_MATCH_BUCKETS * COLUMN_BUCKET_PT + 1f;
-        for (int i = 0; i < grid.size(); i++) {
-            float dist = Math.abs(rightEdge - grid.get(i));
-            if (dist < minDist) {
-                minDist = dist;
-                nearest = i;
-            }
-        }
-        return nearest;
-    }
-
-    private Bounds computeGroupBounds(List<TokenizedLine> group) {
-        return group.stream()
-                .map(tl -> tl.line().bounds())
-                .reduce(Bounds::merge)
-                .orElse(new Bounds(0, 0, 0, 0));
-    }
-
-    /** Modal gap between consecutive line edges, used to calibrate the group-split threshold. */
-    private float computeModalSpacing(List<RawLine> lines) {
-        if (lines.size() < 2) return 0f;
-        Map<Float, Long> freq = new HashMap<>();
-        for (int i = 1; i < lines.size(); i++) {
-            float gap = lines.get(i).bounds().y() - lines.get(i - 1).bounds().bottom();
-            if (gap > 0) freq.merge(Math.round(gap / 2f) * 2f, 1L, Long::sum);
-        }
-        return freq.entrySet().stream()
-                .max(Map.Entry.comparingByValue())
-                .map(Map.Entry::getKey)
-                .orElse(0f);
-    }
-
-    private static int bucket(float x) {
-        return Math.round(x / COLUMN_BUCKET_PT);
-    }
-
-    // ── private data types ───────────────────────────────────────────────────────────────────────
-
-    /** A word-level token with an approximate right-edge x-position. */
-    record LineToken(String text, float x, float right, boolean numeric) {}
-
-    /** A {@link RawLine} with tokens pre-computed; an "anchor" has ≥ 2 numeric tokens. */
-    record TokenizedLine(RawLine line, List<LineToken> all, List<LineToken> numeric) {
-        boolean isAnchor() {
-            return numeric.size() >= 2;
-        }
-    }
-}
diff --git a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/LineBuilder.java b/app/common/src/main/java/stirling/software/SPDF/pdf/parser/LineBuilder.java
deleted file mode 100644
index 6831f6d734..0000000000
--- a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/LineBuilder.java
+++ /dev/null
@@ -1,139 +0,0 @@
-package stirling.software.SPDF.pdf.parser;
-
-import static stirling.software.SPDF.pdf.parser.PdfModels.*;
-
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-
-import org.springframework.stereotype.Service;
-
-import lombok.extern.slf4j.Slf4j;
-
-/**
- * Groups {@link TextFragment} objects into visual {@link RawLine}s using baseline proximity.
- *
- * <p>Fragments are on the same line when their baselines are within a font-size-derived tolerance.
- * A new line starts whenever the horizontal gap exceeds an adaptive column-gap threshold ({@code
- * max(effectiveWidth * COLUMN_GAP_RATIO, COLUMN_GAP_MIN_PT)}), splitting two-column text.
- */
-@Service
-@Slf4j
-public class LineBuilder {
-
-    /** Baseline tolerance as a fraction of font size; 0.5 keeps mixed-size text on one line. */
-    private static final float BASELINE_TOLERANCE_FACTOR = 0.5f;
-
-    /** Absolute minimum tolerance so tiny font sizes don't collapse multi-line content. */
-    private static final float MIN_BASELINE_TOLERANCE = 2f;
-
-    /**
-     * Column-gap threshold as a fraction of page width; 0.10 clears tab stops but stays below
-     * two-column gutters.
-     */
-    static final float COLUMN_GAP_RATIO = 0.10f;
-
-    /** Floor for the column-gap threshold so narrow pages don't over-split lines. */
-    static final float COLUMN_GAP_MIN_PT = 40f;
-
-    public List<RawLine> build(List<TextFragment> fragments, int pageNumber) {
-        if (fragments.isEmpty()) return List.of();
-
-        float effectiveWidth = inferEffectiveWidth(fragments);
-        float columnGapThreshold = Math.max(effectiveWidth * COLUMN_GAP_RATIO, COLUMN_GAP_MIN_PT);
-        log.debug(
-                "LineBuilder page {}: effectiveWidth={:.1f}pt, columnGapThreshold={:.1f}pt",
-                pageNumber,
-                effectiveWidth,
-                columnGapThreshold);
-
-        // Sort top-to-bottom first, then left-to-right within the same baseline band.
-        List<TextFragment> sorted =
-                fragments.stream()
-                        .sorted(
-                                Comparator.comparingDouble(TextFragment::baseline)
-                                        .thenComparingDouble(f -> f.bounds().x()))
-                        .toList();
-
-        List<List<TextFragment>> groups = groupByBaseline(sorted, columnGapThreshold);
-
-        List<RawLine> lines = new ArrayList<>(groups.size());
-        for (int i = 0; i < groups.size(); i++) {
-            List<TextFragment> group =
-                    groups.get(i).stream()
-                            .sorted(Comparator.comparingDouble(f -> f.bounds().x()))
-                            .toList();
-
-            Bounds lineBounds =
-                    group.stream()
-                            .map(TextFragment::bounds)
-                            .reduce(Bounds::merge)
-                            .orElse(new Bounds(0, 0, 0, 0));
-
-            lines.add(new RawLine("ln-p" + pageNumber + "-" + i, group, lineBounds, pageNumber));
-        }
-        return lines;
-    }
-
-    private List<List<TextFragment>> groupByBaseline(
-            List<TextFragment> sorted, float columnGapThreshold) {
-        List<List<TextFragment>> groups = new ArrayList<>();
-        List<TextFragment> current = new ArrayList<>();
-        float currentBaseline = Float.NaN;
-
-        for (TextFragment fragment : sorted) {
-            if (current.isEmpty()) {
-                current.add(fragment);
-                currentBaseline = fragment.baseline();
-                continue;
-            }
-
-            float maxFontSize =
-                    Math.max(
-                            fragment.fontSize(),
-                            (float)
-                                    current.stream()
-                                            .mapToDouble(TextFragment::fontSize)
-                                            .max()
-                                            .orElse(0));
-            float tolerance =
-                    Math.max(maxFontSize * BASELINE_TOLERANCE_FACTOR, MIN_BASELINE_TOLERANCE);
-
-            boolean sameBaseline = Math.abs(fragment.baseline() - currentBaseline) <= tolerance;
-            boolean columnGap = sameBaseline && hasColumnGap(fragment, current, columnGapThreshold);
-
-            if (sameBaseline && !columnGap) {
-                current.add(fragment);
-                // Anchor to the weighted mean baseline so long lines stay stable.
-                currentBaseline =
-                        (currentBaseline * (current.size() - 1) + fragment.baseline())
-                                / current.size();
-            } else {
-                groups.add(current);
-                current = new ArrayList<>();
-                current.add(fragment);
-                currentBaseline = fragment.baseline();
-            }
-        }
-
-        if (!current.isEmpty()) groups.add(current);
-        return groups;
-    }
-
-    /**
-     * True when the gap from the rightmost fragment in {@code group} to {@code next} exceeds {@code
-     * threshold}.
-     */
-    private static boolean hasColumnGap(
-            TextFragment next, List<TextFragment> group, float threshold) {
-        float lastRight = group.get(group.size() - 1).bounds().right();
-        return next.bounds().x() - lastRight > threshold;
-    }
-
-    /** Infers effective page width from the rightmost fragment right-edge plus a 10 % margin. */
-    private static float inferEffectiveWidth(List<TextFragment> fragments) {
-        double maxRight =
-                fragments.stream().mapToDouble(f -> f.bounds().right()).max().orElse(500.0);
-        return (float) maxRight * 1.10f;
-    }
-}
diff --git a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/PdfIngester.java b/app/common/src/main/java/stirling/software/SPDF/pdf/parser/PdfIngester.java
deleted file mode 100644
index a7dc9c282b..0000000000
--- a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/PdfIngester.java
+++ /dev/null
@@ -1,79 +0,0 @@
-package stirling.software.SPDF.pdf.parser;
-
-import static stirling.software.SPDF.pdf.parser.PdfModels.*;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.common.PDRectangle;
-import org.springframework.stereotype.Service;
-
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-
-/**
- * Runs the per-page ingestion pipeline: {@link WordExtractingStripper} → {@link LineBuilder} →
- * {@link TableParser}, producing a {@link PdfModels.ParsedPage} per page. The caller owns the
- * {@link PDDocument} lifecycle.
- */
-@Service
-@RequiredArgsConstructor
-@Slf4j
-public class PdfIngester {
-
-    private final LineBuilder lineBuilder;
-    private final TableParser tableParser;
-
-    public List<ParsedPage> parse(PDDocument document) throws IOException {
-        return parse(document, document.getNumberOfPages());
-    }
-
-    public List<ParsedPage> parse(PDDocument document, int maxPages) throws IOException {
-        int pageCount = Math.min(document.getNumberOfPages(), maxPages);
-        List<ParsedPage> pages = new ArrayList<>(pageCount);
-        long fragmentsMs = 0;
-        long tablesMs = 0;
-        long t0 = System.currentTimeMillis();
-
-        for (int p = 1; p <= pageCount; p++) {
-            long ft = System.currentTimeMillis();
-            List<TextFragment> fragments = extractFragments(document, p);
-            fragmentsMs += System.currentTimeMillis() - ft;
-
-            PDPage page = document.getPage(p - 1);
-            PDRectangle mediaBox = page.getMediaBox();
-            List<RawLine> lines = lineBuilder.build(fragments, p);
-            RawPage rawPage = new RawPage(p, mediaBox.getWidth(), mediaBox.getHeight(), lines);
-
-            long tt = System.currentTimeMillis();
-            List<TableFragment> tables = tableParser.parse(document, rawPage);
-            tablesMs += System.currentTimeMillis() - tt;
-
-            log.debug(
-                    "Page {}: {} fragments → {} lines, {} table(s)",
-                    p,
-                    fragments.size(),
-                    lines.size(),
-                    tables.size());
-            pages.add(new ParsedPage(p, mediaBox.getWidth(), mediaBox.getHeight(), tables, lines));
-        }
-
-        log.info(
-                "[timing] parse pages={} total={}ms fragments={}ms tables={}ms",
-                pageCount,
-                System.currentTimeMillis() - t0,
-                fragmentsMs,
-                tablesMs);
-        return pages;
-    }
-
-    private List<TextFragment> extractFragments(PDDocument document, int pageNumber)
-            throws IOException {
-        WordExtractingStripper stripper = new WordExtractingStripper(pageNumber);
-        stripper.getText(document);
-        return stripper.getFragments();
-    }
-}
diff --git a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/WordExtractingStripper.java b/app/common/src/main/java/stirling/software/SPDF/pdf/parser/WordExtractingStripper.java
deleted file mode 100644
index 52ab9d9a18..0000000000
--- a/app/common/src/main/java/stirling/software/SPDF/pdf/parser/WordExtractingStripper.java
+++ /dev/null
@@ -1,113 +0,0 @@
-package stirling.software.SPDF.pdf.parser;
-
-import static stirling.software.SPDF.pdf.parser.PdfModels.*;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.font.PDFont;
-import org.apache.pdfbox.text.PDFTextStripper;
-import org.apache.pdfbox.text.TextPosition;
-
-/**
- * Extends {@link PDFTextStripper} to capture per-fragment geometry and font metadata.
- *
- * <p>Overrides {@link #writeString} to split each content-stream string into word-level {@link
- * TextFragment}s with bounding boxes, baseline, font name, and bold flag. Coordinates are in
- * PDFTextStripper space: (0,0) top-left, Y increases downward, {@code getY()} is the baseline.
- */
-class WordExtractingStripper extends PDFTextStripper {
-
-    private final int targetPage;
-    private final List<TextFragment> fragments = new ArrayList<>();
-    private int fragmentIndex = 0;
-
-    WordExtractingStripper(int pageNumber) throws IOException {
-        this.targetPage = pageNumber;
-        setStartPage(pageNumber);
-        setEndPage(pageNumber);
-        setSortByPosition(true);
-    }
-
-    @Override
-    protected void startPage(PDPage page) throws IOException {
-        super.startPage(page);
-        fragments.clear();
-        fragmentIndex = 0;
-    }
-
-    @Override
-    protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
-        if (text == null || text.isBlank()) return;
-
-        // Fast path: no whitespace → emit one fragment (most financial PDFs have each
-        // number as its own string operation, so this is the common case).
-        if (text.indexOf(' ') < 0) {
-            emitFragment(text, textPositions);
-            return;
-        }
-
-        // Per-word splitting requires 1:1 text-char to TextPosition correspondence.
-        // Fall back to one fragment when sizes differ (ligatures, encoding edge cases).
-        if (textPositions.size() != text.length()) {
-            emitFragment(text, textPositions);
-            return;
-        }
-
-        // Emit one TextFragment per whitespace-delimited word with accurate per-word bounds.
-        int start = 0;
-        for (int i = 0; i <= text.length(); i++) {
-            if (i == text.length() || text.charAt(i) == ' ') {
-                if (start < i) {
-                    emitFragment(text.substring(start, i), textPositions.subList(start, i));
-                }
-                start = i + 1;
-            }
-        }
-    }
-
-    private void emitFragment(String text, List<TextPosition> positions) {
-        if (positions.isEmpty()) return;
-
-        float minX = Float.MAX_VALUE;
-        float minY = Float.MAX_VALUE;
-        float maxRight = -Float.MAX_VALUE;
-        float maxBaseline = -Float.MAX_VALUE;
-        TextPosition first = null;
-
-        for (TextPosition tp : positions) {
-            if (tp == null) continue;
-            if (first == null) first = tp;
-
-            float x = tp.getX();
-            // getY() is the baseline; top of character = getY() - getHeight().
-            float top = tp.getY() - tp.getHeight();
-            float right = x + tp.getWidth();
-            float baseline = tp.getY();
-
-            minX = Math.min(minX, x);
-            minY = Math.min(minY, top);
-            maxRight = Math.max(maxRight, right);
-            maxBaseline = Math.max(maxBaseline, baseline);
-        }
-
-        if (first == null) return;
-
-        PDFont font = first.getFont();
-        String fontName = font != null ? font.getName() : "";
-        boolean bold = fontName != null && fontName.toLowerCase().contains("bold");
-        // getHeight() gives the rendered glyph height, which is the most reliable visual size.
-        float fontSize = first.getHeight();
-
-        Bounds bounds = new Bounds(minX, minY, maxRight - minX, maxBaseline - minY);
-        String id = "tf-p" + targetPage + "-" + fragmentIndex++;
-        fragments.add(new TextFragment(id, text, bounds, maxBaseline, fontSize, fontName, bold));
-    }
-
-    List<TextFragment> getFragments() {
-        return Collections.unmodifiableList(fragments);
-    }
-}
diff --git a/app/common/src/main/java/stirling/software/common/pdf/HeadingDetector.java b/app/common/src/main/java/stirling/software/common/pdf/HeadingDetector.java
new file mode 100644
index 0000000000..0937cef646
--- /dev/null
+++ b/app/common/src/main/java/stirling/software/common/pdf/HeadingDetector.java
@@ -0,0 +1,191 @@
+package stirling.software.common.pdf;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import stirling.software.jpdfium.text.PageText;
+import stirling.software.jpdfium.text.TextChar;
+import stirling.software.jpdfium.text.TextLine;
+import stirling.software.jpdfium.text.TextWord;
+
+final class HeadingDetector {
+
+    private HeadingDetector() {}
+
+    /** A heading is at most this many words; longer lines are treated as body text. */
+    private static final int MAX_HEADING_WORDS = 12;
+
+    /**
+     * Returns the Markdown heading prefix for a line. The decision combines several signals, never
+     * text matching, so a plain line that merely shares text with a heading is never promoted:
+     *
+     * <ul>
+     *   <li><b>Size</b> — dominant glyph font size vs. the document body median (primary signal).
+     *       Some PDFs encode visual size in the text matrix, so every glyph reports ~1.0; for those
+     *       the line height is used as the proxy instead.
+     *   <li><b>Brevity</b> — headings are short labels; a line over {@value #MAX_HEADING_WORDS}
+     *       words is body text regardless of size.
+     *   <li><b>Not a sentence</b> — a line ending in {@code . ! ?} reads as prose, not a heading.
+     * </ul>
+     *
+     * <p>Boldness is deliberately <em>not</em> a heading signal — a bold-but-not-larger line is
+     * emphasis, not a heading (see {@link #isBoldLabel}); promoting it to {@code #}/{@code ##} is
+     * the main source of false-positive headings.
+     *
+     * <ul>
+     *   <li>size &gt; baseline * 1.4 → {@code "# "}
+     *   <li>size &gt; baseline * 1.2 → {@code "## "}
+     *   <li>otherwise → {@code ""}
+     * </ul>
+     */
+    static String headingPrefix(TextLine line, float medianBodySize, float medianBodyHeight) {
+        String text = line.text().strip();
+        if (text.isEmpty() || wordCount(text) > MAX_HEADING_WORDS || endsLikeSentence(text)) {
+            return "";
+        }
+
+        float dominant = dominantFontSize(line);
+        float value;
+        float baseline;
+        if (dominant > 2f && medianBodySize > 2f) {
+            value = dominant;
+            baseline = medianBodySize;
+        } else {
+            value = line.height();
+            baseline = medianBodyHeight;
+        }
+        if (baseline <= 0f) {
+            return "";
+        }
+
+        float ratio = value / baseline;
+        if (ratio > 1.4f) {
+            return "# ";
+        }
+        if (ratio > 1.2f) {
+            return "## ";
+        }
+        return "";
+    }
+
+    /**
+     * True when a line should be emphasised as bold (rendered {@code **like this**}) rather than
+     * promoted to a heading: it is bold, short, and not a full sentence. Used for bold labels that
+     * are not large enough to be headings.
+     */
+    static boolean isBoldLabel(TextLine line) {
+        String text = line.text().strip();
+        if (text.isEmpty() || wordCount(text) > MAX_HEADING_WORDS || endsLikeSentence(text)) {
+            return false;
+        }
+        return isBold(line);
+    }
+
+    private static int wordCount(String text) {
+        return text.split("\\s+").length;
+    }
+
+    private static boolean endsLikeSentence(String text) {
+        char last = text.charAt(text.length() - 1);
+        return last == '.' || last == '!' || last == '?';
+    }
+
+    /** True when the line's dominant font is bold, inferred from PostScript font names. */
+    private static boolean isBold(TextLine line) {
+        Map<String, Integer> counts = new HashMap<>();
+        for (TextWord word : line.words()) {
+            for (TextChar ch : word.chars()) {
+                if (ch.isWhitespace() || ch.isNewline()) {
+                    continue;
+                }
+                String name = ch.fontName();
+                if (name != null && !name.isBlank()) {
+                    counts.merge(name, 1, Integer::sum);
+                }
+            }
+        }
+        String dominantFont = "";
+        int max = -1;
+        for (Map.Entry<String, Integer> e : counts.entrySet()) {
+            if (e.getValue() > max) {
+                max = e.getValue();
+                dominantFont = e.getKey();
+            }
+        }
+        String lower = dominantFont.toLowerCase(java.util.Locale.ROOT);
+        return lower.contains("bold")
+                || lower.contains("black")
+                || lower.contains("heavy")
+                || lower.contains("semibold");
+    }
+
+    /** Computes the median glyph font size across all pages. */
+    static float medianFontSize(List<PageText> allPages) {
+        List<Float> sizes = new ArrayList<>();
+        for (PageText page : allPages) {
+            for (TextChar ch : page.chars()) {
+                if (!ch.isWhitespace() && !ch.isNewline() && ch.fontSize() > 0f) {
+                    sizes.add(ch.fontSize());
+                }
+            }
+        }
+        return median(sizes, 12f);
+    }
+
+    /** Computes the median TextLine height across all pages. Used when font size is degenerate. */
+    static float medianLineHeight(List<PageText> allPages) {
+        List<Float> heights = new ArrayList<>();
+        for (PageText page : allPages) {
+            for (TextLine line : page.lines()) {
+                if (line.height() > 0f && !line.text().isBlank()) {
+                    heights.add(line.height());
+                }
+            }
+        }
+        return median(heights, 12f);
+    }
+
+    private static float median(List<Float> values, float fallback) {
+        if (values.isEmpty()) {
+            return fallback;
+        }
+        Collections.sort(values);
+        int mid = values.size() / 2;
+        if (values.size() % 2 == 0) {
+            return (values.get(mid - 1) + values.get(mid)) / 2f;
+        }
+        return values.get(mid);
+    }
+
+    /**
+     * Returns the font size that appears most often (by character count) in the given line. Ties
+     * are broken in favour of the larger size.
+     */
+    private static float dominantFontSize(TextLine line) {
+        Map<Float, Integer> counts = new HashMap<>();
+        for (TextWord word : line.words()) {
+            for (TextChar ch : word.chars()) {
+                if (!ch.isWhitespace() && !ch.isNewline() && ch.fontSize() > 0f) {
+                    counts.merge(ch.fontSize(), 1, Integer::sum);
+                }
+            }
+        }
+        if (counts.isEmpty()) {
+            return 0f;
+        }
+        float dominant = 0f;
+        int maxCount = -1;
+        for (Map.Entry<Float, Integer> entry : counts.entrySet()) {
+            int count = entry.getValue();
+            float size = entry.getKey();
+            if (count > maxCount || (count == maxCount && size > dominant)) {
+                maxCount = count;
+                dominant = size;
+            }
+        }
+        return dominant;
+    }
+}
diff --git a/app/common/src/main/java/stirling/software/common/pdf/PdfMarkdownConverter.java b/app/common/src/main/java/stirling/software/common/pdf/PdfMarkdownConverter.java
new file mode 100644
index 0000000000..c19468b5ed
--- /dev/null
+++ b/app/common/src/main/java/stirling/software/common/pdf/PdfMarkdownConverter.java
@@ -0,0 +1,1043 @@
+package stirling.software.common.pdf;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import stirling.software.jpdfium.PdfDocument;
+import stirling.software.jpdfium.PdfPage;
+import stirling.software.jpdfium.doc.ExtractedImage;
+import stirling.software.jpdfium.doc.PdfImageExtractor;
+import stirling.software.jpdfium.model.Rect;
+import stirling.software.jpdfium.text.PageText;
+import stirling.software.jpdfium.text.PdfTableExtractor;
+import stirling.software.jpdfium.text.PdfTextExtractor;
+import stirling.software.jpdfium.text.Table;
+import stirling.software.jpdfium.text.TextLine;
+import stirling.software.jpdfium.text.TextWord;
+
+/**
+ * Converts a PDF to Markdown using a TextLine-driven body pipeline.
+ *
+ * <p>Body text is rebuilt from {@link PdfTextExtractor} {@link TextLine}s. TextLines group words
+ * faithfully and keep paragraph order, so the only pre-processing needed is stitching narrow
+ * standalone glyph fragments (apostrophes, quotes, asterisks, superscript footnote markers,
+ * bullets) back into the line they belong to. Column layout and tables are derived from line/word
+ * geometry directly.
+ */
+public class PdfMarkdownConverter {
+
+    private static final Pattern SOFT_HYPHEN = Pattern.compile("(\\w+)-\\n([a-z])");
+
+    /** Width below which a TextLine is treated as a stray glyph fragment to be stitched. */
+    private static final float GLYPH_WIDTH = 7.5f;
+
+    public String convert(PdfDocument doc) throws IOException {
+        List<PageText> allPageText = PdfTextExtractor.extractAll(doc);
+        float medianSize = HeadingDetector.medianFontSize(allPageText);
+        float medianHeight = HeadingDetector.medianLineHeight(allPageText);
+
+        int pageCount = doc.pageCount();
+        // Elements are either rendered text (String) or a structured TableBlock. Tables stay
+        // structured until after the page loop so a table split across a page break can be stitched
+        // back together before rendering.
+        List<Object> output = new ArrayList<>();
+        // Header text of a table that ended the previous page, used to spot a continuation whose
+        // header repeats at the top of the current page. Null when the previous page did not end in
+        // a table.
+        String prevPageTrailingTableHeader = null;
+
+        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
+            List<TextLine> rawLines =
+                    pageIndex < allPageText.size() ? allPageText.get(pageIndex).lines() : List.of();
+
+            // Stitch stray glyph fragments (apostrophes, asterisks, superscripts, bullets) into
+            // their host lines so paragraph assembly sees faithful, complete lines.
+            List<Line> lines = stitchGlyphs(rawLines);
+            if (lines.isEmpty()) {
+                emitImages(doc, pageIndex, output);
+                prevPageTrailingTableHeader = null;
+                continue;
+            }
+
+            // Sort top-to-bottom (PDF y=0 is the bottom of the page).
+            lines.sort(Comparator.comparingDouble((Line l) -> l.y).reversed());
+
+            // Multi-column guard: only genuine two-column prose should be split. A table's column
+            // gutters must NOT be mistaken for a page-layout gutter, so this looks at whether row
+            // lines span the gutter (table) or stay within one side (two-column prose).
+            // A table that ran to the bottom of the previous page and repeats its header at the top
+            // of this page is a continuation, not a new two-column layout. Detecting the repeated
+            // header keeps this page out of the two-column path so the continuation is rebuilt as a
+            // table and stitched back onto the previous block.
+            final String continuationHeader = prevPageTrailingTableHeader;
+            boolean tableContinuation =
+                    continuationHeader != null
+                            && lines.stream()
+                                    .anyMatch(
+                                            l -> normaliseSpace(l.text).equals(continuationHeader));
+
+            boolean twoColumn = !tableContinuation && detectsTwoColumns(lines);
+
+            // Tables are detected from text/word geometry (the word-grid detector), which handles
+            // both ruled and borderless tables and places cells by column alignment. The native
+            // ruled-line extractor is not used: it both mis-renders cells and double-emits rows.
+            Set<String> tableRowTexts = new HashSet<>();
+            List<TableBlock> blocks = twoColumn ? List.of() : findTableBlocks(lines);
+            Set<Line> tableLines = new HashSet<>();
+            for (TableBlock b : blocks) {
+                for (List<Line> row : b.rows()) {
+                    for (Line l : row) {
+                        tableLines.add(l);
+                        tableRowTexts.add(repairHyphens(l.text).strip());
+                    }
+                }
+            }
+
+            List<Object> pageItems = new ArrayList<>();
+            if (twoColumn) {
+                for (List<Line> col : splitIntoColumns(lines)) {
+                    List<String> paras = new ArrayList<>();
+                    assembleParagraphs(col, medianSize, medianHeight, paras, tableRowTexts);
+                    pageItems.addAll(paras);
+                }
+            } else {
+                // Interleave tables with surrounding text by vertical position. Each block sits in
+                // its own slot; non-table lines fall into the slot for their y (text above a block,
+                // between blocks, or below the last). This keeps multiple tables on one page
+                // separate and in reading order.
+                List<List<Line>> segments = new ArrayList<>();
+                for (int s = 0; s <= blocks.size(); s++) {
+                    segments.add(new ArrayList<>());
+                }
+                for (Line l : lines) {
+                    if (tableLines.contains(l)) {
+                        continue;
+                    }
+                    int slot = 0;
+                    for (TableBlock b : blocks) {
+                        if (b.bottom() > l.y) {
+                            slot++;
+                        }
+                    }
+                    segments.get(slot).add(l);
+                }
+                for (int s = 0; s <= blocks.size(); s++) {
+                    List<String> paras = new ArrayList<>();
+                    assembleParagraphs(
+                            segments.get(s), medianSize, medianHeight, paras, tableRowTexts);
+                    pageItems.addAll(paras);
+                    if (s < blocks.size()) {
+                        pageItems.add(blocks.get(s));
+                    }
+                }
+            }
+
+            emitImages(doc, pageIndex, pageItems);
+
+            if (pageItems.isEmpty()) {
+                continue;
+            }
+
+            mergeAcrossPageBoundary(output, pageItems);
+            output.addAll(pageItems);
+            prevPageTrailingTableHeader = trailingTableHeader(pageItems);
+        }
+
+        // Stitch tables split across page breaks, then render every element to Markdown.
+        List<Object> stitched = stitchTables(output);
+        List<String> rendered = new ArrayList<>();
+        for (Object e : stitched) {
+            rendered.add(e instanceof TableBlock tb ? tb.render() : (String) e);
+        }
+        return String.join("\n\n", rendered);
+    }
+
+    // --- Glyph stitching ---------------------------------------------------
+
+    /** A mutable assembled line: text plus geometry used for ordering and heading detection. */
+    private static final class Line {
+        String text;
+        float x;
+        float y;
+        float width;
+        float height;
+        final TextLine source;
+
+        Line(TextLine src) {
+            this.source = src;
+            this.text = src.text();
+            this.x = src.x();
+            this.y = src.y();
+            this.width = src.width();
+            this.height = src.height();
+        }
+    }
+
+    /**
+     * Merges narrow glyph fragments (width &lt; {@link #GLYPH_WIDTH}) into the line they belong to.
+     *
+     * <ul>
+     *   <li>A glyph between a left fragment that ends near it and a right fragment that starts near
+     *       it (both on the same baseline) is inserted inline: {@code aren} + {@code '} + {@code t}
+     *       → {@code aren't}.
+     *   <li>A glyph immediately right of a line's end is appended (e.g. superscript footnote marker
+     *       after a number).
+     *   <li>A glyph immediately left of a line's start is prepended (e.g. footnote marker before
+     *       its text).
+     * </ul>
+     */
+    private static List<Line> stitchGlyphs(List<TextLine> raw) {
+        List<TextLine> hosts = new ArrayList<>();
+        List<TextLine> glyphs = new ArrayList<>();
+        for (TextLine l : raw) {
+            String t = l.text().strip();
+            if (t.isEmpty()) {
+                continue;
+            }
+            if (l.width() < GLYPH_WIDTH && t.length() <= 2) {
+                glyphs.add(l);
+            } else {
+                hosts.add(l);
+            }
+        }
+
+        List<Line> lines = hosts.stream().map(Line::new).collect(Collectors.toList());
+
+        for (TextLine g : glyphs) {
+            String gt = g.text().strip();
+            if (isBulletGlyph(gt)) {
+                attachBullet(g, gt, lines);
+            } else {
+                attachInlineGlyph(g, gt, lines);
+            }
+        }
+        return lines;
+    }
+
+    private static boolean isBulletGlyph(String gt) {
+        return "•".equals(gt) || "▪".equals(gt) || "◦".equals(gt);
+    }
+
+    /**
+     * Attaches a bullet glyph to the body line it introduces: the closest line that begins to the
+     * right of the bullet at roughly the same height or just below it.
+     */
+    private static void attachBullet(TextLine g, String gt, List<Line> lines) {
+        Line best = null;
+        float bestScore = Float.MAX_VALUE;
+        for (Line h : lines) {
+            if (h.x < g.x() - 2f) {
+                continue;
+            }
+            float dy = g.y() - h.y;
+            if (dy < -4f || dy > 28f) {
+                continue;
+            }
+            float score = Math.abs(dy) + (h.x - g.x()) * 0.2f;
+            if (score < bestScore) {
+                bestScore = score;
+                best = h;
+            }
+        }
+        if (best != null && !best.text.startsWith("•")) {
+            best.text = "• " + best.text;
+            best.x = g.x();
+        } else {
+            lines.add(new Line(g));
+        }
+    }
+
+    /**
+     * Stitches a narrow inline glyph (apostrophe, quote, asterisk, superscript marker) into the
+     * line it belongs to: inline between two same-baseline fragments, appended to the line that
+     * ends at it, or prepended to the line that starts at it.
+     */
+    private static void attachInlineGlyph(TextLine g, String gt, List<Line> lines) {
+        Line left = null;
+        Line right = null;
+        float lb = 7f;
+        float rb = 7f;
+        for (Line h : lines) {
+            boolean sameBaseline = g.y() >= h.y - 4f && g.y() <= h.y + h.height + 5f;
+            if (!sameBaseline) {
+                continue;
+            }
+            float rightEdge = h.x + h.width;
+            float dxLeft = Math.abs(rightEdge - g.x());
+            if (dxLeft < lb) {
+                lb = dxLeft;
+                left = h;
+            }
+            float dxRight = Math.abs(h.x - g.x());
+            if (dxRight < rb) {
+                rb = dxRight;
+                right = h;
+            }
+        }
+
+        if (left != null && right != null && left != right && Math.abs(left.y - right.y) < 6f) {
+            left.text = left.text + gt + right.text;
+            left.width = (right.x + right.width) - left.x;
+            lines.remove(right);
+        } else if (left != null) {
+            left.text = left.text + gt;
+            left.width = Math.max(left.width, g.x() + g.width() - left.x);
+        } else if (right != null) {
+            right.text = gt + right.text;
+            right.x = g.x();
+        } else {
+            lines.add(new Line(g));
+        }
+    }
+
+    // --- Column detection (guard only) -------------------------------------
+
+    /**
+     * Returns true when the page is a genuine two-column layout. Uses line/word geometry: body
+     * blocks (ignoring narrow glyph blocks) and requires a wide horizontal gutter populated on both
+     * sides, so single apostrophe glyphs cannot create a false second column.
+     */
+    private static boolean detectsTwoColumns(List<Line> lines) {
+        if (lines.size() < 8) {
+            return false;
+        }
+        float minX = Float.MAX_VALUE;
+        float maxX = -Float.MAX_VALUE;
+        for (Line l : lines) {
+            minX = Math.min(minX, l.x);
+            maxX = Math.max(maxX, l.x + l.width);
+        }
+        if (maxX - minX < 200f) {
+            return false;
+        }
+
+        // Scan candidate gutter positions across the central band (35%-65% of width) and pick the
+        // one crossed by the fewest lines. Two-column prose has a gutter that only a handful of
+        // full-width lines (title, section headings) cross; a table's rows all span the full width,
+        // so every candidate gutter is crossed by most lines.
+        float centreLo = minX + (maxX - minX) * 0.35f;
+        float centreHi = minX + (maxX - minX) * 0.65f;
+        int bestCrossing = Integer.MAX_VALUE;
+        int bestLeft = 0;
+        int bestRight = 0;
+        for (float gutter = centreLo; gutter <= centreHi; gutter += 2f) {
+            int crossing = 0;
+            int leftOnly = 0;
+            int rightOnly = 0;
+            for (Line l : lines) {
+                float lx = l.x;
+                float rx = l.x + l.width;
+                if (lx < gutter - 5f && rx > gutter + 5f) {
+                    crossing++;
+                } else if (rx <= gutter) {
+                    leftOnly++;
+                } else {
+                    rightOnly++;
+                }
+            }
+            if (crossing < bestCrossing) {
+                bestCrossing = crossing;
+                bestLeft = leftOnly;
+                bestRight = rightOnly;
+            }
+        }
+
+        return bestLeft >= 4 && bestRight >= 4 && bestCrossing <= (int) (lines.size() * 0.25f);
+    }
+
+    private static List<List<Line>> splitIntoColumns(List<Line> lines) {
+        List<Float> xs =
+                lines.stream()
+                        .filter(l -> l.width >= 40f)
+                        .map(l -> l.x)
+                        .sorted()
+                        .collect(Collectors.toList());
+        if (xs.isEmpty()) {
+            return List.of(lines);
+        }
+        float minX = xs.get(0);
+        float maxX = xs.get(xs.size() - 1);
+        float splitAt = (minX + maxX) / 2f;
+        float biggestGap = 0;
+        for (int i = 1; i < xs.size(); i++) {
+            float gap = xs.get(i) - xs.get(i - 1);
+            if (gap > biggestGap) {
+                biggestGap = gap;
+                splitAt = (xs.get(i - 1) + xs.get(i)) / 2f;
+            }
+        }
+        List<Line> left = new ArrayList<>();
+        List<Line> right = new ArrayList<>();
+        for (Line l : lines) {
+            if (l.x < splitAt) {
+                left.add(l);
+            } else {
+                right.add(l);
+            }
+        }
+        if (left.isEmpty()) {
+            return List.of(right);
+        }
+        if (right.isEmpty()) {
+            return List.of(left);
+        }
+        return List.of(left, right);
+    }
+
+    // --- Paragraph assembly ------------------------------------------------
+
+    private static void assembleParagraphs(
+            List<Line> lines,
+            float medianSize,
+            float medianHeight,
+            List<String> out,
+            Set<String> tableRowTexts) {
+        StringBuilder para = new StringBuilder();
+        float prevBottomY = Float.MAX_VALUE;
+        float prevHeight = 0f;
+
+        for (Line line : lines) {
+            String text = repairHyphens(line.text).strip();
+            if (text.isEmpty()) {
+                continue;
+            }
+            if (tableRowTexts.contains(text)) {
+                continue;
+            }
+
+            float blockTop = line.y + line.height;
+            float gap = prevBottomY - blockTop;
+            boolean paragraphBreak = prevHeight > 0f && gap > prevHeight * 0.8f;
+
+            String prefix = HeadingDetector.headingPrefix(line.source, medianSize, medianHeight);
+            boolean isHeading = !prefix.isEmpty();
+            boolean isBullet = startsWithBullet(text);
+
+            if (isHeading) {
+                flushParagraph(para, out);
+                out.add(prefix + escapeMarkdown(text));
+            } else if (isBullet) {
+                flushParagraph(para, out);
+                out.add(escapeMarkdown(text));
+            } else if (HeadingDetector.isBoldLabel(line.source)) {
+                // Bold but not large enough to be a heading → emphasise as bold, don't promote.
+                flushParagraph(para, out);
+                out.add("**" + escapeMarkdown(text) + "**");
+            } else if (paragraphBreak) {
+                flushParagraph(para, out);
+                para.append(text);
+            } else {
+                if (!para.isEmpty()) {
+                    char fc = text.charAt(0);
+                    boolean noSpace = fc == '\'' || fc == '’' || fc == '‘' || fc == '"';
+                    if (!noSpace) {
+                        para.append(' ');
+                    }
+                }
+                para.append(text);
+            }
+
+            prevBottomY = line.y;
+            prevHeight = line.height;
+        }
+        flushParagraph(para, out);
+    }
+
+    private static boolean startsWithBullet(String text) {
+        return text.startsWith("•") || text.startsWith("▪") || text.startsWith("◦");
+    }
+
+    // --- Word-grid table detection -----------------------------------------
+
+    /**
+     * A detected table. Each row is a list of source lines: usually one, but more when a cell wraps
+     * onto extra lines (those continuation lines are absorbed into the row they belong to).
+     */
+    private record TableBlock(List<List<Line>> rows, float top, float bottom) {
+        String render() {
+            return buildTableFromRows(rows);
+        }
+    }
+
+    /**
+     * Detects table blocks on a page. Anchor rows (lines with table-like column gaps) are grouped
+     * into vertically-contiguous runs separated by large vertical gaps, so multiple separate tables
+     * on one page stay separate. Non-anchor lines that fall within a run's vertical span are
+     * treated as wrapped-cell continuations and absorbed into the nearest anchor row above them.
+     */
+    private static List<TableBlock> findTableBlocks(List<Line> lines) {
+        List<Line> cands =
+                lines.stream()
+                        .filter(l -> isTableCandidate(l.source))
+                        .sorted(Comparator.comparingDouble((Line l) -> l.y).reversed())
+                        .collect(Collectors.toList());
+        if (cands.size() < 2) {
+            return List.of();
+        }
+
+        List<Float> gaps = new ArrayList<>();
+        for (int i = 1; i < cands.size(); i++) {
+            gaps.add(cands.get(i - 1).y - cands.get(i).y);
+        }
+        List<Float> sorted = new ArrayList<>(gaps);
+        sorted.sort(Comparator.naturalOrder());
+        float medianGap = sorted.get(sorted.size() / 2);
+        float splitThreshold = Math.max(medianGap * 2.5f, medianGap + 6f);
+
+        List<List<Line>> anchorGroups = new ArrayList<>();
+        List<Line> current = new ArrayList<>();
+        current.add(cands.get(0));
+        for (int i = 1; i < cands.size(); i++) {
+            float gap = cands.get(i - 1).y - cands.get(i).y;
+            if (gap > splitThreshold) {
+                anchorGroups.add(current);
+                current = new ArrayList<>();
+            }
+            current.add(cands.get(i));
+        }
+        anchorGroups.add(current);
+
+        List<Line> nonCandidates =
+                lines.stream()
+                        .filter(l -> !isTableCandidate(l.source))
+                        .collect(Collectors.toList());
+
+        List<TableBlock> blocks = new ArrayList<>();
+        for (List<Line> anchors : anchorGroups) {
+            if (anchors.size() < 2) {
+                continue;
+            }
+            float top = anchors.get(0).y;
+            float bottom = anchors.get(anchors.size() - 1).y;
+
+            // Each anchor seeds a row; absorb wrapped continuation lines (non-anchors within the
+            // run's vertical span, with a little slack below the last row) into the anchor above.
+            List<List<Line>> rows = new ArrayList<>();
+            for (Line a : anchors) {
+                List<Line> row = new ArrayList<>();
+                row.add(a);
+                rows.add(row);
+            }
+            for (Line nc : nonCandidates) {
+                if (nc.y > top || nc.y < bottom - medianGap) {
+                    continue;
+                }
+                int owner = 0;
+                float bestDelta = Float.MAX_VALUE;
+                for (int i = 0; i < anchors.size(); i++) {
+                    float delta = anchors.get(i).y - nc.y; // positive when anchor is above nc
+                    if (delta >= -1f && delta < bestDelta) {
+                        bestDelta = delta;
+                        owner = i;
+                    }
+                }
+                rows.get(owner).add(nc);
+            }
+
+            if (buildTableFromRows(rows).isBlank()) {
+                continue;
+            }
+            blocks.add(new TableBlock(rows, top, bottom));
+        }
+        return blocks;
+    }
+
+    private static String buildTableFromRows(List<List<Line>> rowGroups) {
+        // Detect columns by vertical-whitespace projection across all lines, rather than a 1-D gap
+        // threshold on pooled word x's. Pooled-gap detection is fragile when numbers are
+        // right-aligned (a 10-digit value starts well left of a 7-digit one) or when sparse cells
+        // sit in their own x-band. Projection asks "which x-bands are occupied across many rows",
+        // which is stable under those conditions.
+        List<Line> flat = rowGroups.stream().flatMap(List::stream).collect(Collectors.toList());
+        List<float[]> columns = findColumnRanges(flat);
+        if (columns.size() < 2 || columns.size() > 15) {
+            return "";
+        }
+
+        float[] centers = new float[columns.size()];
+        for (int i = 0; i < columns.size(); i++) {
+            centers[i] = (columns.get(i)[0] + columns.get(i)[1]) / 2f;
+        }
+
+        int cols = centers.length;
+        List<String[]> rows = new ArrayList<>();
+        for (List<Line> rowLines : rowGroups) {
+            String[] row = new String[cols];
+            for (int i = 0; i < cols; i++) {
+                row[i] = "";
+            }
+            // Top line first so a wrapped cell's words stay in reading order within the cell.
+            rowLines.sort(Comparator.comparingDouble((Line l) -> l.y).reversed());
+            for (Line line : rowLines) {
+                for (TextWord word : line.source.words()) {
+                    String wt = word.text().strip();
+                    if (wt.isEmpty()) {
+                        continue;
+                    }
+                    int col = nearestColumn(word.x() + word.width() / 2f, centers);
+                    row[col] = row[col].isEmpty() ? wt : row[col] + " " + wt;
+                }
+            }
+            rows.add(row);
+        }
+
+        // Guard against false positives while tolerating uneven rows (sparse cells, merged/spanning
+        // headers). The columns already come from cross-row whitespace alignment, so a stable grid
+        // exists. Additionally require: at least one "anchor" row that nearly fills the grid (so
+        // the
+        // column count is real, not an artefact), and that most rows are genuinely multi-column.
+        int anchorWidth = Math.max(2, Math.round(cols * 0.6f));
+        long anchorRows = rows.stream().filter(r -> filledCells(r) >= anchorWidth).count();
+        long multiColumnRows = rows.stream().filter(r -> filledCells(r) >= 2).count();
+        if (anchorRows < 1 || multiColumnRows < 2 || multiColumnRows < rows.size() * 0.5) {
+            return "";
+        }
+        return renderGfm(rows, cols);
+    }
+
+    /**
+     * Visible for testing: column detection depends only on word geometry, so tests can drive it
+     * from synthetic {@link TextLine}s to exercise degenerate-coordinate handling (the crash path
+     * an extreme text matrix can produce) without needing a binary PDF fixture.
+     */
+    static List<float[]> findColumnRangesFromLines(List<TextLine> rows) {
+        return findColumnRanges(rows.stream().map(Line::new).collect(Collectors.toList()));
+    }
+
+    /**
+     * Finds column x-ranges by vertical-whitespace projection. Each row contributes coverage for
+     * the x-bands its words occupy; a column is a contiguous band covered by a sufficient fraction
+     * of rows, and the gaps between such bands are the gutters.
+     */
+    private static List<float[]> findColumnRanges(List<Line> rows) {
+        float minX = Float.MAX_VALUE;
+        float maxX = -Float.MAX_VALUE;
+        for (Line l : rows) {
+            for (TextWord w : l.source.words()) {
+                minX = Math.min(minX, w.x());
+                maxX = Math.max(maxX, w.x() + w.width());
+            }
+        }
+        // Real pages are under ~2000pt wide; anything larger is a malformed/crafted coordinate
+        // that would allocate a multi-GB array or produce a negative span on overflow.
+        if (maxX <= minX || (maxX - minX) > 2000f) {
+            return List.of();
+        }
+
+        int lo = (int) Math.floor(minX);
+        int span = Math.min((int) Math.ceil(maxX) - lo + 1, 2001);
+        int[] coverage = new int[span];
+        for (Line l : rows) {
+            boolean[] covered = new boolean[span];
+            for (TextWord w : l.source.words()) {
+                int a = Math.max(0, (int) Math.floor(w.x()) - lo);
+                int b = Math.min(span, (int) Math.ceil(w.x() + w.width()) - lo);
+                for (int x = a; x < b; x++) {
+                    covered[x] = true;
+                }
+            }
+            for (int x = 0; x < span; x++) {
+                if (covered[x]) {
+                    coverage[x]++;
+                }
+            }
+        }
+
+        // A column band must be occupied by at least this many rows; below it is gutter.
+        int support = Math.max(2, Math.round(rows.size() * 0.35f));
+        List<float[]> columns = new ArrayList<>();
+        int start = -1;
+        for (int x = 0; x < span; x++) {
+            boolean isColumn = coverage[x] >= support;
+            if (isColumn && start < 0) {
+                start = x;
+            } else if (!isColumn && start >= 0) {
+                columns.add(new float[] {lo + start, lo + x});
+                start = -1;
+            }
+        }
+        if (start >= 0) {
+            columns.add(new float[] {(float) (lo + start), (float) (lo + span)});
+        }
+
+        // Merge bands separated by only a narrow gutter. A real column separator is several
+        // characters wide; the gaps *inside* a multi-word cell (ordinary word spacing) are about
+        // one character. Without this, a cell like "January 20th, 2026" — whose words align
+        // vertically across every row — would be split into three spurious columns.
+        float charWidth = averageCharWidth(rows);
+        float minGutter = Math.max(10f, charWidth * 2.5f);
+        List<float[]> merged = new ArrayList<>();
+        for (float[] band : columns) {
+            if (!merged.isEmpty() && band[0] - merged.get(merged.size() - 1)[1] < minGutter) {
+                merged.get(merged.size() - 1)[1] = band[1];
+            } else {
+                merged.add(new float[] {band[0], band[1]});
+            }
+        }
+        return merged;
+    }
+
+    private static float averageCharWidth(List<Line> rows) {
+        double totalWidth = 0;
+        int totalChars = 0;
+        for (Line l : rows) {
+            for (TextWord w : l.source.words()) {
+                totalWidth += w.width();
+                totalChars += Math.max(1, w.text().strip().length());
+            }
+        }
+        return totalChars == 0 ? 6f : (float) (totalWidth / totalChars);
+    }
+
+    private static int nearestColumn(float x, float[] centers) {
+        int best = 0;
+        float bestDist = Float.MAX_VALUE;
+        for (int i = 0; i < centers.length; i++) {
+            float d = Math.abs(x - centers[i]);
+            if (d < bestDist) {
+                bestDist = d;
+                best = i;
+            }
+        }
+        return best;
+    }
+
+    private static int filledCells(String[] row) {
+        int count = 0;
+        for (String cell : row) {
+            if (!cell.isEmpty()) {
+                count++;
+            }
+        }
+        return count;
+    }
+
+    private static String renderGfm(List<String[]> rows, int cols) {
+        if (rows.isEmpty()) {
+            return "";
+        }
+        int[] widths = new int[cols];
+        for (int c = 0; c < cols; c++) {
+            widths[c] = 3;
+        }
+        for (String[] row : rows) {
+            for (int c = 0; c < cols; c++) {
+                if (c < row.length) {
+                    widths[c] = Math.max(widths[c], escapeCell(row[c]).length());
+                }
+            }
+        }
+        StringBuilder sb = new StringBuilder();
+        sb.append(buildGfmRow(rows.get(0), widths, cols)).append('\n');
+        sb.append('|');
+        for (int c = 0; c < cols; c++) {
+            sb.append('-').append("-".repeat(widths[c])).append('-').append('|');
+        }
+        for (int r = 1; r < rows.size(); r++) {
+            sb.append('\n').append(buildGfmRow(rows.get(r), widths, cols));
+        }
+        return sb.toString();
+    }
+
+    /**
+     * A line looks like a table row if it has at least two words separated by a gap far wider than
+     * normal inter-word spacing. The threshold is derived from the line's own character width
+     * rather than a document font size, because some PDFs report a unit (matrix-scaled) font size
+     * that makes absolute thresholds meaningless. (Two-word rows are allowed so two-column tables
+     * are detected; spurious matches are filtered later by block contiguity and column
+     * consistency.)
+     */
+    private static boolean isTableCandidate(TextLine line) {
+        List<TextWord> words = line.words();
+        if (words.size() < 2) {
+            return false;
+        }
+        double totalWidth = 0;
+        int totalChars = 0;
+        for (TextWord w : words) {
+            totalWidth += w.width();
+            totalChars += Math.max(1, w.text().strip().length());
+        }
+        float charWidth = (float) (totalWidth / Math.max(1, totalChars));
+        // A deliberate cell gap is several blank characters wide; ordinary word spaces are ~a third
+        // of a character. Floor at 8pt so tiny fonts still need a real gap.
+        float cellGap = Math.max(8f, charWidth * 3f);
+        for (int i = 1; i < words.size(); i++) {
+            TextWord prev = words.get(i - 1);
+            float gap = words.get(i).x() - (prev.x() + prev.width());
+            if (gap >= cellGap) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private static String buildGfmRow(String[] row, int[] widths, int cols) {
+        StringBuilder sb = new StringBuilder().append('|');
+        for (int c = 0; c < cols; c++) {
+            String cell = c < row.length ? escapeCell(row[c]) : "";
+            sb.append(' ').append(padRight(cell, widths[c])).append(' ').append('|');
+        }
+        return sb.toString();
+    }
+
+    private static String escapeCell(String cell) {
+        // Cell content is inline context: escape inline markdown (including the column delimiter)
+        // but not leading block markers, which have no meaning inside a table cell.
+        return escapeMarkdownInline(cell);
+    }
+
+    /**
+     * Escapes Markdown control characters in body text extracted from the PDF so that literal
+     * characters (e.g. a line that reads {@code # Heading} or {@code [label](url)}, or an embedded
+     * {@code <tag>}) are emitted as text rather than being reinterpreted as structure or raw HTML.
+     * Applied to all body text — headings, paragraphs, bold labels, bullets — before emission.
+     *
+     * <p>The generated Markdown should still be treated as untrusted content by any downstream
+     * renderer: this hardens fidelity and is defence-in-depth, not a substitute for safe rendering.
+     */
+    private static String escapeMarkdown(String text) {
+        if (text.isEmpty()) {
+            return text;
+        }
+        String inline = escapeMarkdownInline(text);
+        return escapeLeadingBlockMarker(inline, text);
+    }
+
+    /** Escapes inline-significant Markdown characters anywhere in the string. */
+    private static String escapeMarkdownInline(String text) {
+        StringBuilder sb = new StringBuilder(text.length() + 8);
+        for (int i = 0; i < text.length(); i++) {
+            char c = text.charAt(i);
+            switch (c) {
+                case '\\', '`', '*', '_', '[', ']', '<', '>', '|', '~' -> sb.append('\\').append(c);
+                default -> sb.append(c);
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Escapes block-level markers that are only significant at the start of a line: ATX headings
+     * ({@code #}), unordered list / thematic break markers ({@code -}, {@code +}), and ordered list
+     * markers ({@code 1.} / {@code 1)}). {@code original} carries the unescaped leading characters,
+     * none of which are altered by inline escaping, so positions line up with {@code escaped}.
+     */
+    private static String escapeLeadingBlockMarker(String escaped, String original) {
+        char c0 = original.charAt(0);
+        if (c0 == '#' || c0 == '-' || c0 == '+') {
+            return "\\" + escaped;
+        }
+        int i = 0;
+        while (i < original.length() && Character.isDigit(original.charAt(i))) {
+            i++;
+        }
+        if (i > 0 && i < original.length()) {
+            char delim = original.charAt(i);
+            if (delim == '.' || delim == ')') {
+                return escaped.substring(0, i) + "\\" + escaped.substring(i);
+            }
+        }
+        return escaped;
+    }
+
+    private static String padRight(String s, int width) {
+        return s.length() >= width ? s : s + " ".repeat(width - s.length());
+    }
+
+    // --- Page-level emission helpers ---------------------------------------
+
+    private static void emitImages(PdfDocument doc, int pageIndex, List<Object> pageItems)
+            throws IOException {
+        try (PdfPage page = doc.page(pageIndex)) {
+            List<ExtractedImage> images =
+                    PdfImageExtractor.extract(page.rawDocHandle(), page.rawHandle(), pageIndex);
+            for (ExtractedImage img : images) {
+                pageItems.add(describeImage(img));
+            }
+        }
+    }
+
+    /**
+     * Builds an image placeholder annotated with whatever metadata JPDFium exposes: pixel
+     * dimensions, on-page placement (points), effective DPI, encoded format, colour space and bit
+     * depth. Missing fields are simply omitted so the line stays valid for any image.
+     */
+    private static String describeImage(ExtractedImage img) {
+        List<String> parts = new ArrayList<>();
+        if (img.width() > 0 && img.height() > 0) {
+            parts.add(img.width() + "x" + img.height() + "px");
+        }
+        Rect b = img.bounds();
+        if (b != null && b.width() > 0 && b.height() > 0) {
+            parts.add(String.format("%.0fx%.0fpt", b.width(), b.height()));
+            if (img.width() > 0) {
+                float dpiX = img.width() / (b.width() / 72f);
+                float dpiY = img.height() / (b.height() / 72f);
+                if (Float.isFinite(dpiX) && dpiX > 0) {
+                    parts.add(String.format("~%.0fdpi", (dpiX + dpiY) / 2f));
+                }
+            }
+        }
+        String ext = img.suggestedExtension();
+        if (ext != null && !ext.isBlank()) {
+            parts.add(ext.replaceFirst("^\\.", "").toUpperCase(java.util.Locale.ROOT));
+        }
+        if (img.colorSpace() != null) {
+            parts.add(img.colorSpace().toString());
+        }
+        if (img.bitsPerPixel() > 0) {
+            parts.add(img.bitsPerPixel() + "bpp");
+        }
+
+        StringBuilder sb = new StringBuilder("<image redacted");
+        if (!parts.isEmpty()) {
+            sb.append(": ").append(String.join(", ", parts));
+        }
+        sb.append('>');
+        return sb.toString();
+    }
+
+    private static void mergeAcrossPageBoundary(List<Object> output, List<Object> pageItems) {
+        if (output.isEmpty() || pageItems.isEmpty()) {
+            return;
+        }
+        // Only merge a sentence continuation between two text paragraphs, never into/out of a
+        // table.
+        if (!(output.get(output.size() - 1) instanceof String last)
+                || !(pageItems.get(0) instanceof String first)) {
+            return;
+        }
+        if (!first.isEmpty()
+                && Character.isLowerCase(first.charAt(0))
+                && !endsWithSentencePunctuation(last)) {
+            output.set(output.size() - 1, last + " " + first);
+            pageItems.remove(0);
+        }
+    }
+
+    /**
+     * Joins tables split across a page break. Two consecutive {@link TableBlock}s (no text between
+     * them — i.e. one ended a page and the next began the following page) are merged when their
+     * column layouts match; a repeated header row on the continuation is dropped.
+     */
+    private static List<Object> stitchTables(List<Object> elements) {
+        List<Object> out = new ArrayList<>();
+        for (Object e : elements) {
+            if (e instanceof TableBlock tb
+                    && !out.isEmpty()
+                    && out.get(out.size() - 1) instanceof TableBlock prev
+                    && columnsMatch(flatten(prev.rows()), flatten(tb.rows()))) {
+                List<List<Line>> merged = new ArrayList<>(prev.rows());
+                List<List<Line>> tail = tb.rows();
+                if (!tail.isEmpty()
+                        && !prev.rows().isEmpty()
+                        && rowText(tail.get(0)).equals(rowText(prev.rows().get(0)))) {
+                    tail = tail.subList(1, tail.size());
+                }
+                merged.addAll(tail);
+                out.set(out.size() - 1, new TableBlock(merged, prev.top(), tb.bottom()));
+            } else {
+                out.add(e);
+            }
+        }
+        return out;
+    }
+
+    private static String normaliseSpace(String s) {
+        return s.strip().replaceAll("\\s+", " ");
+    }
+
+    private static List<Line> flatten(List<List<Line>> rows) {
+        return rows.stream().flatMap(List::stream).collect(Collectors.toList());
+    }
+
+    /** Whitespace-normalised text of a row's lines (top to bottom), for header de-duplication. */
+    /**
+     * Header text of a table at the very bottom of a page, or null if the page does not end in one.
+     * Trailing image placeholders are skipped; any other text after a table means it did not run to
+     * the page bottom and so is not a continuation candidate.
+     */
+    private static String trailingTableHeader(List<Object> pageItems) {
+        for (int i = pageItems.size() - 1; i >= 0; i--) {
+            Object e = pageItems.get(i);
+            if (e instanceof String s && s.strip().startsWith("<image redacted")) {
+                continue;
+            }
+            if (e instanceof TableBlock tb && !tb.rows().isEmpty()) {
+                return rowText(tb.rows().get(0));
+            }
+            return null;
+        }
+        return null;
+    }
+
+    private static String rowText(List<Line> row) {
+        List<Line> ordered = new ArrayList<>(row);
+        ordered.sort(Comparator.comparingDouble((Line l) -> l.y).reversed());
+        StringBuilder sb = new StringBuilder();
+        for (Line l : ordered) {
+            if (sb.length() > 0) {
+                sb.append(' ');
+            }
+            sb.append(l.text);
+        }
+        return normaliseSpace(sb.toString());
+    }
+
+    /** True when two table blocks have the same number of columns at near-identical x-centres. */
+    private static boolean columnsMatch(List<Line> a, List<Line> b) {
+        List<float[]> ca = findColumnRanges(a);
+        List<float[]> cb = findColumnRanges(b);
+        if (ca.size() < 2 || ca.size() != cb.size()) {
+            return false;
+        }
+        for (int i = 0; i < ca.size(); i++) {
+            float centreA = (ca.get(i)[0] + ca.get(i)[1]) / 2f;
+            float centreB = (cb.get(i)[0] + cb.get(i)[1]) / 2f;
+            if (Math.abs(centreA - centreB) > 15f) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private static void flushParagraph(StringBuilder para, List<String> out) {
+        if (!para.isEmpty()) {
+            out.add(escapeMarkdown(para.toString()));
+            para.setLength(0);
+        }
+    }
+
+    private static String repairHyphens(String text) {
+        return SOFT_HYPHEN.matcher(text).replaceAll("$1$2");
+    }
+
+    private static boolean endsWithSentencePunctuation(String s) {
+        if (s.isEmpty()) {
+            return false;
+        }
+        char last = s.charAt(s.length() - 1);
+        return last == '.' || last == '?' || last == '!' || last == ':';
+    }
+
+    // --- Methods used by other components / tests --------------------------
+
+    List<PageText> extractAllPageText(PdfDocument doc) throws IOException {
+        return PdfTextExtractor.extractAll(doc);
+    }
+
+    List<Table> extractTables(PdfDocument doc, int pageIndex) throws IOException {
+        return PdfTableExtractor.extract(doc, pageIndex);
+    }
+
+    List<String> renderTables(List<Table> tables) {
+        return tables.stream().map(TableRenderer::render).toList();
+    }
+}
diff --git a/app/common/src/main/java/stirling/software/common/pdf/TableRenderer.java b/app/common/src/main/java/stirling/software/common/pdf/TableRenderer.java
new file mode 100644
index 0000000000..3f468699fb
--- /dev/null
+++ b/app/common/src/main/java/stirling/software/common/pdf/TableRenderer.java
@@ -0,0 +1,82 @@
+package stirling.software.common.pdf;
+
+import stirling.software.jpdfium.text.Table;
+
+final class TableRenderer {
+    private TableRenderer() {}
+
+    /** Renders a Table as a GitHub-Flavoured Markdown table string. */
+    static String render(Table table) {
+        if (table.rowCount() == 0) {
+            return "";
+        }
+
+        String[][] grid = table.asGrid();
+
+        if (table.rowCount() < 2) {
+            // No separator row possible — return plain lines
+            StringBuilder sb = new StringBuilder();
+            for (int c = 0; c < grid[0].length; c++) {
+                if (c > 0) sb.append('\n');
+                sb.append(escape(grid[0][c].trim()));
+            }
+            return sb.toString();
+        }
+
+        int cols = grid[0].length;
+
+        // Compute column widths: max(3, max content length across all rows)
+        int[] widths = new int[cols];
+        for (int c = 0; c < cols; c++) {
+            widths[c] = 3;
+        }
+        for (String[] row : grid) {
+            for (int c = 0; c < cols; c++) {
+                String cell = c < row.length ? row[c].trim() : "";
+                widths[c] = Math.max(widths[c], escape(cell).length());
+            }
+        }
+
+        StringBuilder sb = new StringBuilder();
+
+        // Header row
+        sb.append(buildRow(grid[0], widths, cols));
+        sb.append('\n');
+
+        // Separator row
+        sb.append('|');
+        for (int c = 0; c < cols; c++) {
+            sb.append('-').append("-".repeat(widths[c])).append('-').append('|');
+        }
+        sb.append('\n');
+
+        // Data rows
+        for (int r = 1; r < grid.length; r++) {
+            sb.append(buildRow(grid[r], widths, cols));
+            if (r < grid.length - 1) {
+                sb.append('\n');
+            }
+        }
+
+        return sb.toString();
+    }
+
+    private static String buildRow(String[] row, int[] widths, int cols) {
+        StringBuilder sb = new StringBuilder();
+        sb.append('|');
+        for (int c = 0; c < cols; c++) {
+            String cell = c < row.length ? escape(row[c].trim()) : "";
+            sb.append(' ').append(padRight(cell, widths[c])).append(' ').append('|');
+        }
+        return sb.toString();
+    }
+
+    private static String escape(String cell) {
+        return cell.replace("|", "\\|");
+    }
+
+    private static String padRight(String s, int width) {
+        if (s.length() >= width) return s;
+        return s + " ".repeat(width - s.length());
+    }
+}
diff --git a/app/common/src/test/java/stirling/software/SPDF/pdf/parser/LineAlignmentTableParserTest.java b/app/common/src/test/java/stirling/software/SPDF/pdf/parser/LineAlignmentTableParserTest.java
deleted file mode 100644
index fbbf5af9cf..0000000000
--- a/app/common/src/test/java/stirling/software/SPDF/pdf/parser/LineAlignmentTableParserTest.java
+++ /dev/null
@@ -1,153 +0,0 @@
-package stirling.software.SPDF.pdf.parser;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static stirling.software.SPDF.pdf.parser.PdfModels.*;
-
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-
-/**
- * Unit tests for {@link LineAlignmentTableParser}, focused on the coincident-line merge logic and
- * column-grid construction.
- */
-class LineAlignmentTableParserTest {
-
-    private final LineAlignmentTableParser parser = new LineAlignmentTableParser();
-
-    // ── mergeCoincidentLines ─────────────────────────────────────────────────────────────────────
-
-    @Test
-    void mergeCoincidentLines_singleLine_unchanged() {
-        var lines = List.of(tokenized(rawLine(10f, 100f, "Revenue")));
-        assertThat(parser.mergeCoincidentLines(lines)).hasSize(1);
-    }
-
-    @Test
-    void mergeCoincidentLines_distinctYLines_unchanged() {
-        // Two lines at different y positions — must NOT be merged.
-        var lines =
-                List.of(
-                        tokenized(rawLine(10f, 100f, "Revenue")),
-                        tokenized(rawLine(10f, 115f, "Cost")));
-        assertThat(parser.mergeCoincidentLines(lines)).hasSize(2);
-    }
-
-    @Test
-    void mergeCoincidentLines_sameY_merged() {
-        // Simulates a financial-table row split by LineBuilder at the column gap:
-        //   label fragment at x=72  → "Revenue"
-        //   value fragment at x=350 → "1,234"
-        // Both have y=100. After merge they should form one TokenizedLine.
-        var label = rawLine(72f, 100f, "Revenue");
-        var value = rawLine(350f, 100f, "1,234");
-
-        var merged = parser.mergeCoincidentLines(List.of(tokenized(label), tokenized(value)));
-
-        assertThat(merged).hasSize(1);
-        // The merged line should contain tokens from both halves.
-        var tokens = merged.get(0).all();
-        assertThat(tokens.stream().map(t -> t.text()).toList())
-                .containsExactlyInAnyOrder("Revenue", "1,234");
-    }
-
-    @Test
-    void mergeCoincidentLines_sameY_mergedLineHasCorrectBounds() {
-        var label = rawLine(72f, 100f, "Revenue"); // 7 chars × 6pt = 42pt wide → right = 114
-        var value = rawLine(350f, 100f, "1,234"); // 5 chars × 6pt = 30pt wide → right = 380
-
-        var merged = parser.mergeCoincidentLines(List.of(tokenized(label), tokenized(value)));
-
-        var bounds = merged.get(0).line().bounds();
-        assertThat(bounds.x()).isEqualTo(72f);
-        assertThat(bounds.right()).isEqualTo(380f);
-    }
-
-    @Test
-    void mergeCoincidentLines_withinTolerance_merged() {
-        // Lines 1.5pt apart (within ROW_MERGE_TOLERANCE_PT = 2pt) should merge.
-        var a = rawLine(10f, 100.0f, "Alpha");
-        var b = rawLine(200f, 101.5f, "99");
-
-        var merged = parser.mergeCoincidentLines(List.of(tokenized(a), tokenized(b)));
-        assertThat(merged).hasSize(1);
-    }
-
-    @Test
-    void mergeCoincidentLines_beyondTolerance_notMerged() {
-        // Lines 3pt apart (beyond ROW_MERGE_TOLERANCE_PT = 2pt) should NOT merge.
-        var a = rawLine(10f, 100.0f, "Alpha");
-        var b = rawLine(200f, 103.0f, "99");
-
-        var merged = parser.mergeCoincidentLines(List.of(tokenized(a), tokenized(b)));
-        assertThat(merged).hasSize(2);
-    }
-
-    @Test
-    void mergeCoincidentLines_threeCoincident_allMerged() {
-        // Three fragments at the same y (e.g. wide financial table with two value columns).
-        var a = rawLine(72f, 100f, "Revenue");
-        var b = rawLine(300f, 100f, "1,234");
-        var c = rawLine(400f, 100f, "5,678");
-
-        var merged = parser.mergeCoincidentLines(List.of(tokenized(a), tokenized(b), tokenized(c)));
-        assertThat(merged).hasSize(1);
-        assertThat(merged.get(0).all()).hasSize(3);
-    }
-
-    @Test
-    void mergeCoincidentLines_coincidentPairFollowedByDistinctLine_twoGroups() {
-        var a = rawLine(72f, 100f, "Revenue");
-        var b = rawLine(350f, 100f, "1,234"); // same y as a → merges with a
-        var c = rawLine(10f, 115f, "Expenses"); // different y → stays separate
-
-        var merged = parser.mergeCoincidentLines(List.of(tokenized(a), tokenized(b), tokenized(c)));
-        assertThat(merged).hasSize(2);
-    }
-
-    @Test
-    void mergeCoincidentLines_numericAnchorStatus_correctAfterMerge() {
-        // After merging, the combined line should be an anchor (≥2 numeric tokens).
-        // "Revenue" alone → not an anchor. "1,234  567" alone → anchor.
-        // Merged → anchor with at least 2 numerics.
-        var label = rawLine(72f, 100f, "Revenue");
-        var values = rawLineMultiWord(350f, 100f, "1,234", 30f, "567", 30f);
-
-        var merged = parser.mergeCoincidentLines(List.of(tokenized(label), tokenized(values)));
-
-        assertThat(merged).hasSize(1);
-        assertThat(merged.get(0).isAnchor()).isTrue();
-    }
-
-    // ── helpers ──────────────────────────────────────────────────────────────────────────────────
-
-    /** Creates a RawLine with a single TextFragment of the given text at the given position. */
-    private static RawLine rawLine(float x, float y, String text) {
-        float width = text.length() * 6f; // ~6pt per char — rough but consistent
-        float height = 12f;
-        Bounds bounds = new Bounds(x, y, width, height);
-        TextFragment fragment =
-                new TextFragment("tf-test", text, bounds, y + height, 11f, "Helvetica", false);
-        return new RawLine("ln-test", List.of(fragment), bounds, 1);
-    }
-
-    /**
-     * Creates a RawLine with two TextFragments representing two words separated by a small gap.
-     * Used to simulate a values-only line with multiple numeric tokens.
-     */
-    private static RawLine rawLineMultiWord(
-            float x, float y, String word1, float w1, String word2, float w2) {
-        float height = 12f;
-        Bounds b1 = new Bounds(x, y, w1, height);
-        Bounds b2 = new Bounds(x + w1 + 5f, y, w2, height);
-        TextFragment f1 = new TextFragment("tf-1", word1, b1, y + height, 11f, "Helvetica", false);
-        TextFragment f2 = new TextFragment("tf-2", word2, b2, y + height, 11f, "Helvetica", false);
-        Bounds lineBounds = new Bounds(x, y, x + w1 + 5f + w2 - x, height);
-        return new RawLine("ln-test", List.of(f1, f2), lineBounds, 1);
-    }
-
-    /** Tokenises a RawLine via the parser's own tokenise logic (package-private access). */
-    private LineAlignmentTableParser.TokenizedLine tokenized(RawLine line) {
-        return parser.tokenize(line);
-    }
-}
diff --git a/app/common/src/test/java/stirling/software/common/pdf/PdfMarkdownConverterTest.java b/app/common/src/test/java/stirling/software/common/pdf/PdfMarkdownConverterTest.java
new file mode 100644
index 0000000000..b3c104da85
--- /dev/null
+++ b/app/common/src/test/java/stirling/software/common/pdf/PdfMarkdownConverterTest.java
@@ -0,0 +1,269 @@
+package stirling.software.common.pdf;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import stirling.software.jpdfium.PdfDocument;
+import stirling.software.jpdfium.text.TextLine;
+import stirling.software.jpdfium.text.TextWord;
+
+/**
+ * Accuracy and robustness tests for {@link PdfMarkdownConverter}, comparing conversion output
+ * against hand-authored golden Markdown for a set of owned/synthetic fixtures.
+ *
+ * <p>The {@link #gatedFixtures()} set is enforced in CI: those fixtures currently convert within
+ * the accuracy threshold and guard against regressions. Fixtures still being iterated on live in
+ * {@link #wipFixtures()} under a {@link Disabled} test so the goldens stay in the tree without
+ * breaking the build. Enable the WIP test locally to see per-fixture scores while working on the
+ * converter.
+ */
+class PdfMarkdownConverterTest {
+
+    /** Accuracy threshold: output must share at least this fraction of content with the golden. */
+    private static final double THRESHOLD = 0.95;
+
+    @TempDir Path tmp;
+
+    /** Fixtures that meet the accuracy threshold today and therefore gate CI. */
+    static Stream<Arguments> gatedFixtures() {
+        return Stream.of(
+                Arguments.of("multi-column-test_lorem.pdf", "multi-column-test_lorem.md"),
+                Arguments.of("bordered-table-test_widget.pdf", "bordered-table-test_widget.md"),
+                Arguments.of("many-tables-test_stress.pdf", "many-tables-test_stress.md"));
+    }
+
+    /** Fixtures still below the threshold; tracked here, enable locally to iterate. */
+    static Stream<Arguments> wipFixtures() {
+        return Stream.of(
+                Arguments.of(
+                        "wrapped-cell-test_expense-report.pdf",
+                        "wrapped-cell-test_expense-report.md"));
+    }
+
+    @ParameterizedTest(name = "{0}")
+    @MethodSource("gatedFixtures")
+    void convertMatchesGoldenMarkdown(String pdfName, String mdName) throws IOException {
+        assertConversionMatchesGolden(pdfName, mdName);
+    }
+
+    @Disabled("WIP fixtures below the accuracy threshold; enable locally to iterate")
+    @ParameterizedTest(name = "{0}")
+    @MethodSource("wipFixtures")
+    void convertMatchesGoldenMarkdownWip(String pdfName, String mdName) throws IOException {
+        assertConversionMatchesGolden(pdfName, mdName);
+    }
+
+    /**
+     * Degenerate/extreme geometry must not crash the converter. A crafted or malformed PDF can
+     * position text anywhere via a text matrix, so a row's words can span from near the origin to a
+     * coordinate beyond {@link Integer#MAX_VALUE}. The old column-detection code sized an {@code
+     * int[]} straight from {@code (int) Math.ceil(maxX) - lo}, which either allocated a multi-GB
+     * array (OutOfMemoryError) or overflowed to a negative length (NegativeArraySizeException) —
+     * taking down the request thread. Detection must instead bail out and return no columns.
+     */
+    @Test
+    void columnDetectionSurvivesDegenerateGeometry() {
+        // x ≈ 2.5e9 is past Integer.MAX_VALUE; combined with a near-origin word it yields an
+        // implausible span that the pre-fix code turned into a fatal array allocation.
+        List<TextLine> rows = new ArrayList<>();
+        for (int r = 0; r < 4; r++) {
+            float y = 400f - r * 12f;
+            TextWord near = new TextWord(List.of(), 50f, y, 30f, 10f);
+            TextWord far = new TextWord(List.of(), 2_500_000_000f, y, 30f, 10f);
+            rows.add(new TextLine(List.of(near, far), 50f, y, 2_499_999_980f, 10f));
+        }
+
+        List<float[]> columns =
+                assertDoesNotThrow(() -> PdfMarkdownConverter.findColumnRangesFromLines(rows));
+        assertTrue(
+                columns.isEmpty(),
+                "implausible page span should disable column detection, not allocate from it");
+    }
+
+    private void assertConversionMatchesGolden(String pdfName, String mdName) throws IOException {
+        Path pdfPath = tmp.resolve(pdfName);
+        try (InputStream in =
+                getClass().getResourceAsStream("/pdf-ingestion-fixtures/" + pdfName)) {
+            if (in == null) {
+                fail("Fixture not found on classpath: /pdf-ingestion-fixtures/" + pdfName);
+            }
+            Files.copy(in, pdfPath);
+        }
+
+        String actual;
+        try (PdfDocument doc = PdfDocument.open(pdfPath)) {
+            actual = new PdfMarkdownConverter().convert(doc);
+        }
+
+        String expected;
+        try (InputStream in = getClass().getResourceAsStream("/pdf-ingestion-fixtures/" + mdName)) {
+            if (in == null) {
+                fail("Golden file not found on classpath: /pdf-ingestion-fixtures/" + mdName);
+            }
+            expected = new String(in.readAllBytes(), StandardCharsets.UTF_8);
+        }
+
+        // Image placeholders are not scored: their body text is a TODO ("ideally, add the info
+        // available about the image...") rather than real content, so comparing it would penalise
+        // output for matching a placeholder we intend to replace. Drop those lines from both sides.
+        expected = stripImagePlaceholders(expected);
+        actual = stripImagePlaceholders(actual);
+
+        double similarity = similarity(expected, actual);
+        if (similarity < THRESHOLD) {
+            fail(
+                    String.format(
+                            "Markdown output differs from golden file '%s' by %.1f%% (threshold %.0f%%):%n%s",
+                            mdName,
+                            (1.0 - similarity) * 100,
+                            (1.0 - THRESHOLD) * 100,
+                            unifiedDiff(expected, actual)));
+        }
+    }
+
+    /** Substring identifying an image-placeholder line, which is excluded from scoring. */
+    private static final String IMAGE_PLACEHOLDER_MARKER = "Image intentionally redacted";
+
+    /**
+     * Removes non-content lines from the comparison: image placeholders (TODO text we intend to
+     * replace) and GFM table separator rows (the {@code |---|---|} divider, whose exact dash count
+     * is cosmetic — any run of three or more dashes is valid Markdown).
+     */
+    private static String stripImagePlaceholders(String md) {
+        StringBuilder sb = new StringBuilder();
+        for (String line : md.split("\n", -1)) {
+            if (line.contains(IMAGE_PLACEHOLDER_MARKER)
+                    || line.strip().startsWith("<image redacted")
+                    || isTableSeparatorRow(line)) {
+                continue;
+            }
+            if (sb.length() > 0) {
+                sb.append('\n');
+            }
+            sb.append(line);
+        }
+        return sb.toString();
+    }
+
+    /** True for a GFM table separator row, e.g. {@code |---|:--:|---|} (only |, -, :, space). */
+    private static boolean isTableSeparatorRow(String line) {
+        String t = line.strip();
+        if (!t.contains("-")) {
+            return false;
+        }
+        return t.chars().allMatch(c -> c == '|' || c == '-' || c == ':' || c == ' ');
+    }
+
+    /**
+     * Character-level similarity: proportion of expected characters that appear in the LCS. O(n*m)
+     * but golden files are small enough that this is fine.
+     */
+    private static double similarity(String expected, String actual) {
+        if (expected.isEmpty() && actual.isEmpty()) return 1.0;
+        if (expected.isEmpty() || actual.isEmpty()) return 0.0;
+        // Strip all whitespace for a content-focused comparison
+        String e = expected.replaceAll("\\s+", " ").strip();
+        String a = actual.replaceAll("\\s+", " ").strip();
+        int lcs = lcsLength(e, a);
+        return (double) lcs / Math.max(e.length(), a.length());
+    }
+
+    private static int lcsLength(String a, String b) {
+        // Use two-row DP to keep memory reasonable
+        int m = a.length(), n = b.length();
+        int[] prev = new int[n + 1];
+        int[] curr = new int[n + 1];
+        for (int i = 1; i <= m; i++) {
+            for (int j = 1; j <= n; j++) {
+                if (a.charAt(i - 1) == b.charAt(j - 1)) {
+                    curr[j] = prev[j - 1] + 1;
+                } else {
+                    curr[j] = Math.max(curr[j - 1], prev[j]);
+                }
+            }
+            int[] tmp = prev;
+            prev = curr;
+            curr = tmp;
+            java.util.Arrays.fill(curr, 0);
+        }
+        return prev[n];
+    }
+
+    private static String unifiedDiff(String expected, String actual) {
+        String[] expectedLines = expected.split("\n", -1);
+        String[] actualLines = actual.split("\n", -1);
+
+        List<String> diff = new ArrayList<>();
+        diff.add("--- expected");
+        diff.add("+++ actual");
+
+        int maxLines = Math.max(expectedLines.length, actualLines.length);
+        int context = 3;
+        boolean inHunk = false;
+        int hunkStart = -1;
+        List<String> hunkLines = new ArrayList<>();
+
+        for (int i = 0; i < maxLines; i++) {
+            String exp = i < expectedLines.length ? expectedLines[i] : null;
+            String act = i < actualLines.length ? actualLines[i] : null;
+
+            boolean changed = exp == null || act == null || !exp.equals(act);
+            if (changed) {
+                if (!inHunk) {
+                    inHunk = true;
+                    hunkStart = Math.max(0, i - context);
+                    // add context lines before change
+                    for (int c = hunkStart; c < i; c++) {
+                        hunkLines.add(" " + (c < expectedLines.length ? expectedLines[c] : ""));
+                    }
+                }
+                if (exp != null) hunkLines.add("-" + exp);
+                if (act != null) hunkLines.add("+" + act);
+            } else {
+                if (inHunk) {
+                    hunkLines.add(" " + exp);
+                    // check if we're far enough past the last change to close the hunk
+                    boolean moreChanges = false;
+                    for (int j = i + 1; j < Math.min(i + context, maxLines); j++) {
+                        String e2 = j < expectedLines.length ? expectedLines[j] : null;
+                        String a2 = j < actualLines.length ? actualLines[j] : null;
+                        if (e2 == null || a2 == null || !e2.equals(a2)) {
+                            moreChanges = true;
+                            break;
+                        }
+                    }
+                    if (!moreChanges && (i - hunkStart) >= context) {
+                        diff.add("@@ -" + (hunkStart + 1) + " @@");
+                        diff.addAll(hunkLines);
+                        hunkLines.clear();
+                        inHunk = false;
+                    }
+                }
+            }
+        }
+
+        if (inHunk && !hunkLines.isEmpty()) {
+            diff.add("@@ -" + (hunkStart + 1) + " @@");
+            diff.addAll(hunkLines);
+        }
+
+        return String.join("\n", diff);
+    }
+}
diff --git a/app/common/src/test/resources/pdf-ingestion-fixtures/bordered-table-test_widget.md b/app/common/src/test/resources/pdf-ingestion-fixtures/bordered-table-test_widget.md
new file mode 100644
index 0000000000..4b590e4b63
--- /dev/null
+++ b/app/common/src/test/resources/pdf-ingestion-fixtures/bordered-table-test_widget.md
@@ -0,0 +1,10 @@
+# Widget Inventory Report
+
+This report lists current stock levels for each warehouse.
+
+| Region | Units | Status |
+|---|---|---|
+| North | 1200 | OK |
+| South | 950 | Low |
+| East | 1430 | OK |
+| West | 875 | Low |
diff --git a/app/common/src/test/resources/pdf-ingestion-fixtures/bordered-table-test_widget.pdf b/app/common/src/test/resources/pdf-ingestion-fixtures/bordered-table-test_widget.pdf
new file mode 100644
index 0000000000..8da041e28d
Binary files /dev/null and b/app/common/src/test/resources/pdf-ingestion-fixtures/bordered-table-test_widget.pdf differ
diff --git a/app/common/src/test/resources/pdf-ingestion-fixtures/many-tables-test_stress.md b/app/common/src/test/resources/pdf-ingestion-fixtures/many-tables-test_stress.md
new file mode 100644
index 0000000000..4b456165df
--- /dev/null
+++ b/app/common/src/test/resources/pdf-ingestion-fixtures/many-tables-test_stress.md
@@ -0,0 +1,222 @@
+Intro paragraph for section 1.
+
+| Name | Qty |
+|---|---|
+| alpha | 101 |
+| delta | 201 |
+
+# Section 2 Heading
+
+| Name | Qty | Price |
+|---|---|---|
+| alpha | 101 | charlie |
+| delta | 201 | foxtrot |
+| golf | 301 | india |
+
+## Section 3 Heading
+
+| Name | Qty | Price | Region |
+|---|---|---|---|
+| alpha | 101 | charlie | 3 |
+| delta | 201 | foxtrot | 13 |
+| golf | 301 | india | 23 |
+| juliet | 401 | lima | 33 |
+
+Intro paragraph for section 4.
+
+| Name | Qty | Price | Region | Status |
+|---|---|---|---|---|
+| alpha | 101 | charlie | 3 | echo |
+| delta | 201 | foxtrot | 13 | hotel |
+| golf | 301 | india | 23 | kilo |
+| juliet | 401 | lima | 33 | november |
+| mike | 501 | oscar | 43 | alpha |
+
+# Section 5 Heading
+
+| Name | Qty |
+|---|---|
+| alpha | 101 |
+| delta | 201 |
+| golf | 301 |
+| juliet | 401 |
+| mike | 501 |
+| papa | 601 |
+
+| Name | Qty | Price |
+|---|---|---|
+| alpha | 101 | charlie |
+| delta | 201 | foxtrot |
+
+# Section 7 Heading
+
+Intro paragraph for section 7.
+
+| Name | Qty | Price | Region |
+|---|---|---|---|
+| alpha | 101 | charlie | 3 |
+| delta | 201 | foxtrot | 13 |
+| golf | 301 | india | 23 |
+
+## Section 8 Heading
+
+| Name | Qty | Price | Region | Status |
+|---|---|---|---|---|
+| alpha | 101 | charlie | 3 | echo |
+| delta | 201 | foxtrot | 13 | hotel |
+| golf | 301 | india | 23 | kilo |
+| juliet | 401 | lima | 33 | november |
+
+| Name | Qty |
+|---|---|
+| alpha | 101 |
+| delta | 201 |
+| golf | 301 |
+| juliet | 401 |
+| mike | 501 |
+
+# Section 10 Heading
+
+Intro paragraph for section 10.
+
+| Name | Qty | Price |
+|---|---|---|
+| alpha | 101 | charlie |
+| delta | 201 | foxtrot |
+| golf | 301 | india |
+| juliet | 401 | lima |
+| mike | 501 | oscar |
+| papa | 601 | bravo |
+
+| Name | Qty | Price | Region |
+|---|---|---|---|
+| alpha | 101 | charlie | 3 |
+| delta | 201 | foxtrot | 13 |
+
+# Section 12 Heading
+
+| Name | Qty | Price | Region | Status |
+|---|---|---|---|---|
+| alpha | 101 | charlie | 3 | echo |
+| delta | 201 | foxtrot | 13 | hotel |
+| golf | 301 | india | 23 | kilo |
+
+## Section 13 Heading
+
+Intro paragraph for section 13.
+
+| Name | Qty |
+|---|---|
+| alpha | 101 |
+| delta | 201 |
+| golf | 301 |
+| juliet | 401 |
+
+| Name | Qty | Price |
+|---|---|---|
+| alpha | 101 | charlie |
+| delta | 201 | foxtrot |
+| golf | 301 | india |
+| juliet | 401 | lima |
+| mike | 501 | oscar |
+
+# Section 15 Heading
+
+| Name | Qty | Price | Region |
+|---|---|---|---|
+| alpha | 101 | charlie | 3 |
+| delta | 201 | foxtrot | 13 |
+| golf | 301 | india | 23 |
+| juliet | 401 | lima | 33 |
+| mike | 501 | oscar | 43 |
+| papa | 601 | bravo | 53 |
+
+Intro paragraph for section 16.
+
+| Name | Qty | Price | Region | Status |
+|---|---|---|---|---|
+| alpha | 101 | charlie | 3 | echo |
+| delta | 201 | foxtrot | 13 | hotel |
+
+# Section 17 Heading
+
+| Name | Qty |
+|---|---|
+| alpha | 101 |
+| delta | 201 |
+| golf | 301 |
+
+## Section 18 Heading
+
+| Name | Qty | Price |
+|---|---|---|
+| alpha | 101 | charlie |
+| delta | 201 | foxtrot |
+| golf | 301 | india |
+| juliet | 401 | lima |
+
+Intro paragraph for section 19.
+
+| Name | Qty | Price | Region |
+|---|---|---|---|
+| alpha | 101 | charlie | 3 |
+| delta | 201 | foxtrot | 13 |
+| golf | 301 | india | 23 |
+| juliet | 401 | lima | 33 |
+| mike | 501 | oscar | 43 |
+
+# Section 20 Heading
+
+| Name | Qty | Price | Region | Status |
+|---|---|---|---|---|
+| alpha | 101 | charlie | 3 | echo |
+| delta | 201 | foxtrot | 13 | hotel |
+| golf | 301 | india | 23 | kilo |
+| juliet | 401 | lima | 33 | november |
+| mike | 501 | oscar | 43 | alpha |
+| papa | 601 | bravo | 53 | delta |
+
+| Name | Qty |
+|---|---|
+| alpha | 101 |
+| delta | 201 |
+
+# Section 22 Heading
+
+Intro paragraph for section 22.
+
+| Name | Qty | Price |
+|---|---|---|
+| alpha | 101 | charlie |
+| delta | 201 | foxtrot |
+| golf | 301 | india |
+
+## Section 23 Heading
+
+| Name | Qty | Price | Region |
+|---|---|---|---|
+| alpha | 101 | charlie | 3 |
+| delta | 201 | foxtrot | 13 |
+| golf | 301 | india | 23 |
+| juliet | 401 | lima | 33 |
+
+| Name | Qty | Price | Region | Status |
+|---|---|---|---|---|
+| alpha | 101 | charlie | 3 | echo |
+| delta | 201 | foxtrot | 13 | hotel |
+| golf | 301 | india | 23 | kilo |
+| juliet | 401 | lima | 33 | november |
+| mike | 501 | oscar | 43 | alpha |
+
+# Section 25 Heading
+
+Intro paragraph for section 25.
+
+| Name | Qty |
+|---|---|
+| alpha | 101 |
+| delta | 201 |
+| golf | 301 |
+| juliet | 401 |
+| mike | 501 |
+| papa | 601 |
diff --git a/app/common/src/test/resources/pdf-ingestion-fixtures/many-tables-test_stress.pdf b/app/common/src/test/resources/pdf-ingestion-fixtures/many-tables-test_stress.pdf
new file mode 100644
index 0000000000..f12925cda3
Binary files /dev/null and b/app/common/src/test/resources/pdf-ingestion-fixtures/many-tables-test_stress.pdf differ
diff --git a/app/common/src/test/resources/pdf-ingestion-fixtures/multi-column-test_lorem.md b/app/common/src/test/resources/pdf-ingestion-fixtures/multi-column-test_lorem.md
new file mode 100644
index 0000000000..5c35de111f
--- /dev/null
+++ b/app/common/src/test/resources/pdf-ingestion-fixtures/multi-column-test_lorem.md
@@ -0,0 +1,25 @@
+# Lorem Ipsum in Two Columns
+
+## 1. Origins
+
+Lorem ipsum dolor sit amet consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+
+## 2. Structure
+
+Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+
+## 3. Usage
+
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
+
+## 4. Variations
+
+Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+## 5. Typography
+
+Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam various turpis et commodo pharetra est.
+
+## 6. Conclusion
+
+Nunc nonummy metus. Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis vestibulum.
diff --git a/app/common/src/test/resources/pdf-ingestion-fixtures/multi-column-test_lorem.pdf b/app/common/src/test/resources/pdf-ingestion-fixtures/multi-column-test_lorem.pdf
new file mode 100644
index 0000000000..36dc3a1a65
Binary files /dev/null and b/app/common/src/test/resources/pdf-ingestion-fixtures/multi-column-test_lorem.pdf differ
diff --git a/app/common/src/test/resources/pdf-ingestion-fixtures/wrapped-cell-test_expense-report.md b/app/common/src/test/resources/pdf-ingestion-fixtures/wrapped-cell-test_expense-report.md
new file mode 100644
index 0000000000..8008a3b970
--- /dev/null
+++ b/app/common/src/test/resources/pdf-ingestion-fixtures/wrapped-cell-test_expense-report.md
@@ -0,0 +1,62 @@
+# Employee Expense Report
+
+Reimbursement Request
+
+EMP-1047
+
+**Report Header**
+
+| Employee Name | Michael Tran |
+|---|---|
+| Employee ID | EMP-1047 |
+| Department | Client Services |
+| Report Date | January 20th, 2026 |
+| Reporting Period | January 5th–16th, 2026 |
+| Manager Approver | Laura Simmons |
+
+**Company Information**
+
+| Company | Summit Consulting Partners |
+|---|---|
+| Company Address | 88 Riverside Plaza, Suite 1400, New York, NY 10069 |
+| Accounting Department Email | expenses@example.com |
+
+**Trip Purpose**
+
+The trip was undertaken for client onsite meetings with Atlantic Energy Solutions in Boston, MA.
+
+**Expense Details**
+
+| Description | Amount | Date | Category |
+|---|---|---|---|
+| Flight (NYC to Boston roundtrip) | $325.40 | January 5th, 2026 | Airline ticket |
+| Hotel (3 nights at Harborview Hotel) | $822.75 | January 5th–8th, 2026 | Lodging |
+| Taxi from airport to hotel | $48.00 | January 5th, 2026 | Ground transportation |
+| Client dinner (3 attendees) | $186.20 | January 6th, 2026 | Meals |
+| Parking at JFK Airport | $72.00 | January 5th–8th, 2026 | Parking |
+| Breakfast (per diem not used) | $18.50 | January 7th, 2026 | Meals |
+
+| Description | Amount | Date | Category |
+|---|---|---|---|
+| Uber to client office | $22.10 | January 7th, 2026 | Ground transportation |
+| Printing + presentation materials | $46.90 | January 8th, 2026 | Materials |
+| Lunch with client | $39.75 | January 8th, 2026 | Meals |
+| Office supplies (notebooks, pens) | $27.60 | January 10th, 2026 | Supplies |
+| Mileage reimbursement (client visit in NJ, 42 miles @ $0.67/mile) | $28.14 | January 14th, 2026 | Mileage |
+| Team lunch meeting (internal) | $64.30 | January 15th, 2026 | Meals |
+
+Total Expenses $1,701.64
+
+Reimbursement Method
+
+Reimbursement method Direct deposit
+
+Notes
+
+All receipts are attached. Expenses are business-related and comply with company travel policy.
+
+**Approval**
+
+Michael Tran, Employee
+
+Laura Simmons, Manager
diff --git a/app/common/src/test/resources/pdf-ingestion-fixtures/wrapped-cell-test_expense-report.pdf b/app/common/src/test/resources/pdf-ingestion-fixtures/wrapped-cell-test_expense-report.pdf
new file mode 100644
index 0000000000..95a0b2e07a
Binary files /dev/null and b/app/common/src/test/resources/pdf-ingestion-fixtures/wrapped-cell-test_expense-report.pdf differ
diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdown.java b/app/core/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdown.java
index ce5a610789..42ebd51ab3 100644
--- a/app/core/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdown.java
+++ b/app/core/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdown.java
@@ -1,11 +1,13 @@
 package stirling.software.SPDF.model.api.converters;
 
-import org.springframework.core.io.Resource;
+import java.nio.charset.StandardCharsets;
+
 import org.springframework.http.MediaType;
 import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.ModelAttribute;
 import org.springframework.web.multipart.MultipartFile;
 
+import io.github.pixee.security.Filenames;
 import io.swagger.v3.oas.annotations.Operation;
 
 import lombok.RequiredArgsConstructor;
@@ -15,8 +17,11 @@
 import stirling.software.common.annotations.api.ConvertApi;
 import stirling.software.common.enumeration.ResourceWeight;
 import stirling.software.common.model.api.PDFFile;
-import stirling.software.common.util.PDFToFile;
+import stirling.software.common.pdf.PdfMarkdownConverter;
+import stirling.software.common.util.TempFile;
 import stirling.software.common.util.TempFileManager;
+import stirling.software.common.util.WebResponseUtils;
+import stirling.software.jpdfium.PdfDocument;
 
 @ConvertApi
 @RequiredArgsConstructor
@@ -33,10 +38,27 @@ public class ConvertPDFToMarkdown {
             summary = "Convert PDF to Markdown",
             description =
                     "This endpoint converts a PDF file to Markdown format. Input:PDF Output:Markdown Type:SISO")
-    public ResponseEntity<Resource> processPdfToMarkdown(@ModelAttribute PDFFile file)
+    public ResponseEntity<byte[]> processPdfToMarkdown(@ModelAttribute PDFFile file)
             throws Exception {
         MultipartFile inputFile = file.getFileInput();
-        PDFToFile pdfToFile = new PDFToFile(tempFileManager);
-        return pdfToFile.processPdfToMarkdown(inputFile);
+
+        String originalName = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
+        String baseName =
+                originalName.contains(".")
+                        ? originalName.substring(0, originalName.lastIndexOf('.'))
+                        : originalName;
+
+        String markdown;
+        try (TempFile tempInput = new TempFile(tempFileManager, ".pdf")) {
+            inputFile.transferTo(tempInput.getFile());
+            try (PdfDocument doc = PdfDocument.open(tempInput.getPath())) {
+                markdown = new PdfMarkdownConverter().convert(doc);
+            }
+        }
+
+        return WebResponseUtils.bytesToWebResponse(
+                markdown.getBytes(StandardCharsets.UTF_8),
+                baseName + ".md",
+                MediaType.valueOf("text/markdown"));
     }
 }
diff --git a/app/core/src/test/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdownTest.java b/app/core/src/test/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdownTest.java
index b63e58b524..3bd6b7fadb 100644
--- a/app/core/src/test/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdownTest.java
+++ b/app/core/src/test/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdownTest.java
@@ -1,16 +1,17 @@
 package stirling.software.SPDF.model.api.converters;
 
-import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.Mockito.*;
 import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
 import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*;
 
+import java.io.File;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
 
 import org.junit.jupiter.api.Test;
-import org.mockito.ArgumentCaptor;
 import org.mockito.MockedConstruction;
+import org.mockito.MockedStatic;
 import org.mockito.Mockito;
 import org.springframework.core.io.ByteArrayResource;
 import org.springframework.core.io.Resource;
@@ -21,9 +22,10 @@
 import org.springframework.test.web.servlet.setup.MockMvcBuilders;
 import org.springframework.web.bind.annotation.ExceptionHandler;
 import org.springframework.web.bind.annotation.RestControllerAdvice;
-import org.springframework.web.multipart.MultipartFile;
 
-import stirling.software.common.util.PDFToFile;
+import stirling.software.common.pdf.PdfMarkdownConverter;
+import stirling.software.common.util.TempFile;
+import stirling.software.jpdfium.PdfDocument;
 
 class ConvertPDFToMarkdownTest {
 
@@ -47,68 +49,68 @@ ResponseEntity<Resource> handle(Exception ex) {
     @Test
     void pdfToMarkdownReturnsMarkdownBytes() throws Exception {
         byte[] md = "# heading\n\ncontent\n".getBytes(StandardCharsets.UTF_8);
-
-        try (MockedConstruction<PDFToFile> construction =
-                Mockito.mockConstruction(
-                        PDFToFile.class,
-                        (mock, ctx) -> {
-                            when(mock.processPdfToMarkdown(any(MultipartFile.class)))
-                                    .thenAnswer(
-                                            inv ->
-                                                    ResponseEntity.ok()
-                                                            .header("Content-Type", "text/markdown")
-                                                            .body(new ByteArrayResource(md)));
-                        })) {
-
-            MockMvc mvc = mockMvc();
+        String expectedMd = "# heading\n\ncontent\n";
+
+        File tmpFile = File.createTempFile("test", ".pdf");
+        tmpFile.deleteOnExit();
+
+        try (MockedConstruction<TempFile> tempMock =
+                        Mockito.mockConstruction(
+                                TempFile.class,
+                                (mock, ctx) -> {
+                                    when(mock.getFile()).thenReturn(tmpFile);
+                                    when(mock.getPath()).thenReturn(tmpFile.toPath());
+                                });
+                MockedStatic<PdfDocument> docStatic = Mockito.mockStatic(PdfDocument.class);
+                MockedConstruction<PdfMarkdownConverter> converterMock =
+                        Mockito.mockConstruction(
+                                PdfMarkdownConverter.class,
+                                (mock, ctx) -> when(mock.convert(any())).thenReturn(expectedMd))) {
+
+            PdfDocument mockDoc = Mockito.mock(PdfDocument.class);
+            docStatic.when(() -> PdfDocument.open(any(Path.class))).thenReturn(mockDoc);
 
             MockMultipartFile file =
                     new MockMultipartFile(
-                            "fileInput", // must match the field name in PDFFile
-                            "input.pdf",
-                            "application/pdf",
-                            new byte[] {1, 2, 3});
-
-            // ResponseEntity<Resource> is written synchronously on the request thread,
-            // so there is no async dispatch to wait for (unlike the old StreamingResponseBody
-            // path).
-            mvc.perform(multipart("/api/v1/convert/pdf/markdown").file(file))
+                            "fileInput", "input.pdf", "application/pdf", new byte[] {1, 2, 3});
+
+            mockMvc()
+                    .perform(multipart("/api/v1/convert/pdf/markdown").file(file))
                     .andExpect(status().isOk())
                     .andExpect(header().string("Content-Type", "text/markdown"))
                     .andExpect(content().bytes(md));
-
-            // Verify that exactly one instance was created
-            assert construction.constructed().size() == 1;
-
-            // And that the uploaded file was passed to processPdfToMarkdown()
-            PDFToFile created = construction.constructed().get(0);
-            ArgumentCaptor<MultipartFile> captor = ArgumentCaptor.forClass(MultipartFile.class);
-            verify(created, times(1)).processPdfToMarkdown(captor.capture());
-            MultipartFile passed = captor.getValue();
-
-            // Minimal plausibility checks
-            assertEquals("input.pdf", passed.getOriginalFilename());
-            assertEquals("application/pdf", passed.getContentType());
         }
     }
 
     @Test
     void pdfToMarkdownWhenServiceThrowsReturns500() throws Exception {
-        try (MockedConstruction<PDFToFile> ignored =
-                Mockito.mockConstruction(
-                        PDFToFile.class,
-                        (mock, ctx) -> {
-                            when(mock.processPdfToMarkdown(any(MultipartFile.class)))
-                                    .thenThrow(new RuntimeException("boom"));
-                        })) {
-
-            MockMvc mvc = mockMvc();
+        File tmpFile = File.createTempFile("test", ".pdf");
+        tmpFile.deleteOnExit();
+
+        try (MockedConstruction<TempFile> tempMock =
+                        Mockito.mockConstruction(
+                                TempFile.class,
+                                (mock, ctx) -> {
+                                    when(mock.getFile()).thenReturn(tmpFile);
+                                    when(mock.getPath()).thenReturn(tmpFile.toPath());
+                                });
+                MockedStatic<PdfDocument> docStatic = Mockito.mockStatic(PdfDocument.class);
+                MockedConstruction<PdfMarkdownConverter> converterMock =
+                        Mockito.mockConstruction(
+                                PdfMarkdownConverter.class,
+                                (mock, ctx) ->
+                                        when(mock.convert(any()))
+                                                .thenThrow(new RuntimeException("boom")))) {
+
+            PdfDocument mockDoc = Mockito.mock(PdfDocument.class);
+            docStatic.when(() -> PdfDocument.open(any(Path.class))).thenReturn(mockDoc);
 
             MockMultipartFile file =
                     new MockMultipartFile(
                             "fileInput", "x.pdf", "application/pdf", new byte[] {0x01});
 
-            mvc.perform(multipart("/api/v1/convert/pdf/markdown").file(file))
+            mockMvc()
+                    .perform(multipart("/api/v1/convert/pdf/markdown").file(file))
                     .andExpect(status().isInternalServerError());
         }
     }
diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowOutcome.java b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowOutcome.java
index a7239e8f90..2bed56f0f0 100644
--- a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowOutcome.java
+++ b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowOutcome.java
@@ -21,7 +21,8 @@ public enum AiWorkflowOutcome {
     COMPLETED("completed"),
     UNSUPPORTED_CAPABILITY("unsupported_capability"),
     CANNOT_CONTINUE("cannot_continue"),
-    GENERATE_FILE("generate_file");
+    GENERATE_FILE("generate_file"),
+    CONVERT_MARKDOWN("convert_markdown");
 
     private final String value;
 
diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/service/AiWorkflowService.java b/app/proprietary/src/main/java/stirling/software/proprietary/service/AiWorkflowService.java
index 95f8afcea9..e332b1fed7 100644
--- a/app/proprietary/src/main/java/stirling/software/proprietary/service/AiWorkflowService.java
+++ b/app/proprietary/src/main/java/stirling/software/proprietary/service/AiWorkflowService.java
@@ -67,6 +67,7 @@
 public class AiWorkflowService {
 
     private static final String DOCUMENTS_ENDPOINT = "/api/v1/documents";
+    private static final String PDF_TO_MARKDOWN_ENDPOINT = "/api/v1/convert/pdf/markdown";
 
     private final CustomPDFDocumentFactory pdfDocumentFactory;
     private final AiEngineClient aiEngineClient;
@@ -208,6 +209,7 @@ private WorkflowState advance(
         return switch (response.getOutcome()) {
             case NEED_CONTENT -> onNeedContent(response, filesById, request, listener);
             case NEED_INGEST -> onNeedIngest(response, filesById, request, listener);
+            case CONVERT_MARKDOWN -> onConvertMarkdown(response, filesById, listener);
             case TOOL_CALL -> onToolCall(response, filesById, listener);
             case PLAN -> onPlan(response, filesById, request, listener);
             case ANSWER -> onAnswer(response, filesById, request, listener);
@@ -344,6 +346,69 @@ private WorkflowState onNeedIngest(
         return new WorkflowState.Pending(nextRequest);
     }
 
+    /**
+     * Deterministically convert each requested PDF to Markdown via the {@code
+     * /convert/pdf/markdown} endpoint (backed by {@code PdfMarkdownConverter}) and return the
+     * {@code .md} file(s) as a completed result. No AI resume — the conversion output is the final
+     * answer.
+     */
+    private WorkflowState onConvertMarkdown(
+            AiWorkflowResponse response,
+            Map<String, MultipartFile> filesById,
+            ProgressListener listener) {
+        List<AiFile> filesToConvert = response.getFilesToIngest();
+        if (filesToConvert == null || filesToConvert.isEmpty()) {
+            return new WorkflowState.Terminal(
+                    cannotContinue(
+                            "AI engine requested markdown conversion without listing any files."));
+        }
+
+        try {
+            List<Resource> resultFiles = new ArrayList<>();
+            List<String> inputNames = new ArrayList<>();
+            for (int i = 0; i < filesToConvert.size(); i++) {
+                AiFile file = filesToConvert.get(i);
+                MultipartFile multipartFile = filesById.get(file.getId());
+                if (multipartFile == null) {
+                    return new WorkflowState.Terminal(
+                            cannotContinue(
+                                    "AI engine requested markdown conversion for unknown file: "
+                                            + file.getName()));
+                }
+                listener.onProgress(
+                        AiWorkflowProgressEvent.executingTool(
+                                PDF_TO_MARKDOWN_ENDPOINT, i + 1, filesToConvert.size()));
+                Resource input = toResource(multipartFile);
+                ToolResult result =
+                        callEndpoint(PDF_TO_MARKDOWN_ENDPOINT, Map.of(), List.of(input));
+                resultFiles.addAll(result.files());
+                inputNames.add(multipartFile.getOriginalFilename());
+            }
+            return new WorkflowState.Terminal(
+                    buildCompletedResponse(null, resultFiles, inputNames, null));
+        } catch (InternalApiTimeoutException e) {
+            log.error("PDF to Markdown conversion timed out: {}", e.getMessage());
+            return new WorkflowState.Terminal(
+                    cannotContinue(toolTimeoutMessage(PDF_TO_MARKDOWN_ENDPOINT, e)));
+        } catch (Exception e) {
+            log.error("Failed to convert PDF to Markdown: {}", e.getMessage(), e);
+            return new WorkflowState.Terminal(
+                    cannotContinue(toolFailureMessage(PDF_TO_MARKDOWN_ENDPOINT, e)));
+        }
+    }
+
+    private Resource toResource(MultipartFile file) throws IOException {
+        TempFile tempFile = tempFileManager.createManagedTempFile("ai-workflow");
+        file.transferTo(tempFile.getPath());
+        final String originalName = Filenames.toSimpleFileName(file.getOriginalFilename());
+        return new FileSystemResource(tempFile.getFile()) {
+            @Override
+            public String getFilename() {
+                return originalName;
+            }
+        };
+    }
+
     private void ingestFile(AiFile file, MultipartFile multipartFile) throws IOException {
         List<AiPageText> pages = new ArrayList<>();
         try (PDDocument document = pdfDocumentFactory.load(multipartFile, true)) {
@@ -670,16 +735,7 @@ private static boolean containsStructuredElements(List<?> list) {
     private List<Resource> toResources(Map<String, MultipartFile> filesById) throws IOException {
         List<Resource> resources = new ArrayList<>();
         for (MultipartFile file : filesById.values()) {
-            TempFile tempFile = tempFileManager.createManagedTempFile("ai-workflow");
-            file.transferTo(tempFile.getPath());
-            final String originalName = Filenames.toSimpleFileName(file.getOriginalFilename());
-            resources.add(
-                    new FileSystemResource(tempFile.getFile()) {
-                        @Override
-                        public String getFilename() {
-                            return originalName;
-                        }
-                    });
+            resources.add(toResource(file));
         }
         return resources;
     }
diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/service/PdfContentExtractor.java b/app/proprietary/src/main/java/stirling/software/proprietary/service/PdfContentExtractor.java
index c06007f318..9dccb91f38 100644
--- a/app/proprietary/src/main/java/stirling/software/proprietary/service/PdfContentExtractor.java
+++ b/app/proprietary/src/main/java/stirling/software/proprietary/service/PdfContentExtractor.java
@@ -30,11 +30,7 @@
 import lombok.extern.slf4j.Slf4j;
 
 import stirling.software.SPDF.pdf.parser.PageImageLocator;
-import stirling.software.SPDF.pdf.parser.PdfIngester;
-import stirling.software.SPDF.pdf.parser.PdfModels.ParsedPage;
-import stirling.software.SPDF.pdf.parser.PdfModels.RawLine;
 import stirling.software.SPDF.pdf.parser.PdfModels.TableFragment;
-import stirling.software.SPDF.pdf.parser.PdfModels.TextFragment;
 import stirling.software.SPDF.pdf.parser.TabulaTableParser;
 import stirling.software.common.util.ExceptionUtils;
 import stirling.software.common.util.PdfUtils;
@@ -50,7 +46,6 @@
 public class PdfContentExtractor {
 
     private final TabulaTableParser tabulaTableParser;
-    private final PdfIngester pdfIngester;
 
     private static final int MAX_CHARACTERS_PER_PAGE = 4_000;
 
@@ -196,8 +191,6 @@ private Optional<PdfContentResult> dispatchContentType(
             case PAGE_TEXT, FULL_TEXT ->
                     Optional.<PdfContentResult>ofNullable(
                             extractText(lf, fileReq, remainingPages, remainingCharacters));
-            case PAGE_LAYOUT ->
-                    Optional.<PdfContentResult>ofNullable(extractPageLayout(lf, remainingPages));
             default -> {
                 log.warn(
                         "Content type {} not yet implemented, skipping for {}",
@@ -222,35 +215,6 @@ private ExtractedFileText extractText(
         return extracted.isEmpty() ? null : buildExtractedFileText(lf.fileName(), extracted);
     }
 
-    private PageLayoutFileResult extractPageLayout(LoadedFile lf, int maxPages) throws IOException {
-        List<ParsedPage> parsedPages = pdfIngester.parse(lf.document(), maxPages);
-        List<LayoutPage> pages = new ArrayList<>();
-        for (ParsedPage pp : parsedPages) {
-            if (pp.layoutLines().isEmpty()) continue;
-            List<LayoutLine> lines = new ArrayList<>();
-            for (RawLine rawLine : pp.layoutLines()) {
-                List<LayoutFragment> fragments = new ArrayList<>();
-                for (TextFragment tf : rawLine.fragments()) {
-                    fragments.add(
-                            new LayoutFragment(
-                                    tf.text(),
-                                    tf.bounds().x(),
-                                    tf.bounds().y(),
-                                    tf.bounds().width(),
-                                    tf.fontSize(),
-                                    tf.bold()));
-                }
-                lines.add(new LayoutLine(rawLine.bounds().y(), fragments));
-            }
-            pages.add(new LayoutPage(pp.pageNumber(), lines));
-        }
-        if (pages.isEmpty()) return null;
-        PageLayoutFileResult result = new PageLayoutFileResult();
-        result.setFileName(lf.fileName());
-        result.setPages(pages);
-        return result;
-    }
-
     private WorkflowArtifact buildArtifact(ArtifactKind kind, List<PdfContentResult> results) {
         return switch (kind) {
             case EXTRACTED_TEXT -> {
@@ -258,11 +222,6 @@ private WorkflowArtifact buildArtifact(ArtifactKind kind, List<PdfContentResult>
                 artifact.setFiles(results.stream().map(ExtractedFileText.class::cast).toList());
                 yield artifact;
             }
-            case PAGE_LAYOUT -> {
-                PageLayoutArtifact artifact = new PageLayoutArtifact();
-                artifact.setFiles(results.stream().map(PageLayoutFileResult.class::cast).toList());
-                yield artifact;
-            }
             case TOOL_REPORT ->
                     throw new IllegalArgumentException(
                             "TOOL_REPORT artifacts are not produced by PdfContentExtractor");
@@ -569,7 +528,6 @@ default int charactersConsumed() {
      */
     enum ArtifactKind {
         EXTRACTED_TEXT("extracted_text"),
-        PAGE_LAYOUT("page_layout"),
         TOOL_REPORT("tool_report");
 
         private final String value;
@@ -633,40 +591,4 @@ static final class ToolReportArtifact implements WorkflowArtifact {
             this.report = report;
         }
     }
-
-    // Serialization contract with the Python engine — see PageLayoutArtifactContractTest.
-
-    /** One text fragment with its bounding-box geometry and font properties. */
-    record LayoutFragment(
-            String text, float x, float y, float width, float fontSize, boolean bold) {}
-
-    /** A visual line on the page: y-coordinate and all fragments on that line. */
-    record LayoutLine(float y, List<LayoutFragment> fragments) {}
-
-    /** All layout lines for a single page. */
-    record LayoutPage(int pageNumber, List<LayoutLine> lines) {}
-
-    /** Page layout data for one file, as a PdfContentResult. */
-    @Data
-    static final class PageLayoutFileResult implements PdfContentResult {
-        private String fileName;
-        private List<LayoutPage> pages = new ArrayList<>();
-
-        @Override
-        public ArtifactKind getArtifactKind() {
-            return ArtifactKind.PAGE_LAYOUT;
-        }
-
-        @Override
-        public int pagesConsumed() {
-            return pages.size();
-        }
-    }
-
-    /** Artifact carrying full spatial page layout for all input files. */
-    @Data
-    static final class PageLayoutArtifact implements WorkflowArtifact {
-        private final ArtifactKind kind = ArtifactKind.PAGE_LAYOUT;
-        private List<PageLayoutFileResult> files = new ArrayList<>();
-    }
 }
diff --git a/app/proprietary/src/test/java/stirling/software/proprietary/service/AiWorkflowServiceTest.java b/app/proprietary/src/test/java/stirling/software/proprietary/service/AiWorkflowServiceTest.java
index 5b73ea5cc6..288eeae9b9 100644
--- a/app/proprietary/src/test/java/stirling/software/proprietary/service/AiWorkflowServiceTest.java
+++ b/app/proprietary/src/test/java/stirling/software/proprietary/service/AiWorkflowServiceTest.java
@@ -436,6 +436,33 @@ void generateFileStoresContentDirectlyWithoutToolCall() throws IOException {
         verify(internalApiClient, never()).post(anyString(), any());
     }
 
+    @Test
+    void convertMarkdownRunsDeterministicConversionAndReturnsMdFile() throws IOException {
+        MockMultipartFile input = pdf("shortened.pdf", "pdf-bytes");
+        when(fileIdStrategy.idFor(any())).thenReturn("doc-1");
+        stubOrchestrator(
+                """
+                {
+                  "outcome":"convert_markdown",
+                  "reason":"PDF to Markdown requested.",
+                  "filesToIngest":[{"id":"doc-1","name":"shortened.pdf"}]
+                }
+                """);
+        when(toolMetadataService.shouldUnpackZipResponse("/api/v1/convert/pdf/markdown"))
+                .thenReturn(false);
+        stubEndpoint("/api/v1/convert/pdf/markdown", pdfResource("# Title", "shortened.md"));
+        AtomicInteger ids = stubFileStorage();
+
+        AiWorkflowResponse result = service.orchestrate(requestFor(input, "convert to markdown"));
+
+        assertEquals(AiWorkflowOutcome.COMPLETED, result.getOutcome());
+        assertEquals(1, result.getResultFiles().size());
+        // Extension changes (pdf -> md), so the converter's response filename wins.
+        assertEquals("shortened.md", result.getResultFiles().get(0).getFileName());
+        assertEquals(1, ids.get());
+        verify(internalApiClient, times(1)).post(eq("/api/v1/convert/pdf/markdown"), any());
+    }
+
     @Test
     void toolCallWithoutEndpointFallsBackToCannotContinue() throws IOException {
         MockMultipartFile input = pdf("input.pdf", "bytes");
diff --git a/app/proprietary/src/test/java/stirling/software/proprietary/service/PageLayoutArtifactContractTest.java b/app/proprietary/src/test/java/stirling/software/proprietary/service/PageLayoutArtifactContractTest.java
deleted file mode 100644
index ae853b2e6a..0000000000
--- a/app/proprietary/src/test/java/stirling/software/proprietary/service/PageLayoutArtifactContractTest.java
+++ /dev/null
@@ -1,66 +0,0 @@
-package stirling.software.proprietary.service;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-
-import stirling.software.proprietary.service.PdfContentExtractor.LayoutFragment;
-import stirling.software.proprietary.service.PdfContentExtractor.LayoutLine;
-import stirling.software.proprietary.service.PdfContentExtractor.LayoutPage;
-import stirling.software.proprietary.service.PdfContentExtractor.PageLayoutArtifact;
-import stirling.software.proprietary.service.PdfContentExtractor.PageLayoutFileResult;
-
-import tools.jackson.databind.JsonNode;
-import tools.jackson.databind.json.JsonMapper;
-
-/**
- * Contract test: verifies that {@link PageLayoutArtifact} serializes to the JSON field names that
- * the Python engine expects in {@code engine/src/stirling/contracts/pdf_to_markdown.py}.
- *
- * <p>The companion Python test in {@code tests/test_pdf_to_markdown.py} deserializes the same JSON
- * literal and asserts field values. If either side renames a field, one of these tests fails.
- */
-class PageLayoutArtifactContractTest {
-
-    static final String CONTRACT_JSON =
-            """
-            {"kind":"page_layout","files":[{"fileName":"test.pdf","pages":[{"pageNumber":1,"lines":[{"y":10.0,"fragments":[{"text":"Hello","x":1.0,"y":2.0,"width":30.0,"fontSize":12.0,"bold":true}]}]}]}]}""";
-
-    @Test
-    void pageLayoutArtifact_serialisesToExpectedJson() throws Exception {
-        LayoutFragment fragment = new LayoutFragment("Hello", 1.0f, 2.0f, 30.0f, 12.0f, true);
-        LayoutLine line = new LayoutLine(10.0f, List.of(fragment));
-        LayoutPage page = new LayoutPage(1, List.of(line));
-
-        PageLayoutFileResult fileResult = new PageLayoutFileResult();
-        fileResult.setFileName("test.pdf");
-        fileResult.setPages(List.of(page));
-
-        PageLayoutArtifact artifact = new PageLayoutArtifact();
-        artifact.setFiles(List.of(fileResult));
-
-        JsonNode json = new JsonMapper().valueToTree(artifact);
-
-        assertEquals("page_layout", json.get("kind").asText());
-
-        JsonNode file = json.get("files").get(0);
-        assertEquals("test.pdf", file.get("fileName").asText());
-
-        JsonNode pg = file.get("pages").get(0);
-        assertEquals(1, pg.get("pageNumber").asInt());
-
-        JsonNode ln = pg.get("lines").get(0);
-        assertEquals(10.0, ln.get("y").asDouble(), 0.001);
-
-        JsonNode frag = ln.get("fragments").get(0);
-        assertEquals("Hello", frag.get("text").asText());
-        assertEquals(1.0, frag.get("x").asDouble(), 0.001);
-        assertEquals(2.0, frag.get("y").asDouble(), 0.001);
-        assertEquals(30.0, frag.get("width").asDouble(), 0.001);
-        assertEquals(12.0, frag.get("fontSize").asDouble(), 0.001);
-        assertTrue(frag.get("bold").asBoolean());
-    }
-}
diff --git a/engine/src/stirling/agents/__init__.py b/engine/src/stirling/agents/__init__.py
index cddd0275c3..5410ac098a 100644
--- a/engine/src/stirling/agents/__init__.py
+++ b/engine/src/stirling/agents/__init__.py
@@ -5,7 +5,6 @@
 from .pdf_edit import PdfEditAgent, PdfEditParameterSelector, PdfEditPlanSelection
 from .pdf_questions import PdfQuestionAgent
 from .pdf_review import PdfReviewAgent
-from .pdf_to_markdown import PdfToMarkdownAgent
 from .user_spec import UserSpecAgent
 
 __all__ = [
@@ -16,6 +15,5 @@
     "PdfEditPlanSelection",
     "PdfQuestionAgent",
     "PdfReviewAgent",
-    "PdfToMarkdownAgent",
     "UserSpecAgent",
 ]
diff --git a/engine/src/stirling/agents/orchestrator.py b/engine/src/stirling/agents/orchestrator.py
index 4dbf0b65ab..d2a0b4a19b 100644
--- a/engine/src/stirling/agents/orchestrator.py
+++ b/engine/src/stirling/agents/orchestrator.py
@@ -11,14 +11,13 @@
 from stirling.agents.pdf_edit import PdfEditAgent
 from stirling.agents.pdf_questions import PdfQuestionAgent
 from stirling.agents.pdf_review import PdfReviewAgent
-from stirling.agents.pdf_to_markdown import PdfToMarkdownAgent
 from stirling.agents.user_spec import UserSpecAgent
 from stirling.contracts import (
     AgentDraftWorkflowResponse,
+    ConvertMarkdownResponse,
     ExtractedTextArtifact,
     OrchestratorRequest,
     OrchestratorResponse,
-    PageLayoutArtifact,
     PdfEditResponse,
     PdfQuestionOrchestrateResponse,
     PdfReviewOrchestrateResponse,
@@ -27,7 +26,6 @@
     format_conversation_history,
     format_file_names,
 )
-from stirling.contracts.pdf_to_markdown import PdfToMarkdownOrchestrateResponse
 from stirling.services import AppRuntime
 
 logger = logging.getLogger(__name__)
@@ -72,9 +70,11 @@ def __init__(self, runtime: AppRuntime) -> None:
                     ),
                 ),
                 ToolOutput(
-                    self.delegate_pdf_to_markdown,
-                    name="delegate_pdf_to_markdown",
-                    description=("Delegate requests to reconstruct a PDF as a Markdown document."),
+                    self.delegate_pdf_ingest,
+                    name="delegate_pdf_ingest",
+                    description=(
+                        "Delegate requests to convert a PDF to Markdown or extract its content as readable text."
+                    ),
                 ),
                 ToolOutput(
                     self.unsupported_capability,
@@ -92,8 +92,8 @@ def __init__(self, runtime: AppRuntime) -> None:
                 "Use delegate_pdf_review when the user wants the PDF returned with review"
                 " comments attached — anything like 'review this', 'annotate with comments',"
                 " 'leave feedback on the PDF'. "
-                "Use delegate_pdf_to_markdown for any request to convert a PDF to Markdown "
-                "or reconstruct its content as readable text. "
+                "Use delegate_pdf_ingest for any request to convert a PDF to Markdown "
+                "or extract its content as readable text. "
                 "Use unsupported_capability when the user asks about the assistant itself "
                 "or when none of the other outputs fit; supply a helpful message."
             ),
@@ -133,13 +133,12 @@ async def _resume(self, request: OrchestratorRequest, capability: SupportedCapab
                 return await self._run_pdf_edit(request)
             case SupportedCapability.AGENT_DRAFT:
                 return await self._run_agent_draft(request)
-            case SupportedCapability.PDF_TO_MARKDOWN:
-                return await self._run_pdf_to_markdown(request)
             case (
                 SupportedCapability.ORCHESTRATE
                 | SupportedCapability.AGENT_REVISE
                 | SupportedCapability.AGENT_NEXT_ACTION
                 | SupportedCapability.MATH_AUDITOR_AGENT
+                | SupportedCapability.PDF_TO_MARKDOWN
             ):
                 raise ValueError(f"Cannot resume orchestrator with capability: {capability}")
             case _ as unreachable:
@@ -163,11 +162,12 @@ async def delegate_user_spec(self, ctx: RunContext[OrchestratorDeps]) -> AgentDr
     async def _run_agent_draft(self, request: OrchestratorRequest) -> AgentDraftWorkflowResponse:
         return await UserSpecAgent(self.runtime).orchestrate(request)
 
-    async def delegate_pdf_to_markdown(self, ctx: RunContext[OrchestratorDeps]) -> PdfToMarkdownOrchestrateResponse:
-        return await self._run_pdf_to_markdown(ctx.deps.request)
-
-    async def _run_pdf_to_markdown(self, request: OrchestratorRequest) -> PdfToMarkdownOrchestrateResponse:
-        return await PdfToMarkdownAgent(self.runtime).orchestrate(request)
+    async def delegate_pdf_ingest(self, ctx: RunContext[OrchestratorDeps]) -> ConvertMarkdownResponse:
+        request = ctx.deps.request
+        return ConvertMarkdownResponse(
+            reason="PDF to Markdown requested — Java converts deterministically.",
+            files_to_ingest=request.files,
+        )
 
     async def delegate_pdf_review(self, ctx: RunContext[OrchestratorDeps]) -> PdfReviewOrchestrateResponse:
         return await self._run_pdf_review(ctx.deps.request)
@@ -204,10 +204,5 @@ def _describe_artifacts(self, request: OrchestratorRequest) -> str:
                 file_names = [f.file_name for f in artifact.files]
                 descriptions.append(f"- extracted_text: {total_pages} pages from {file_names}")
                 continue
-            if isinstance(artifact, PageLayoutArtifact):
-                total_pages = sum(len(f.pages) for f in artifact.files)
-                file_names = [f.file_name for f in artifact.files]
-                descriptions.append(f"- page_layout: {total_pages} pages from {file_names}")
-                continue
             descriptions.append("- unknown artifact")
         return "\n".join(descriptions)
diff --git a/engine/src/stirling/agents/pdf_to_markdown/__init__.py b/engine/src/stirling/agents/pdf_to_markdown/__init__.py
deleted file mode 100644
index d35ae05c7c..0000000000
--- a/engine/src/stirling/agents/pdf_to_markdown/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .agent import PdfToMarkdownAgent
-
-__all__ = ["PdfToMarkdownAgent"]
diff --git a/engine/src/stirling/agents/pdf_to_markdown/agent.py b/engine/src/stirling/agents/pdf_to_markdown/agent.py
deleted file mode 100644
index 8c0d7d8ee5..0000000000
--- a/engine/src/stirling/agents/pdf_to_markdown/agent.py
+++ /dev/null
@@ -1,435 +0,0 @@
-"""PDF to Markdown Agent.
-
-Converts a parsed PDF document into a single clean Markdown document, preserving
-headings, paragraphs, and tables in reading order.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import logging
-import re
-import time
-
-from pydantic import BaseModel, Field
-from pydantic_ai import Agent
-from pydantic_ai.output import NativeOutput
-
-from stirling.contracts import (
-    EditCannotDoResponse,
-    GenerateFileResponse,
-    NeedContentFileRequest,
-    NeedContentResponse,
-    OrchestratorRequest,
-    PdfContentType,
-    SupportedCapability,
-    format_conversation_history,
-)
-from stirling.contracts.pdf_to_markdown import (
-    PageLayout,
-    PageLayoutArtifact,
-    PdfToMarkdownCannotDoResponse,
-    PdfToMarkdownOrchestrateResponse,
-    PdfToMarkdownRequest,
-    PdfToMarkdownResponse,
-    PdfToMarkdownSuccessResponse,
-)
-from stirling.services import AppRuntime
-
-logger = logging.getLogger(__name__)
-
-
-# Warn when output tokens are close to the typical model output limit (~8192 for most
-# configurations). The actual limit is model-specific; this threshold catches likely truncation.
-_OUTPUT_TOKEN_TRUNCATION_THRESHOLD = 7500
-
-# Chunking limits — keep each LLM call to a manageable payload size.
-# Fragment count is the primary driver of JSON payload size (each fragment carries x/y/width/
-# fontSize/bold metadata beyond its text). Page cap prevents low-text pages accumulating.
-_MAX_CHUNK_FRAGMENTS = 1_000
-_MAX_CHUNK_PAGES = 10
-
-# Max concurrent LLM calls — limits API rate pressure on large documents.
-_MAX_PARALLEL_CHUNKS = 3
-
-# ── LLM output model ────────────────────────────────────────────────────────────────────────────
-
-
-class _ReconstructionOutput(BaseModel):
-    markdown: str = Field(description="Full document reconstructed as clean Markdown.")
-
-
-# ── Agent ────────────────────────────────────────────────────────────────────────────────────────
-
-
-class PdfToMarkdownAgent:
-    def __init__(self, runtime: AppRuntime) -> None:
-        self.runtime = runtime
-        self._sem = asyncio.Semaphore(_MAX_PARALLEL_CHUNKS)
-        self._reconstruct_agent = Agent(
-            model=runtime.smart_model,
-            output_type=NativeOutput(_ReconstructionOutput),
-            system_prompt=(
-                "You reconstruct PDF pages into clean Markdown from spatial fragment data.\n"
-                "Input: PAGE LAYOUT — per-fragment x/y/font data for structural analysis.\n\n"
-                "COLUMN DETECTION (for tables in page_layout):\n"
-                "- Look at the x-positions of fragments across 3+ consecutive lines.\n"
-                "- If fragments cluster at the same x-positions across multiple lines, those are table columns.\n"
-                "- Each distinct x-cluster is one column."
-                " Name them from the header row (the first line in the cluster).\n"
-                "- Do NOT merge values from different x-columns into one cell.\n\n"
-                "ROW DETECTION:\n"
-                "- Each unique y-coordinate (or group within 3pt) is one table row.\n"
-                "- Every line of layout data is its own row — do not merge rows.\n"
-                "- If a column has no fragment on a given y-row, that cell is empty.\n\n"
-                "TABLE RENDERING:\n"
-                "- Render as: | col1 | col2 | col3 |\n"
-                "             | --- | --- | --- |\n"
-                "             | val | val | val |\n"
-                "- One source row = one table row. Never collapse multiple rows into one.\n"
-                "- Preserve numeric values exactly (no rounding, no formatting changes).\n"
-                "- Bold cells: wrap with ** in the Markdown cell.\n"
-                "- CRITICAL: the separator row `| --- | --- |` appears EXACTLY ONCE per table, immediately\n"
-                "  after the header row. NEVER put `| --- |` after a data row or between data rows.\n"
-                "  NEVER put a blank line inside a table. All rows (header + data) must be consecutive.\n"
-                "- Do NOT produce a header-only table followed by a second table with the data rows.\n"
-                "  One logical table = one markdown table block, with header, one separator, then all data.\n\n"
-                "GROUP HEADERS (label-only rows inside a table):\n"
-                "- A row is a group header when: the first column has text AND every numeric column is empty.\n"
-                "- Do NOT render group headers as table rows with empty cells.\n"
-                "- Break the table, emit the label as **bold text** on its own line,"
-                " then start a new table for the rows that follow.\n"
-                "- Example labels: 'Policy functions', 'Non-current assets'.\n\n"
-                "TOTAL AND SUBTOTAL ROWS:\n"
-                "- Detect rows whose first cell contains (case-insensitive):"
-                " total, subtotal, surplus, balance, net, sum.\n"
-                "- These rows have numeric content — they are NOT group headers.\n"
-                "- Render the entire row in bold: | **Total income** | **1,234** | **5,678** |\n"
-                "- Keep total rows attached to the group they summarise.\n\n"
-                "MULTI-LEVEL TABLES (year or period as a row label):\n"
-                "- Detect when a row contains only a single label (a year like '2010' or period like 'Q1 2023')"
-                " with no numeric content, followed by repeated metric rows.\n"
-                "- Do NOT render the year as a table row.\n"
-                "- Normalise: add 'Year' as the first column, 'Metric' as the second,"
-                " and repeat the year value on each metric row.\n\n"
-                "PROSE REGIONS:\n"
-                "- Lines where x-positions vary across lines (not repeating columns) are prose.\n"
-                "- Merge lines at the same x-level into paragraphs. Separate indented lines.\n\n"
-                "HEADINGS:\n"
-                "- A line is a heading when it is bold OR font_size ≥2pt above body.\n"
-                "  CRITICAL EXCEPTION: a bold fragment is a TABLE HEADER CELL, not a document heading, when\n"
-                "  the same y-row in page_layout contains other fragments at different x-positions.\n"
-                "  Only classify a bold line as a document heading when it is the SOLE fragment on its y-row.\n"
-                "  Example: 'Non-current assets' at y=120 with '2010'@x=350, '2009'@x=420, '2008'@x=490\n"
-                "  → this is a table header row, NOT a heading. Render it as the first cell of the table.\n"
-                "- Use ## for section headings, ### for sub-headings. Use # only for the document title.\n\n"
-                "ORDERING:\n"
-                "- Process content top-to-bottom as it appears on the page.\n"
-                "- Interleave prose blocks and table blocks in page order.\n"
-                "- Do not move text that appears before a table to after it, or vice versa.\n\n"
-                "FIDELITY:\n"
-                "- Do NOT invent, summarise, or omit any content.\n"
-                "- Do NOT add commentary, metadata, or JSON — output Markdown only."
-            ),
-            model_settings={
-                **runtime.smart_model_settings,
-                "temperature": 0.0,
-                "max_tokens": _OUTPUT_TOKEN_TRUNCATION_THRESHOLD,
-            },
-        )
-
-    async def orchestrate(self, request: OrchestratorRequest) -> PdfToMarkdownOrchestrateResponse:
-        """Entry point for the orchestrator delegate.
-
-        First turn: requests PAGE_LAYOUT extraction from Java via NeedContentResponse.
-        Resume turn: runs the LLM reconstruction and returns a write-file plan step.
-        """
-        layout_artifact = next(
-            (a for a in request.artifacts if isinstance(a, PageLayoutArtifact)),
-            None,
-        )
-        if layout_artifact is None:
-            return NeedContentResponse(
-                resume_with=SupportedCapability.PDF_TO_MARKDOWN,
-                reason="Page layout data is required to reconstruct the document.",
-                files=[
-                    NeedContentFileRequest(file=f, content_types=[PdfContentType.PAGE_LAYOUT]) for f in request.files
-                ],
-                max_pages=self.runtime.settings.max_pages,
-                max_characters=self.runtime.settings.max_characters,
-            )
-
-        page_layout = [page for entry in layout_artifact.files for page in entry.pages]
-        file_names = [f.name for f in request.files]
-        result = await self.handle(
-            PdfToMarkdownRequest(
-                user_message=request.user_message,
-                file_names=file_names,
-                conversation_history=request.conversation_history,
-                page_layout=page_layout,
-            )
-        )
-        if isinstance(result, PdfToMarkdownCannotDoResponse):
-            return EditCannotDoResponse(reason=result.reason)
-
-        base = file_names[0].rsplit(".", 1)[0] if file_names else "document"
-        return GenerateFileResponse(
-            content=result.markdown,
-            filename=f"{base}-reconstruction.md",
-            summary="Reconstructed the document as a Markdown file.",
-        )
-
-    async def handle(self, request: PdfToMarkdownRequest) -> PdfToMarkdownResponse:
-        total_fragments = sum(len(line.fragments) for page in request.page_layout for line in page.lines)
-        logger.info(
-            "[pdf-to-markdown] received layout-pages=%d fragments=%d",
-            len(request.page_layout),
-            total_fragments,
-        )
-
-        if not request.page_layout:
-            logger.warning("[pdf-to-markdown] no content extracted from document; returning cannot_do")
-            return PdfToMarkdownCannotDoResponse(
-                reason=(
-                    "No content was extracted from the document. "
-                    "The file may be a scanned image PDF with no readable text. "
-                    "Try running OCR on the document first."
-                )
-            )
-
-        chunks = _build_page_chunks(request.page_layout)
-        logger.info("[pdf-to-markdown] chunks=%d (max %d in parallel)", len(chunks), _MAX_PARALLEL_CHUNKS)
-
-        if len(chunks) == 1:
-            return await self._reconstruct_chunk(request, chunks[0], chunk_num=1, total_chunks=1)
-
-        total = len(chunks)
-        results = await asyncio.gather(
-            *(
-                self._reconstruct_chunk(request, chunk, chunk_num=i + 1, total_chunks=total)
-                for i, chunk in enumerate(chunks)
-            )
-        )
-
-        markdown_parts: list[str] = []
-        for result in results:
-            if isinstance(result, PdfToMarkdownSuccessResponse) and result.markdown:
-                markdown_parts.append(result.markdown)
-            elif isinstance(result, PdfToMarkdownCannotDoResponse):
-                logger.warning("[pdf-to-markdown] chunk dropped: %s", result.reason)
-
-        if not markdown_parts:
-            return PdfToMarkdownCannotDoResponse(reason="The document could not be reconstructed. All chunks failed.")
-
-        logger.info("[pdf-to-markdown] assembly: %d/%d chunks produced output", len(markdown_parts), len(chunks))
-        return PdfToMarkdownSuccessResponse(markdown="\n\n".join(markdown_parts))
-
-    async def _reconstruct_chunk(
-        self,
-        request: PdfToMarkdownRequest,
-        pages: list[PageLayout],
-        chunk_num: int,
-        total_chunks: int,
-    ) -> PdfToMarkdownResponse:
-        chunk_request = PdfToMarkdownRequest(
-            user_message=request.user_message,
-            file_names=request.file_names,
-            conversation_history=request.conversation_history,
-            page_layout=pages,
-        )
-        try:
-            async with self._sem:
-                return await self._reconstruct_document(chunk_request, chunk_num, total_chunks)
-        except Exception as e:
-            logger.error("[pdf-to-markdown] chunk %d/%d failed: %s", chunk_num, total_chunks, e, exc_info=True)
-            return PdfToMarkdownCannotDoResponse(
-                reason="The document could not be reconstructed. The AI model failed to process it."
-            )
-
-    async def _reconstruct_document(
-        self, request: PdfToMarkdownRequest, chunk_num: int = 1, total_chunks: int = 1
-    ) -> PdfToMarkdownSuccessResponse:
-        content = _build_reconstruction_prompt(request)
-        logger.info("[timing] chunk %d/%d llm-call prompt-chars=%d", chunk_num, total_chunks, len(content))
-        t0 = time.monotonic()
-        result = await self._reconstruct_agent.run([content])
-        llm_ms = int((time.monotonic() - t0) * 1000)
-        output: _ReconstructionOutput = result.output
-        usage = result.usage()
-        logger.info(
-            "[timing] chunk %d/%d llm-done ms=%d input-tokens=%s output-tokens=%s markdown-chars=%d",
-            chunk_num,
-            total_chunks,
-            llm_ms,
-            usage.input_tokens,
-            usage.output_tokens,
-            len(output.markdown),
-        )
-        if usage.output_tokens and usage.output_tokens >= _OUTPUT_TOKEN_TRUNCATION_THRESHOLD:
-            logger.warning(
-                "[timing] chunk %d/%d output likely truncated (output-tokens=%d)",
-                chunk_num,
-                total_chunks,
-                usage.output_tokens,
-            )
-        markdown = _remove_extra_separators(_fix_markdown_tables(_merge_orphaned_table_rows(output.markdown)))
-        return PdfToMarkdownSuccessResponse(markdown=markdown)
-
-
-# ── Chunking ────────────────────────────────────────────────────────────────────────────────────
-
-
-def _build_page_chunks(pages: list[PageLayout]) -> list[list[PageLayout]]:
-    chunks: list[list[PageLayout]] = []
-    current: list[PageLayout] = []
-    current_fragments = 0
-    for page in pages:
-        page_fragments = sum(len(line.fragments) for line in page.lines)
-        fragment_full = current and current_fragments + page_fragments > _MAX_CHUNK_FRAGMENTS
-        page_full = len(current) >= _MAX_CHUNK_PAGES
-        if fragment_full or page_full:
-            chunks.append(current)
-            current = []
-            current_fragments = 0
-        current.append(page)
-        current_fragments += page_fragments
-    if current:
-        chunks.append(current)
-    return chunks
-
-
-# ── Prompt builders (module-level, no state) ────────────────────────────────────────────────────
-
-
-def _build_reconstruction_prompt(request: PdfToMarkdownRequest) -> str:
-    history = format_conversation_history(request.conversation_history)
-    file_names = ", ".join(request.file_names) if request.file_names else "Unknown files"
-    layout_section = _format_layout(request.page_layout)
-
-    return (
-        f"Files: {file_names}\n\n"
-        f"User request: {request.user_message}\n\n"
-        f"Conversation history:\n{history}\n\n"
-        "PAGE LAYOUT (structural source — x/y fragment positions):\n"
-        "Each line is: y=NNN | text@(x,y) fs=N  text@(x,y) fs=N ...\n"
-        "- y=NNN is the vertical position (row). Lines close in y are the same visual row.\n"
-        "- x=NNN is the horizontal position (column). Consistent x across rows = a column.\n"
-        "- fs=N is font size. Larger = likely a heading.\n"
-        "- **bold** markers indicate bold text.\n\n"
-        f"{layout_section}"
-    )
-
-
-# ── LLM output post-processing ──────────────────────────────────────────────────────────────────
-
-
-def _fix_markdown_tables(markdown: str) -> str:
-    """Remove blank lines between table rows produced by the LLM."""
-    lines = markdown.split("\n")
-    result: list[str] = []
-    i = 0
-    while i < len(lines):
-        result.append(lines[i])
-        if lines[i].strip().startswith("|"):
-            j = i + 1
-            while j < len(lines) and lines[j].strip() == "":
-                j += 1
-            if j < len(lines) and lines[j].strip().startswith("|"):
-                i = j
-                continue
-        i += 1
-    return "\n".join(result)
-
-
-_SEP_CELL = re.compile(r"^:?-+:?$")
-
-
-def _is_sep_row(line: str) -> bool:
-    """Return True when a pipe row is a Markdown table separator (| --- | --- |)."""
-    stripped = line.strip()
-    if not stripped.startswith("|"):
-        return False
-    cells = [c.strip() for c in stripped.split("|") if c.strip()]
-    return bool(cells) and all(_SEP_CELL.match(c) for c in cells)
-
-
-def _merge_orphaned_table_rows(markdown: str) -> str:
-    """Merge pipe-row blocks that lack a separator into the preceding table.
-
-    When the LLM incorrectly breaks a table (e.g. on a false group-header), it emits
-    orphaned pipe rows with no header or separator. These are invalid markdown and get
-    merged back into the preceding table, discarding the intervening non-table content.
-    """
-    lines = markdown.split("\n")
-
-    segments: list[tuple[str, list[str]]] = []
-    i = 0
-    while i < len(lines):
-        if lines[i].strip().startswith("|"):
-            block: list[str] = []
-            while i < len(lines) and lines[i].strip().startswith("|"):
-                block.append(lines[i])
-                i += 1
-            has_sep = any(_is_sep_row(row) for row in block)
-            segments.append(("table" if has_sep else "orphan", block))
-        else:
-            block = []
-            while i < len(lines) and not lines[i].strip().startswith("|"):
-                block.append(lines[i])
-                i += 1
-            segments.append(("prose", block))
-
-    result: list[tuple[str, list[str]]] = []
-    last_table_idx: int | None = None
-    for seg_type, seg_lines in segments:
-        if seg_type == "orphan":
-            if last_table_idx is not None:
-                result = result[: last_table_idx + 1]
-                result[-1] = ("table", result[-1][1] + seg_lines)
-            else:
-                result.append((seg_type, seg_lines))
-        else:
-            if seg_type == "table":
-                last_table_idx = len(result)
-            result.append((seg_type, seg_lines))
-
-    return "\n".join(line for _, seg_lines in result for line in seg_lines)
-
-
-def _remove_extra_separators(markdown: str) -> str:
-    """Within each contiguous table block, keep only the first separator row."""
-    lines = markdown.split("\n")
-    result: list[str] = []
-    seen_sep = False
-
-    for line in lines:
-        if not line.strip().startswith("|"):
-            seen_sep = False
-            result.append(line)
-            continue
-        if _is_sep_row(line):
-            if seen_sep:
-                continue
-            seen_sep = True
-        result.append(line)
-
-    return "\n".join(result)
-
-
-# ── Formatting helpers (module-level, no state) ──────────────────────────────────────────────────
-
-
-def _format_layout(pages: list[PageLayout]) -> str:
-    if not pages:
-        return "None"
-    parts: list[str] = []
-    for page in pages:
-        line_strs: list[str] = []
-        for line in page.lines:
-            frags = " ".join(
-                f"{'**' if f.bold else ''}{f.text}{'**' if f.bold else ''}@({f.x:.0f},{f.y:.0f}) fs={f.font_size:.0f}"
-                for f in line.fragments
-            )
-            line_strs.append(f"y={line.y:.0f} | {frags}")
-        parts.append(f"--- Page {page.page_number} ---\n" + "\n".join(line_strs))
-    return "\n\n".join(parts)
diff --git a/engine/src/stirling/contracts/__init__.py b/engine/src/stirling/contracts/__init__.py
index 696749d7d7..4bc4febcf5 100644
--- a/engine/src/stirling/contracts/__init__.py
+++ b/engine/src/stirling/contracts/__init__.py
@@ -13,6 +13,7 @@
     AiFile,
     ArtifactKind,
     ConversationMessage,
+    ConvertMarkdownResponse,
     ExtractedFileText,
     GenerateFileResponse,
     MathAuditorToolReportArtifact,
@@ -96,17 +97,6 @@
     PdfQuestionTerminalResponse,
 )
 from .pdf_review import PdfReviewOrchestrateResponse
-from .pdf_to_markdown import (
-    LayoutFragment,
-    LayoutLine,
-    PageLayout,
-    PageLayoutArtifact,
-    PageLayoutFileEntry,
-    PdfToMarkdownCannotDoResponse,
-    PdfToMarkdownOrchestrateResponse,
-    PdfToMarkdownRequest,
-    PdfToMarkdownResponse,
-)
 from .progress import (
     ProgressEvent,
     WholeDocCompressionRound,
@@ -139,10 +129,6 @@
     "ConversationMessage",
     "DeleteDocumentResponse",
     "PurgeOwnerResponse",
-    "PdfToMarkdownCannotDoResponse",
-    "PdfToMarkdownOrchestrateResponse",
-    "PdfToMarkdownRequest",
-    "PdfToMarkdownResponse",
     "Discrepancy",
     "DiscrepancyKind",
     "EditCannotDoResponse",
@@ -166,15 +152,11 @@
     "NeedContentFileRequest",
     "NeedContentResponse",
     "NeedIngestResponse",
+    "ConvertMarkdownResponse",
     "NextExecutionAction",
     "OrchestratorRequest",
     "OrchestratorResponse",
-    "LayoutFragment",
-    "LayoutLine",
     "Page",
-    "PageLayout",
-    "PageLayoutArtifact",
-    "PageLayoutFileEntry",
     "PageRange",
     "PageText",
     "PdfCommentInstruction",
diff --git a/engine/src/stirling/contracts/common.py b/engine/src/stirling/contracts/common.py
index 05103b1a4a..b8030c58b6 100644
--- a/engine/src/stirling/contracts/common.py
+++ b/engine/src/stirling/contracts/common.py
@@ -62,6 +62,7 @@ class WorkflowOutcome(StrEnum):
     CANNOT_CONTINUE = "cannot_continue"
     UNSUPPORTED_CAPABILITY = "unsupported_capability"
     GENERATE_FILE = "generate_file"
+    CONVERT_MARKDOWN = "convert_markdown"
 
 
 class ArtifactKind(StrEnum):
@@ -183,6 +184,19 @@ class NeedIngestResponse(ApiModel):
     content_types: list[PdfContentType] = Field(default_factory=list)
 
 
+class ConvertMarkdownResponse(ApiModel):
+    """Terminal signal: convert the listed files to Markdown deterministically.
+
+    This is a deterministic, non-AI conversion. Java runs the PDF→Markdown converter
+    (``PdfMarkdownConverter``) on each file and returns the resulting ``.md`` file(s) as a
+    completed result. There is no resume turn — the conversion output is the final answer.
+    """
+
+    outcome: Literal[WorkflowOutcome.CONVERT_MARKDOWN] = WorkflowOutcome.CONVERT_MARKDOWN
+    reason: str
+    files_to_ingest: list[AiFile]
+
+
 class ToolOperationStep(ApiModel):
     kind: Literal[StepKind.TOOL] = StepKind.TOOL
     tool: AnyToolId
diff --git a/engine/src/stirling/contracts/orchestrator.py b/engine/src/stirling/contracts/orchestrator.py
index 1bf0f6eb36..8b916ccaff 100644
--- a/engine/src/stirling/contracts/orchestrator.py
+++ b/engine/src/stirling/contracts/orchestrator.py
@@ -11,6 +11,7 @@
     AiFile,
     ArtifactKind,
     ConversationMessage,
+    ConvertMarkdownResponse,
     ExtractedFileText,
     GenerateFileResponse,
     NeedContentResponse,
@@ -23,7 +24,6 @@
 from .execution import NextExecutionAction
 from .pdf_edit import PdfEditTerminalResponse
 from .pdf_questions import PdfQuestionTerminalResponse
-from .pdf_to_markdown import PageLayoutArtifact
 
 
 class ExtractedTextArtifact(ApiModel):
@@ -32,7 +32,7 @@ class ExtractedTextArtifact(ApiModel):
 
 
 WorkflowArtifact = Annotated[
-    ExtractedTextArtifact | PageLayoutArtifact | ToolReportArtifact,
+    ExtractedTextArtifact | ToolReportArtifact,
     Field(discriminator="kind"),
 ]
 
@@ -61,6 +61,7 @@ class UnsupportedCapabilityResponse(ApiModel):
     | GenerateFileResponse
     | NeedContentResponse
     | NeedIngestResponse
+    | ConvertMarkdownResponse
     | AgentDraftResponse
     | NextExecutionAction
     | UnsupportedCapabilityResponse,
diff --git a/engine/src/stirling/contracts/pdf_to_markdown.py b/engine/src/stirling/contracts/pdf_to_markdown.py
deleted file mode 100644
index 4d272e6e2a..0000000000
--- a/engine/src/stirling/contracts/pdf_to_markdown.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""Contracts for the PDF to Markdown Agent.
-
-The agent accepts a parsed document and returns a single Markdown document that
-faithfully reconstructs the PDF content — headings, paragraphs, and tables in
-reading order, using page_layout as the primary source of truth for structure.
-
-Java extracts page layout via PdfIngester and returns it as a PageLayoutArtifact
-through the orchestrator resume_with pattern.
-"""
-
-from __future__ import annotations
-
-from typing import Annotated, Literal
-
-from pydantic import Field
-
-from stirling.models import ApiModel
-
-from .common import ArtifactKind, ConversationMessage, GenerateFileResponse, NeedContentResponse
-from .pdf_edit import EditCannotDoResponse
-
-# ── Input: layout models (mirror Java's RawLine / TextFragment geometry) ────────────────────────
-
-
-class LayoutFragment(ApiModel):
-    """One text fragment with its bounding-box geometry and font properties."""
-
-    text: str
-    x: float
-    y: float
-    width: float
-    font_size: float
-    bold: bool
-
-
-class LayoutLine(ApiModel):
-    """A visual line on the page: one y-coordinate and all fragments on that line."""
-
-    y: float
-    fragments: list[LayoutFragment]
-
-
-class PageLayout(ApiModel):
-    """All layout lines for a single page, in top-to-bottom order."""
-
-    page_number: int
-    lines: list[LayoutLine]
-
-
-# ── Artifact: page layout (produced by Java, consumed by orchestrate()) ──────────────────────────
-
-
-class PageLayoutFileEntry(ApiModel):
-    """Page layout data for one file, as extracted by Java's PdfIngester."""
-
-    file_name: str
-    pages: list[PageLayout] = Field(default_factory=list)
-
-
-class PageLayoutArtifact(ApiModel):
-    """Artifact carrying full spatial page layout for all input files."""
-
-    kind: Literal[ArtifactKind.PAGE_LAYOUT] = ArtifactKind.PAGE_LAYOUT
-    files: list[PageLayoutFileEntry] = Field(default_factory=list)
-
-
-# ── Input: full request ──────────────────────────────────────────────────────────────────────────
-
-
-class PdfToMarkdownRequest(ApiModel):
-    """Request sent by Java after PdfIngester has parsed the document.
-
-    page_layout: per-fragment positional data from the original (y-sorted) line order.
-        Each fragment carries its x/y position, width, font size, and bold flag.
-        This is the primary source of truth for column detection and heading hierarchy.
-    """
-
-    user_message: str
-    file_names: list[str] = Field(default_factory=list)
-    conversation_history: list[ConversationMessage] = Field(default_factory=list)
-    page_layout: list[PageLayout] = Field(default_factory=list)
-
-
-# ── Output: response variants ────────────────────────────────────────────────────────────────────
-
-
-class PdfToMarkdownSuccessResponse(ApiModel):
-    outcome: Literal["document_reconstructed"] = "document_reconstructed"
-    markdown: str
-
-
-class PdfToMarkdownCannotDoResponse(ApiModel):
-    outcome: Literal["cannot_do"] = "cannot_do"
-    reason: str
-
-
-type PdfToMarkdownResponse = Annotated[
-    PdfToMarkdownSuccessResponse | PdfToMarkdownCannotDoResponse,
-    Field(discriminator="outcome"),
-]
-
-type PdfToMarkdownOrchestrateResponse = Annotated[
-    GenerateFileResponse | EditCannotDoResponse | NeedContentResponse,
-    Field(discriminator="outcome"),
-]
diff --git a/engine/tests/test_pdf_to_markdown.py b/engine/tests/test_pdf_to_markdown.py
deleted file mode 100644
index 32870a9459..0000000000
--- a/engine/tests/test_pdf_to_markdown.py
+++ /dev/null
@@ -1,138 +0,0 @@
-"""Tests for PDF to Markdown agent.
-
-Two cases:
-1. Narrative-only page: request validates and routes to reconstruction.
-2. Mixed text + table page: layout with table region validates correctly.
-"""
-
-from __future__ import annotations
-
-from stirling.contracts.pdf_to_markdown import (
-    LayoutFragment,
-    LayoutLine,
-    PageLayout,
-    PageLayoutArtifact,
-    PdfToMarkdownRequest,
-    PdfToMarkdownSuccessResponse,
-)
-
-
-def _frag(text: str, x: float, y: float, font_size: float = 10.0, bold: bool = False) -> LayoutFragment:
-    return LayoutFragment(text=text, x=x, y=y, width=float(len(text) * 6), font_size=font_size, bold=bold)
-
-
-def _line(y: float, *frags: LayoutFragment) -> LayoutLine:
-    return LayoutLine(y=y, fragments=list(frags))
-
-
-# ── Test 1: Narrative-only reconstruction ────────────────────────────────────────────────────────
-
-
-# ── Contract test: Java serialization ↔ Python deserialization ──────────────────────────────────
-# This JSON is also asserted field-by-field in PageLayoutArtifactContractTest.java.
-# If either side renames a field, one of these tests fails.
-_CONTRACT_JSON = (
-    '{"kind":"page_layout","files":[{"fileName":"test.pdf","pages":'
-    '[{"pageNumber":1,"lines":[{"y":10.0,"fragments":'
-    '[{"text":"Hello","x":1.0,"y":2.0,"width":30.0,"fontSize":12.0,"bold":true}]}]}]}]}'
-)
-
-
-def test_page_layout_artifact_deserialises_java_json() -> None:
-    artifact = PageLayoutArtifact.model_validate_json(_CONTRACT_JSON)
-
-    assert artifact.kind == "page_layout"
-    assert artifact.files[0].file_name == "test.pdf"
-    page = artifact.files[0].pages[0]
-    assert page.page_number == 1
-    line = page.lines[0]
-    assert line.y == 10.0
-    frag = line.fragments[0]
-    assert frag.text == "Hello"
-    assert frag.x == 1.0
-    assert frag.y == 2.0
-    assert frag.width == 30.0
-    assert frag.font_size == 12.0
-    assert frag.bold is True
-
-
-def test_narrative_reconstruction_request_validates() -> None:
-    """A prose-only page with no tables produces a valid PdfToMarkdownRequest."""
-    layout = PageLayout(
-        page_number=1,
-        lines=[
-            _line(72.0, _frag("Annual Report 2023", x=72.0, y=72.0, font_size=18.0, bold=True)),
-            _line(100.0, _frag("Our revenue grew significantly", x=72.0, y=100.0)),
-            _line(114.0, _frag("during the fiscal year ended", x=72.0, y=114.0)),
-            _line(128.0, _frag("December 31, 2023.", x=72.0, y=128.0)),
-        ],
-    )
-    request = PdfToMarkdownRequest(
-        user_message="reconstruct this document",
-        page_layout=[layout],
-    )
-
-    assert len(request.page_layout) == 1
-    assert len(request.page_layout[0].lines) == 4
-    assert request.page_layout[0].lines[0].fragments[0].bold is True
-    assert request.page_layout[0].lines[0].fragments[0].font_size == 18.0
-
-
-def test_narrative_reconstruction_response_validates() -> None:
-    """PdfToMarkdownSuccessResponse accepts markdown and returns document_reconstructed outcome."""
-    response = PdfToMarkdownSuccessResponse(
-        markdown="# Annual Report 2023\n\nOur revenue grew significantly during the fiscal year.",
-    )
-
-    assert response.outcome == "document_reconstructed"
-    assert response.markdown.startswith("#")
-
-
-# ── Test 2: Mixed text + table reconstruction ─────────────────────────────────────────────────────
-
-
-def test_mixed_page_layout_validates() -> None:
-    """A page with both prose lines and a table region produces a valid request."""
-    layout = PageLayout(
-        page_number=1,
-        lines=[
-            # Prose heading
-            _line(50.0, _frag("Projects in Development", x=72.0, y=50.0, font_size=14.0, bold=True)),
-            # Table header row
-            _line(
-                80.0,
-                _frag("Project Name", x=72.0, y=80.0, bold=True),
-                _frag("Location", x=200.0, y=80.0, bold=True),
-                _frag("Size (MW)", x=290.0, y=80.0, bold=True),
-            ),
-            # Table data rows
-            _line(
-                95.0,
-                _frag("Chaplin Wind 1", x=72.0, y=95.0),
-                _frag("Saskatchewan", x=200.0, y=95.0),
-                _frag("177", x=290.0, y=95.0),
-            ),
-            _line(
-                110.0,
-                _frag("Amherst Island 2", x=72.0, y=110.0),
-                _frag("Ontario", x=200.0, y=110.0),
-                _frag("75", x=290.0, y=110.0),
-            ),
-            # Prose after table
-            _line(140.0, _frag("Notes:", x=72.0, y=140.0, bold=True)),
-            _line(154.0, _frag("1 PPA signed", x=85.0, y=154.0)),
-        ],
-    )
-    request = PdfToMarkdownRequest(
-        user_message="markdown",
-        page_layout=[layout],
-    )
-
-    assert len(request.page_layout[0].lines) == 6
-    # Header line has 3 fragments at distinct x-positions (column detection)
-    header_line = request.page_layout[0].lines[1]
-    xs = [f.x for f in header_line.fragments]
-    assert xs == [72.0, 200.0, 290.0]
-    # Data rows have matching x-positions
-    data_row = request.page_layout[0].lines[2]
-    assert [f.x for f in data_row.fragments] == [72.0, 200.0, 290.0]