|
<#
.SYNOPSIS
    Benchmark Issue_Files: convert user-reported xlsx/docx to PDF (MiniPdf + LibreOffice) → compare → report.

.DESCRIPTION
    Converts files in tests/Issue_Files/xlsx and tests/Issue_Files/docx using both
    MiniPdf and LibreOffice, then runs compare_pdfs.py to produce a comparison report.

    Unless -SkipInstall is given, the required Python packages are installed with pip
    before any conversion runs.

.PARAMETER CompareOnly
    Skip both conversion steps (MiniPdf and LibreOffice) and only run the comparison
    against PDFs that already exist in the output directories.

.PARAMETER SkipMiniPdf
    Skip the MiniPdf conversion step; the LibreOffice conversion and the comparison still run.

.PARAMETER SkipReference
    Skip the LibreOffice reference conversion step; the MiniPdf conversion and the
    comparison still run.

.PARAMETER SkipInstall
    Skip installing the Python dependencies (Step 0).

.EXAMPLE
    .\scripts\Run-Benchmark_issues.ps1

    Installs dependencies, converts with MiniPdf and LibreOffice, then compares.

.EXAMPLE
    .\scripts\Run-Benchmark_issues.ps1 -SkipReference

    Converts with MiniPdf only and compares against existing reference PDFs.

.EXAMPLE
    .\scripts\Run-Benchmark_issues.ps1 -CompareOnly

    Runs only the comparison on previously generated PDFs.
#>

param(
    [switch]$CompareOnly,
    [switch]$SkipMiniPdf,
    [switch]$SkipReference,
    [switch]$SkipInstall
)
| 21 | + |
# Report non-terminating errors but keep running the remaining steps.
$ErrorActionPreference = "Continue"

# Parent of the directory that contains this script.
$ScriptRoot = Split-Path -Parent $PSScriptRoot
$testsRoot  = Join-Path $ScriptRoot "tests"

$IssueDir     = Join-Path $testsRoot "Issue_Files"
$BenchmarkDir = Join-Path $testsRoot "MiniPdf.Benchmark"
$ScriptsDir   = Join-Path $testsRoot "MiniPdf.Scripts"

# Input directories holding the issue files
$XlsxIssueDir = Join-Path $IssueDir "xlsx"
$DocxIssueDir = Join-Path $IssueDir "docx"

# Where MiniPdf-generated PDFs are written
$MiniPdfXlsx = Join-Path $IssueDir "minipdf_xlsx"
$MiniPdfDocx = Join-Path $IssueDir "minipdf_docx"

# Where LibreOffice reference PDFs are written
$RefXlsx = Join-Path $IssueDir "reference_xlsx"
$RefDocx = Join-Path $IssueDir "reference_docx"

# Where comparison reports are written
$ReportXlsx = Join-Path $IssueDir "reports_xlsx"
$ReportDocx = Join-Path $IssueDir "reports_docx"
| 43 | + |
# Title banner: blank line, rule, title, rule, blank line.
$bannerRule = "============================================================"
Write-Host "`n$bannerRule" -ForegroundColor Cyan
Write-Host " MiniPdf Issue Files Benchmark" -ForegroundColor Cyan
Write-Host "$bannerRule`n" -ForegroundColor Cyan
| 47 | + |
# Step 0: Install Python dependencies
# Best effort: a failed install is reported but does not abort the run, because the
# packages may already be present from an earlier run.
if (-not $SkipInstall) {
    Write-Host "[Step 0] Installing Python dependencies..." -ForegroundColor Yellow

    if (-not (Get-Command python -ErrorAction SilentlyContinue)) {
        Write-Warning "python was not found on PATH; skipping dependency install."
    } else {
        # 'python -m pip' installs into the same interpreter that runs the benchmark
        # scripts later; a bare 'pip' on PATH may belong to a different Python.
        # stderr is captured (not discarded) so it can be shown if the install fails.
        $pipOutput = python -m pip install openpyxl pymupdf python-docx Pillow --quiet 2>&1

        if ($LASTEXITCODE -eq 0) {
            Write-Host " OK" -ForegroundColor Green
        } else {
            Write-Warning "pip install failed (exit code $LASTEXITCODE); later steps may fail."
            $pipOutput | ForEach-Object { Write-Host "   $_" -ForegroundColor DarkYellow }
        }
    }
}
| 54 | + |
# Create every output directory that does not exist yet.
$outputDirs = $MiniPdfXlsx, $MiniPdfDocx, $RefXlsx, $RefDocx, $ReportXlsx, $ReportDocx
$outputDirs |
    Where-Object { -not (Test-Path $_) } |
    ForEach-Object { New-Item -ItemType Directory -Path $_ -Force | Out-Null }
| 59 | + |
# ── XLSX ──
# Convert the XLSX issue files with MiniPdf and LibreOffice, then compare the PDFs.
# @() forces an array so .Count is reliable for zero, one, or many matches.
$xlsxFiles = @(Get-ChildItem -Path $XlsxIssueDir -Filter "*.xlsx" -ErrorAction SilentlyContinue)
if ($xlsxFiles.Count -gt 0) {
    $cnt = $xlsxFiles.Count
    Write-Host "`n--- XLSX Issue Files: $cnt files ---" -ForegroundColor Cyan

    if (-not $CompareOnly -and -not $SkipMiniPdf) {
        Write-Host "[Step 1] Converting XLSX -> PDF (MiniPdf)..." -ForegroundColor Yellow
        Push-Location $ScriptsDir
        try {
            dotnet run convert_xlsx_to_pdf.cs -- $XlsxIssueDir $MiniPdfXlsx
            # Native commands do not raise PowerShell errors; surface a failure explicitly.
            if ($LASTEXITCODE -ne 0) {
                Write-Warning "MiniPdf XLSX conversion exited with code $LASTEXITCODE; results may be incomplete."
            }
        } finally {
            Pop-Location
        }
    }

    if (-not $CompareOnly -and -not $SkipReference) {
        Write-Host "[Step 2] Converting XLSX -> PDF (LibreOffice)..." -ForegroundColor Yellow
        Push-Location $BenchmarkDir
        try {
            python generate_reference_pdfs.py --xlsx-dir $XlsxIssueDir --pdf-dir $RefXlsx
            if ($LASTEXITCODE -ne 0) {
                Write-Warning "LibreOffice XLSX conversion exited with code $LASTEXITCODE; results may be incomplete."
            }
        } finally {
            Pop-Location
        }
    }

    # The comparison always runs; with -CompareOnly it uses previously generated PDFs.
    Write-Host "[Step 3] Comparing XLSX PDFs..." -ForegroundColor Yellow
    Push-Location $BenchmarkDir
    try {
        python compare_pdfs.py --minipdf-dir $MiniPdfXlsx --reference-dir $RefXlsx --report-dir $ReportXlsx
        if ($LASTEXITCODE -ne 0) {
            Write-Warning "XLSX comparison exited with code $LASTEXITCODE; the report may be missing or stale."
        }
    } finally {
        Pop-Location
    }
} else {
    Write-Host "No XLSX files in Issue_Files/xlsx — skipping." -ForegroundColor DarkYellow
}
| 96 | + |
# ── DOCX ──
# Convert the DOCX issue files with MiniPdf and LibreOffice, then compare the PDFs.
# @() forces an array so .Count is reliable for zero, one, or many matches.
$docxFiles = @(Get-ChildItem -Path $DocxIssueDir -Filter "*.docx" -ErrorAction SilentlyContinue)
if ($docxFiles.Count -gt 0) {
    $cnt = $docxFiles.Count
    Write-Host "`n--- DOCX Issue Files: $cnt files ---" -ForegroundColor Cyan

    if (-not $CompareOnly -and -not $SkipMiniPdf) {
        Write-Host "[Step 1] Converting DOCX -> PDF (MiniPdf)..." -ForegroundColor Yellow
        Push-Location $ScriptsDir
        try {
            dotnet run convert_docx_to_pdf.cs -- $DocxIssueDir $MiniPdfDocx
            # Native commands do not raise PowerShell errors; surface a failure explicitly.
            if ($LASTEXITCODE -ne 0) {
                Write-Warning "MiniPdf DOCX conversion exited with code $LASTEXITCODE; results may be incomplete."
            }
        } finally {
            Pop-Location
        }
    }

    if (-not $CompareOnly -and -not $SkipReference) {
        Write-Host "[Step 2] Converting DOCX -> PDF (LibreOffice)..." -ForegroundColor Yellow
        Push-Location $BenchmarkDir
        try {
            python generate_reference_pdfs_docx.py --docx-dir $DocxIssueDir --pdf-dir $RefDocx
            if ($LASTEXITCODE -ne 0) {
                Write-Warning "LibreOffice DOCX conversion exited with code $LASTEXITCODE; results may be incomplete."
            }
        } finally {
            Pop-Location
        }
    }

    # The comparison always runs; with -CompareOnly it uses previously generated PDFs.
    Write-Host "[Step 3] Comparing DOCX PDFs..." -ForegroundColor Yellow
    Push-Location $BenchmarkDir
    try {
        python compare_pdfs.py --minipdf-dir $MiniPdfDocx --reference-dir $RefDocx --report-dir $ReportDocx
        if ($LASTEXITCODE -ne 0) {
            Write-Warning "DOCX comparison exited with code $LASTEXITCODE; the report may be missing or stale."
        }
    } finally {
        Pop-Location
    }
} else {
    Write-Host "No DOCX files in Issue_Files/docx — skipping." -ForegroundColor DarkYellow
}
| 133 | + |
# ── Summary ──
# List every report that exists, then open the first one in VS Code when the
# 'code' command is available.
$summaryRule = "============================================================"
Write-Host "`n$summaryRule" -ForegroundColor Cyan
Write-Host " Done! Reports:" -ForegroundColor Cyan
Write-Host $summaryRule -ForegroundColor Cyan

$xlsxReport = Join-Path $ReportXlsx "comparison_report.md"
$docxReport = Join-Path $ReportDocx "comparison_report.md"

# XLSX is listed first so it takes priority when choosing which report to open.
$candidates = @(
    @{ Label = "XLSX"; Path = $xlsxReport },
    @{ Label = "DOCX"; Path = $docxReport }
)

$existingReports = @()
foreach ($candidate in $candidates) {
    if (Test-Path $candidate.Path) {
        Write-Host " $($candidate.Label): $($candidate.Path)" -ForegroundColor Green
        $existingReports += $candidate.Path
    }
}

# Open first available report
$code = Get-Command code -ErrorAction SilentlyContinue
if ($code -and $existingReports.Count -gt 0) {
    code $existingReports[0]
}
0 commit comments