Skip to content

Commit da732a0

Browse files
committed
feat: enhance Excel processing with print area support and page setup in conversion
1 parent f5500ab commit da732a0

File tree

143 files changed

+1368
-22
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+1368
-22
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
- English
22
- .NET security policy
3+
- System can't auto git push
34

45
## Issue Summary Workflow
56
When user says "summary on issue #N", use `gh issue comment N --body-file -`

scripts/Run-Benchmark_issues.ps1

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
<#
.SYNOPSIS
    Benchmark Issue_Files: convert user-reported xlsx/docx to PDF (MiniPdf + LibreOffice) → compare → report.

.DESCRIPTION
    Converts files in tests/Issue_Files/xlsx and tests/Issue_Files/docx using both
    MiniPdf and LibreOffice, then runs compare_pdfs.py to produce a comparison report.

    The script is best-effort: a failing external tool (pip, dotnet, python) produces a
    warning but does not abort the run, so the remaining steps still execute.

.PARAMETER CompareOnly
    Skip both conversion steps (MiniPdf and LibreOffice) and only run the comparison.

.PARAMETER SkipMiniPdf
    Skip the MiniPdf conversion step.

.PARAMETER SkipReference
    Skip the LibreOffice reference conversion step.

.PARAMETER SkipInstall
    Skip the "pip install" of the Python dependencies.

.EXAMPLE
    .\scripts\Run-Benchmark_issues.ps1

.EXAMPLE
    .\scripts\Run-Benchmark_issues.ps1 -SkipReference

.EXAMPLE
    .\scripts\Run-Benchmark_issues.ps1 -CompareOnly
#>

param(
    [switch]$CompareOnly,
    [switch]$SkipMiniPdf,
    [switch]$SkipReference,
    [switch]$SkipInstall
)

$ErrorActionPreference = "Continue"

# This script lives in <repo>/scripts, so the parent of $PSScriptRoot is the repository root.
$ScriptRoot   = Split-Path -Parent $PSScriptRoot
$TestsDir     = Join-Path $ScriptRoot "tests"
$IssueDir     = Join-Path $TestsDir "Issue_Files"
$BenchmarkDir = Join-Path $TestsDir "MiniPdf.Benchmark"
$ScriptsDir   = Join-Path $TestsDir "MiniPdf.Scripts"

# Issue source dirs
$XlsxIssueDir = Join-Path $IssueDir "xlsx"
$DocxIssueDir = Join-Path $IssueDir "docx"

# MiniPdf output dirs
$MiniPdfXlsx = Join-Path $IssueDir "minipdf_xlsx"
$MiniPdfDocx = Join-Path $IssueDir "minipdf_docx"

# LibreOffice reference output dirs
$RefXlsx = Join-Path $IssueDir "reference_xlsx"
$RefDocx = Join-Path $IssueDir "reference_docx"

# Report dirs
$ReportXlsx = Join-Path $IssueDir "reports_xlsx"
$ReportDocx = Join-Path $IssueDir "reports_docx"

# Runs an external tool from a given working directory and warns when it exits non-zero.
# The previous location is always restored, even if the invocation raises an error.
function Invoke-Tool {
    param(
        [string]$WorkingDirectory,   # directory to run the tool from
        [string]$Description,        # human-readable step name used in the warning
        [string]$FilePath,           # executable name, e.g. "python" or "dotnet"
        [string[]]$Arguments         # arguments passed to the executable, one element per argument
    )

    Push-Location $WorkingDirectory
    try {
        & $FilePath @Arguments
        # Only reached when the tool actually started, so $LASTEXITCODE belongs to this call.
        if ($LASTEXITCODE -ne 0) {
            Write-Warning "$Description exited with code $LASTEXITCODE; results for this step may be missing or stale."
        }
    } finally {
        Pop-Location
    }
}

# Runs the convert (MiniPdf) -> convert (LibreOffice) -> compare pipeline for one file type.
# Honors the script-level -CompareOnly / -SkipMiniPdf / -SkipReference switches.
function Invoke-IssueBenchmark {
    param(
        [string]$Label,             # upper-case name used in messages, e.g. "XLSX"
        [string]$Kind,              # lower-case extension / folder name, e.g. "xlsx"
        [string]$SourceDir,         # directory holding the issue files
        [string]$MiniPdfDir,        # output directory for MiniPdf PDFs
        [string]$ReferenceDir,      # output directory for LibreOffice PDFs
        [string]$ReportDir,         # output directory for the comparison report
        [string]$ConverterScript,   # file-based C# converter run via "dotnet run"
        [string]$ReferenceScript    # Python script that produces the reference PDFs
    )

    # @() guarantees an array, so .Count is reliable for zero, one or many matches.
    $files = @(Get-ChildItem -Path $SourceDir -Filter "*.$Kind" -ErrorAction SilentlyContinue)
    if ($files.Count -eq 0) {
        Write-Host "No $Label files in Issue_Files/$Kind — skipping." -ForegroundColor DarkYellow
        return
    }

    $cnt = $files.Count
    Write-Host "`n--- $Label Issue Files: $cnt files ---" -ForegroundColor Cyan

    if (-not $CompareOnly -and -not $SkipMiniPdf) {
        Write-Host "[Step 1] Converting $Label -> PDF (MiniPdf)..." -ForegroundColor Yellow
        Invoke-Tool -WorkingDirectory $ScriptsDir `
                    -Description "MiniPdf $Label conversion" `
                    -FilePath "dotnet" `
                    -Arguments @("run", $ConverterScript, "--", $SourceDir, $MiniPdfDir)
    }

    if (-not $CompareOnly -and -not $SkipReference) {
        Write-Host "[Step 2] Converting $Label -> PDF (LibreOffice)..." -ForegroundColor Yellow
        Invoke-Tool -WorkingDirectory $BenchmarkDir `
                    -Description "LibreOffice $Label conversion" `
                    -FilePath "python" `
                    -Arguments @($ReferenceScript, "--${Kind}-dir", $SourceDir, "--pdf-dir", $ReferenceDir)
    }

    Write-Host "[Step 3] Comparing $Label PDFs..." -ForegroundColor Yellow
    Invoke-Tool -WorkingDirectory $BenchmarkDir `
                -Description "$Label PDF comparison" `
                -FilePath "python" `
                -Arguments @("compare_pdfs.py", "--minipdf-dir", $MiniPdfDir, "--reference-dir", $ReferenceDir, "--report-dir", $ReportDir)
}

Write-Host "`n============================================================" -ForegroundColor Cyan
Write-Host " MiniPdf Issue Files Benchmark" -ForegroundColor Cyan
Write-Host "============================================================`n" -ForegroundColor Cyan

# Step 0: Install Python dependencies
if (-not $SkipInstall) {
    Write-Host "[Step 0] Installing Python dependencies..." -ForegroundColor Yellow
    if (Get-Command pip -ErrorAction SilentlyContinue) {
        # stderr stays suppressed to keep the output quiet; the exit code decides success.
        pip install openpyxl pymupdf python-docx Pillow --quiet 2>$null
        if ($LASTEXITCODE -eq 0) {
            Write-Host " OK" -ForegroundColor Green
        } else {
            Write-Warning "pip install exited with code $LASTEXITCODE; Python steps may fail."
        }
    } else {
        Write-Warning "pip was not found on PATH; skipping dependency installation."
    }
}

# Ensure output dirs
foreach ($d in @($MiniPdfXlsx, $MiniPdfDocx, $RefXlsx, $RefDocx, $ReportXlsx, $ReportDocx)) {
    if (-not (Test-Path $d)) { New-Item -ItemType Directory -Path $d -Force | Out-Null }
}

# ── XLSX ──
Invoke-IssueBenchmark -Label "XLSX" -Kind "xlsx" `
    -SourceDir $XlsxIssueDir -MiniPdfDir $MiniPdfXlsx -ReferenceDir $RefXlsx -ReportDir $ReportXlsx `
    -ConverterScript "convert_xlsx_to_pdf.cs" -ReferenceScript "generate_reference_pdfs.py"

# ── DOCX ──
Invoke-IssueBenchmark -Label "DOCX" -Kind "docx" `
    -SourceDir $DocxIssueDir -MiniPdfDir $MiniPdfDocx -ReferenceDir $RefDocx -ReportDir $ReportDocx `
    -ConverterScript "convert_docx_to_pdf.cs" -ReferenceScript "generate_reference_pdfs_docx.py"

# ── Summary ──
Write-Host "`n============================================================" -ForegroundColor Cyan
Write-Host " Done! Reports:" -ForegroundColor Cyan
Write-Host "============================================================" -ForegroundColor Cyan

$xlsxReport = Join-Path $ReportXlsx "comparison_report.md"
$docxReport = Join-Path $ReportDocx "comparison_report.md"

if (Test-Path $xlsxReport) {
    Write-Host " XLSX: $xlsxReport" -ForegroundColor Green
}
if (Test-Path $docxReport) {
    Write-Host " DOCX: $docxReport" -ForegroundColor Green
}

# Open first available report (XLSX preferred) when the "code" command is available
$code = Get-Command code -ErrorAction SilentlyContinue
if ($code) {
    $firstReport = @($xlsxReport, $docxReport) | Where-Object { Test-Path $_ } | Select-Object -First 1
    if ($firstReport) { code $firstReport }
}

0 commit comments

Comments
 (0)