Skip to content

Commit ebd3d2c

Browse files
author
Aurel Avramescu
committed
chore(img): improve WSQ parser
1 parent eb63799 commit ebd3d2c

2 files changed

Lines changed: 16 additions & 39 deletions

File tree

src/main/kotlin/eu/aagsolutions/img/nbis/io/WsqParser.kt

Lines changed: 15 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -160,39 +160,15 @@ object WsqParser {
160160
throw NistException("Invalid WSQ Frame Header: insufficient data")
161161
}
162162

163-
// WSQ SOF structure:
164-
// 0-1: Marker (0xFF 0xA2)
165-
// 2-3: Length of segment
166-
// 4: Black value (0)
167-
// 5: Precision (must be 8)
168-
// 6-7: Height (big-endian)
169-
// 8-9: Width (big-endian)
170-
// 10: Number of components (must be 1 for grayscale)
171-
// 11+: Component specifications
172-
173-
val segmentLength = readUInt16BigEndian(data, offset + 2)
174-
175-
val height = readUInt16BigEndian(data, offset + 6)
176-
val width = readUInt16BigEndian(data, offset + 8)
177-
val numComponents = data[offset + 10].toInt() and 0xFF
178-
179-
// Try to find PPI in comments, default to 500 if not found
180-
val ppi = findWSQPixelsPerInch(data) ?: 500
181-
182-
return WSQImageDimensions(
183-
width = width,
184-
height = height,
185-
pixelDepth = 8,
186-
pixelsPerInch = ppi,
187-
)
163+
return findWSQPixelsPerInch(data)!!
188164
}
189165

190166
/**
191167
* Attempts to find PPI information from WSQ comment segments.
192168
*
193169
* @param data The WSQ byte array
194170
*/
195-
private fun findWSQPixelsPerInch(data: ByteArray): Int? {
171+
private fun findWSQPixelsPerInch(data: ByteArray): WSQImageDimensions? {
196172
var offset = 0
197173
return generateSequence {
198174
while (offset < data.size - 4) {
@@ -210,24 +186,25 @@ object WsqParser {
210186
}
211187
}
212188

213-
private fun extractPpiFromSegment(data: ByteArray, pos: Int, length: Int): Int? {
189+
/**
190+
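* Extracts image dimensions (width, height, pixel depth and PPI) from a comment segment using regexes, substituting defaults for any field that is missing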
191+
*/
192+
private fun extractPpiFromSegment(data: ByteArray, pos: Int, length: Int): WSQImageDimensions? {
214193
if (pos + 2 + length > data.size) return null
215194

216195
// WSQ comments are often ISO_8859_1 or ASCII; ISO handles binary bytes more gracefully
217196
val comment = String(data, pos + 4, length - 2, Charsets.ISO_8859_1)
218197

219198
// 1. Try standard Regex first
220199
val ppiRegex = Regex("""\bPPI\s+(\d+)""", RegexOption.IGNORE_CASE)
221-
val directMatch = ppiRegex.find(comment)?.groupValues?.get(1)?.toIntOrNull()
222-
if (directMatch != null) return directMatch
223-
224-
// 2. Fallback to NIST_COM parsing if string contains NIST header
225-
return comment.takeIf { it.contains("NIST_COM") }
226-
?.split(Regex("[\\s\\x00]+")) // Split by space or null terminator
227-
?.firstNotNullOfOrNull { part ->
228-
part.filter { it.isDigit() }
229-
.takeIf { it.isNotEmpty() }
230-
?.toIntOrNull()
231-
}
200+
val pixelDepthRegex = Regex("""\bPIX_DEPTH\s+(\d+)""", RegexOption.IGNORE_CASE)
201+
val pixelHeightRegex = Regex("""\bPIX_HEIGHT\s+(\d+)""", RegexOption.IGNORE_CASE)
202+
val pixelWidthRegex = Regex("""\bPIX_WIDTH\s+(\d+)""", RegexOption.IGNORE_CASE)
203+
return WSQImageDimensions(
204+
width = pixelWidthRegex.find(comment)?.groupValues?.get(1)?.toIntOrNull() ?: 0,
205+
height = pixelHeightRegex.find(comment)?.groupValues?.get(1)?.toIntOrNull() ?: 0,
206+
pixelDepth = pixelDepthRegex.find(comment)?.groupValues?.get(1)?.toIntOrNull() ?: 8,
207+
pixelsPerInch = ppiRegex.find(comment)?.groupValues?.get(1)?.toIntOrNull() ?: 500,
208+
)
232209
}
233210
}

src/test/kotlin/eu/aagsolutions/img/nbis/io/ImageParserTest.kt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ class ImageParserTest {
7272
pixelsPerInchX shouldBe 500
7373
pixelsPerInchY shouldBe 500
7474
colorSpace shouldBe "GRAY"
75-
pixelDepth shouldBe 8
75+
pixelDepth shouldBe 24
7676
}
7777
}
7878

0 commit comments

Comments
 (0)