Skip to content

Commit eb63799

Browse files
author
Aurel Avramescu
committed
chore(img): refactor PPI extraction
1 parent 7ab5080 commit eb63799

2 files changed

Lines changed: 46 additions & 28 deletions

File tree

src/main/kotlin/eu/aagsolutions/img/nbis/io/WsqParser.kt

Lines changed: 30 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -194,38 +194,40 @@ object WsqParser {
194194
*/
195195
private fun findWSQPixelsPerInch(data: ByteArray): Int? {
196196
var offset = 0
197-
// Removed implementation: walks the whole buffer and never stops early, so a later
// assignment to extractedPpiValue overwrites an earlier one.
var extractedPpiValue: Int? = null
198-
while (offset < data.size - 4) {
199-
if (data[offset] == 0xFF.toByte() && data[offset + 1] == 0xA8.toByte()) { // COM segment
200-
val segmentLength = readUInt16BigEndian(data, offset + 2)
201-
if (offset + 4 + segmentLength <= data.size) {
202-
val commentBytes = data.sliceArray(offset + 4 until offset + 4 + segmentLength - 2)
203-
val comment = String(commentBytes, Charsets.US_ASCII)
204-
205-
// Look for PPI information in the comment
206-
val ppiRegex = Regex("""\bPPI\s+(\d+)""", RegexOption.IGNORE_CASE)
207-
val match = ppiRegex.find(comment)
208-
if (match != null) {
209-
extractedPpiValue = match.groupValues[1].toIntOrNull()
210-
} else if (comment.contains("NIST_COM")) {
211-
// Parse NIST comment format if needed
212-
val parts = comment.split("\u0000")
213-
for (part in parts) {
214-
if (part.contains("PPI")) {
215-
val ppiValue = part.filter { it.isDigit() }
216-
if (ppiValue.isNotEmpty()) {
217-
extractedPpiValue = ppiValue.toIntOrNull()
218-
}
219-
}
220-
}
221-
}
// Added implementation: each call of the generator lambda resumes scanning from the shared
// `offset` and yields one (marker position, declared length) pair per 0xFF 0xA8 marker.
// Returning null from the lambda ends the sequence.
197+
return generateSequence {
198+
while (offset < data.size - 4) {
199+
val currentPos = offset
200+
if (data[offset] == 0xFF.toByte() && data[offset + 1] == 0xA8.toByte()) {
201+
val length = readUInt16BigEndian(data, offset + 2)
// Advance by the 2 marker bytes plus the declared length before yielding, so the next
// call resumes after this segment.
202+
offset += 2 + length
203+
return@generateSequence currentPos to length
222204
}
223-
offset += 4 + segmentLength - 2
224-
} else {
225205
offset++
226206
}
207+
null
// Segments are pulled one at a time; iteration stops at the first one that yields a
// non-null PPI, and the result is null when none does.
208+
}.firstNotNullOfOrNull { (pos, length) ->
209+
extractPpiFromSegment(data, pos, length)
227210
}
211+
}
228212

229-
return extractedPpiValue
213+
/**
 * Extracts a pixels-per-inch value from the comment segment whose marker starts at [pos].
 *
 * The comment text is read from `pos + 4` and is `length - 2` bytes long, decoded as
 * ISO-8859-1 so that every byte maps to exactly one character.
 *
 * @param data the bytes being scanned
 * @param pos index of the first marker byte of the segment
 * @param length the segment length as declared in the two bytes following the marker
 * @return the parsed PPI, or `null` when the segment is malformed, truncated, or carries no
 *   non-negative PPI value
 */
private fun extractPpiFromSegment(data: ByteArray, pos: Int, length: Int): Int? {
    // A declared length below 2 would produce a negative byte count for the String
    // constructor; a segment running past the buffer is truncated. Neither can hold a comment.
    if (pos < 0 || length < 2 || pos + 2 + length > data.size) return null

    val comment = String(data, pos + 4, length - 2, Charsets.ISO_8859_1)

    // 1. Plain "PPI <digits>" anywhere in the comment.
    val ppiRegex = Regex("""\bPPI\s+(\d+)""", RegexOption.IGNORE_CASE)
    val directMatch = ppiRegex.find(comment)?.groupValues?.get(1)?.toIntOrNull()
    if (directMatch != null) return directMatch

    // 2. NIST_COM fallback: accept NUL, '=' or ':' (or nothing) between the key and its value,
    //    but only take digits that directly belong to the PPI key. Picking the first number in
    //    the comment would return the NIST_COM entry count or the pixel width instead, and a
    //    negative sentinel such as "PPI -1" must yield null.
    val nistPpiRegex = Regex("""\bPPI[\s\x00=:]*(\d+)""", RegexOption.IGNORE_CASE)
    return comment.takeIf { it.contains("NIST_COM") }
        ?.let { nistPpiRegex.find(it) }
        ?.groupValues
        ?.get(1)
        ?.toIntOrNull()
}
231233
}

src/test/kotlin/eu/aagsolutions/img/nbis/io/ImageParserTest.kt

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,4 +75,20 @@ class ImageParserTest {
7575
pixelDepth shouldBe 8
7676
}
7777
}
78+
79+
/**
 * Parses the bundled `/img/sample2.wsq` resource and checks the image properties reported by
 * [ImageParser.readImageInfo].
 */
@Test
fun `it should successful detect WSQ image properties for sample2`() {
    val resourcePath = "/img/sample2.wsq"
    // Fail with a descriptive message instead of a bare NullPointerException when the
    // fixture is missing from the test classpath.
    val url = checkNotNull(ImageParserTest::class.java.getResource(resourcePath)) {
        "Test resource $resourcePath not found on the classpath"
    }
    val wsqBytes = url.openStream().use { inputStream -> inputStream.readAllBytes() }
    val imageInfo = ImageParser.readImageInfo(wsqBytes)
    with(imageInfo) {
        compressionAlgorithm shouldBe CompressionAlgorithm.WSQ20
        width shouldBe 400
        height shouldBe 512
        pixelsPerInchX shouldBe 500
        pixelsPerInchY shouldBe 500
        colorSpace shouldBe "GRAY"
        pixelDepth shouldBe 8
    }
}
7894
}

0 commit comments

Comments
 (0)