@@ -194,38 +194,40 @@ object WsqParser {
194194 */
private fun findWSQPixelsPerInch(data: ByteArray): Int? {
    // Walk the buffer one byte at a time looking for the 0xFF 0xA8 marker pair. The loop
    // bound keeps the marker and the two length bytes that follow it inside the array.
    var cursor = 0
    while (cursor < data.size - 4) {
        val atMarker = data[cursor] == 0xFF.toByte() && data[cursor + 1] == 0xA8.toByte()
        if (!atMarker) {
            cursor++
            continue
        }

        val segmentLength = readUInt16BigEndian(data, cursor + 2)

        // The first segment that yields a value wins; later segments are not inspected.
        extractPpiFromSegment(data, cursor, segmentLength)?.let { return it }

        // Skip the marker plus the declared segment length before resuming the scan.
        cursor += 2 + segmentLength
    }
    return null
}

/**
 * Tries to read a pixels-per-inch value from the text of one marker segment.
 *
 * The segment is expected to be laid out as: two marker bytes at [pos], a two-byte length
 * at `pos + 2`, then `length - 2` bytes of text starting at `pos + 4` (the declared length
 * counts its own two bytes).
 *
 * @param data the complete file contents.
 * @param pos offset of the segment's marker within [data].
 * @param length the segment length as declared in the file, including the two length bytes.
 * @return the parsed PPI value, or `null` when the segment is malformed, truncated, or does
 *   not carry a usable PPI entry.
 */
private fun extractPpiFromSegment(data: ByteArray, pos: Int, length: Int): Int? {
    // A declared length below 2 cannot even hold the length field; without this guard the
    // String constructor below would be called with a negative count and throw.
    if (length < 2) return null
    // Reject segments whose declared extent runs past the end of the buffer.
    if (pos + 2 + length > data.size) return null

    // ISO_8859_1 maps every byte to a character, so stray binary bytes never fail decoding.
    val comment = String(data, pos + 4, length - 2, Charsets.ISO_8859_1)

    // 1. Preferred form: "PPI <digits>" separated by whitespace, in any letter case.
    val ppiRegex = Regex("""\bPPI\s+(\d+)""", RegexOption.IGNORE_CASE)
    val directMatch = ppiRegex.find(comment)?.groupValues?.get(1)?.toIntOrNull()
    if (directMatch != null) return directMatch

    // 2. Fallback for NIST_COM comments whose PPI entry uses another separator (e.g. "PPI=500").
    //    Only tokens that actually mention PPI are considered, and only the digits after the
    //    key are used; otherwise an unrelated number such as the count in "NIST_COM 9" would
    //    be reported as the resolution.
    if (!comment.contains("NIST_COM")) return null
    return comment
        .split(Regex("[\\s\\x00]+")) // tokens are delimited by whitespace or NUL bytes
        .firstNotNullOfOrNull { token ->
            token.takeIf { it.contains("PPI") }
                ?.substringAfter("PPI")
                ?.filter { it.isDigit() }
                ?.takeIf { it.isNotEmpty() }
                ?.toIntOrNull()
        }
}
231233}
0 commit comments