Skip to content

Commit ddafaa4

Browse files
committed
Optimize parsing of binary property lists containing many references to the same object
1 parent 81be6e6 commit ddafaa4

File tree

1 file changed

+42
-14
lines changed

1 file changed

+42
-14
lines changed

src/main/java/com/dd/plist/BinaryPropertyListParser.java

+42-14
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,8 @@
3131
import java.nio.charset.StandardCharsets;
3232
import java.nio.file.Files;
3333
import java.nio.file.Path;
34+
import java.util.HashMap;
35+
import java.util.HashSet;
3436
import java.util.Objects;
3537
import java.util.function.BiFunction;
3638

@@ -82,6 +84,7 @@ public final class BinaryPropertyListParser {
8284
private int offsetSize;
8385
private int numObjects;
8486
private int offsetTableOffset;
87+
private HashMap<Integer, NSObject> parsedObjects = new HashMap<>();
8588

8689
/**
8790
* Protected constructor so that instantiation is fully controlled by the
@@ -320,44 +323,66 @@ private NSObject doParse(byte[] data) throws PropertyListFormatException, Unsupp
320323
*/
321324
private NSObject parseObject(ParsedObjectStack stack, int obj) throws PropertyListFormatException, UnsupportedEncodingException {
322325
stack = stack.push(obj);
326+
327+
if (this.parsedObjects.containsKey(obj)) {
328+
return this.parsedObjects.get(obj);
329+
}
330+
323331
int offset = this.getObjectOffset(obj);
324332
byte type = this.bytes[offset];
325333
int objType = (type & 0xF0) >> 4;
326334
int objInfo = type & 0x0F;
335+
NSObject result;
327336
switch (objType) {
328337
case SIMPLE_TYPE:
329-
return this.parseSimpleObject(offset, objInfo, objType, obj);
338+
result = this.parseSimpleObject(offset, objInfo, objType, obj);
339+
break;
330340
case INT_TYPE:
331-
return this.parseNumber(offset, objInfo, NSNumber.INTEGER);
341+
result = this.parseNumber(offset, objInfo, NSNumber.INTEGER);
342+
break;
332343
case REAL_TYPE:
333-
return this.parseNumber(offset, objInfo, NSNumber.REAL);
344+
result = this.parseNumber(offset, objInfo, NSNumber.REAL);
345+
break;
334346
case DATE_TYPE:
335-
return this.parseDate(offset, objInfo);
347+
result = this.parseDate(offset, objInfo);
348+
break;
336349
case DATA_TYPE:
337-
return this.parseData(offset, objInfo);
350+
result = this.parseData(offset, objInfo);
351+
break;
338352
case ASCII_STRING_TYPE:
339-
return this.parseString(offset, objInfo, (o, l) -> l, StandardCharsets.US_ASCII.name());
353+
result = this.parseString(offset, objInfo, (o, l) -> l, StandardCharsets.US_ASCII.name());
354+
break;
340355
case UTF16_STRING_TYPE:
341356
// UTF-16 characters can have variable length, but the Core Foundation reference implementation
342357
// assumes 2 byte characters, thus only covering the Basic Multilingual Plane
343-
return this.parseString(offset, objInfo, (o, l) -> 2 * l, StandardCharsets.UTF_16BE.name());
358+
result = this.parseString(offset, objInfo, (o, l) -> 2 * l, StandardCharsets.UTF_16BE.name());
359+
break;
344360
case UTF8_STRING_TYPE:
345361
// UTF-8 characters can have variable length, so we need to calculate the byte length dynamically
346362
// by reading the UTF-8 characters one by one
347-
return this.parseString(offset, objInfo, this::calculateUtf8StringLength, StandardCharsets.UTF_8.name());
363+
result = this.parseString(offset, objInfo, this::calculateUtf8StringLength, StandardCharsets.UTF_8.name());
364+
break;
348365
case UID_TYPE:
349-
return this.parseUid(obj, offset, objInfo + 1);
366+
result = this.parseUid(obj, offset, objInfo + 1);
367+
break;
350368
case ARRAY_TYPE:
351-
return this.parseArray(offset, objInfo, stack);
369+
result = this.parseArray(offset, objInfo, stack);
370+
break;
352371
case ORDERED_SET_TYPE:
353-
return this.parseSet(offset, objInfo, true, stack);
372+
result = this.parseSet(offset, objInfo, true, stack);
373+
break;
354374
case SET_TYPE:
355-
return this.parseSet(offset, objInfo, false, stack);
375+
result = this.parseSet(offset, objInfo, false, stack);
376+
break;
356377
case DICTIONARY_TYPE:
357-
return this.parseDictionary(offset, objInfo, stack);
378+
result = this.parseDictionary(offset, objInfo, stack);
379+
break;
358380
default:
359381
throw new PropertyListFormatException("The given binary property list contains an object of unknown type (" + objType + ")");
360382
}
383+
384+
this.parsedObjects.put(obj, result);
385+
return result;
361386
}
362387

363388
private NSDate parseDate(int offset, int objInfo) throws PropertyListFormatException {
@@ -450,9 +475,12 @@ private NSSet parseSet(int offset, int objInfo, boolean ordered, ParsedObjectSta
450475
int setOffset = offset + lengthAndOffset[1];
451476

452477
NSSet set = new NSSet(ordered);
478+
HashSet<Integer> addedObjectReferences = new HashSet<>();
453479
for (int i = 0; i < length; i++) {
454480
int objRef = this.parseObjectReferenceFromList(setOffset, i);
455-
set.addObject(this.parseObject(stack, objRef));
481+
if (addedObjectReferences.add(objRef)) {
482+
set.addObject(this.parseObject(stack, objRef));
483+
}
456484
}
457485

458486
return set;

0 commit comments

Comments
 (0)