diff --git a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/e2etest/MkvPlaybackTest.java b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/e2etest/MkvPlaybackTest.java index bc557596504..457e7e1b00d 100644 --- a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/e2etest/MkvPlaybackTest.java +++ b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/e2etest/MkvPlaybackTest.java @@ -57,7 +57,8 @@ public static ImmutableList mediaSamples() { "sample_with_overlapping_srt.mkv", "sample_with_vtt_subtitles.mkv", "sample_with_null_terminated_vtt_subtitles.mkv", - "sample_with_vobsub.mkv"); + "sample_with_vobsub.mkv", + "sample_recursive_seekhead.mkv"); } @ParameterizedRobolectricTestRunner.Parameter public String inputFile; diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mkv/MatroskaExtractor.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mkv/MatroskaExtractor.java index 405951074c6..8ea5f9b3fa3 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/mkv/MatroskaExtractor.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mkv/MatroskaExtractor.java @@ -456,6 +456,9 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser // Cue related elements. private boolean seekForCues; + private boolean seekForSeekContent; + private long seekPositionAfterSeekingForHead = C.INDEX_UNSET; + private long seekHeadContentPosition = C.INDEX_UNSET; private long cuesContentPosition = C.INDEX_UNSET; private long seekPositionAfterBuildingCues = C.INDEX_UNSET; private long clusterTimecodeUs = C.TIME_UNSET; @@ -764,6 +767,10 @@ protected void startMasterElement(int id, long contentPosition, long contentSize if (seekForCuesEnabled && cuesContentPosition != C.INDEX_UNSET) { // We know where the Cues element is located. Seek to request it. 
seekForCues = true; + } else if (seekForCuesEnabled && seekHeadContentPosition != C.INDEX_UNSET) { + // We do not know where the cues are located, however we have a seek-head entry + // we have not yet visited + seekForSeekContent = true; } else { // We don't know where the Cues element is located. It's most likely omitted. Allow // playback, but disable seeking. @@ -816,9 +823,16 @@ protected void endMasterElement(int id) throws ParserException { if (seekEntryId == UNSET_ENTRY_ID || seekEntryPosition == C.INDEX_UNSET) { throw ParserException.createForMalformedContainer( "Mandatory element SeekID or SeekPosition not found", /* cause= */ null); - } - if (seekEntryId == ID_CUES) { + } else if (seekEntryId == ID_SEEK_HEAD) { + seekHeadContentPosition = seekEntryPosition; + } else if (seekEntryId == ID_CUES) { cuesContentPosition = seekEntryPosition; + + // We are currently seeking from the seek-head, so we seek again to get to the cues + // instead of waiting for the cluster + if (seekForCuesEnabled && seekPositionAfterSeekingForHead != C.INDEX_UNSET) { + seekForCues = true; + } } break; case ID_CUES: @@ -1936,6 +1950,13 @@ private SeekMap buildSeekMap( * @return Whether the seek position was updated. */ private boolean maybeSeekForCues(PositionHolder seekPosition, long currentPosition) { + if (seekForSeekContent) { + seekPositionAfterSeekingForHead = currentPosition; + seekPosition.position = seekHeadContentPosition; + seekForSeekContent = false; + return true; + } + if (seekForCues) { seekPositionAfterBuildingCues = currentPosition; seekPosition.position = cuesContentPosition; @@ -1949,6 +1970,16 @@ private boolean maybeSeekForCues(PositionHolder seekPosition, long currentPositi seekPositionAfterBuildingCues = C.INDEX_UNSET; return true; } + + // After we have seeked back from seekPositionAfterBuildingCues seek back again to parse the + // rest of the file. 
This ends the double jump that is performed when the beginning metadata + // only contains an ID_SEEK_HEAD without an ID_CUES. + if (sentSeekMap && seekPositionAfterSeekingForHead != C.INDEX_UNSET) { + seekPosition.position = seekPositionAfterSeekingForHead; + seekPositionAfterSeekingForHead = C.INDEX_UNSET; + return true; + } + return false; } diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/mkv/MatroskaExtractorTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/mkv/MatroskaExtractorTest.java index f267eff6aa1..b92d8783ba9 100644 --- a/libraries/extractor/src/test/java/androidx/media3/extractor/mkv/MatroskaExtractorTest.java +++ b/libraries/extractor/src/test/java/androidx/media3/extractor/mkv/MatroskaExtractorTest.java @@ -140,6 +140,17 @@ public void mkvSample_withNullTerminatedVttSubtitles() throws Exception { simulationConfig); } + // https://github.com/androidx/media/issues/1143 + @Test + public void mkvSample_withRecursiveSeekHead() throws Exception { + ExtractorAsserts.assertBehavior( + getExtractorFactory(subtitlesParsedDuringExtraction), + "media/mkv/sample_recursive_seekhead.mkv", + getAssertionConfigWithPrefix( + "media/mkv/sample_recursive_seekhead.mkv", subtitlesParsedDuringExtraction), + simulationConfig); + } + @Test public void mkvSample_withVorbisAudio() throws Exception { ExtractorAsserts.assertBehavior( diff --git a/libraries/test_data/src/test/assets/media/mkv/sample_recursive_seekhead.mkv b/libraries/test_data/src/test/assets/media/mkv/sample_recursive_seekhead.mkv new file mode 100644 index 00000000000..9714113e445 Binary files /dev/null and b/libraries/test_data/src/test/assets/media/mkv/sample_recursive_seekhead.mkv differ diff --git a/libraries/test_data/src/test/assets/playbackdumps/mkv/sample_recursive_seekhead.mkv.dump b/libraries/test_data/src/test/assets/playbackdumps/mkv/sample_recursive_seekhead.mkv.dump new file mode 100644 index 00000000000..cecf8863949 --- /dev/null +++ 
b/libraries/test_data/src/test/assets/playbackdumps/mkv/sample_recursive_seekhead.mkv.dump @@ -0,0 +1,524 @@ +MediaCodecAdapter (exotest.audio.ac3): + inputBuffers: + count = 30 + input buffer #0: + timeUs = 1000000129000 + contents = length 416, hash 211F2286 + input buffer #1: + timeUs = 1000000164000 + contents = length 418, hash 77425A86 + input buffer #2: + timeUs = 1000000198829 + contents = length 418, hash A0FE5CA1 + input buffer #3: + timeUs = 1000000233000 + contents = length 418, hash 2309B066 + input buffer #4: + timeUs = 1000000268000 + contents = length 418, hash 928A653B + input buffer #5: + timeUs = 1000000303000 + contents = length 418, hash 3422F0CB + input buffer #6: + timeUs = 1000000337829 + contents = length 418, hash EFF43D5B + input buffer #7: + timeUs = 1000000373000 + contents = length 418, hash FC8093C7 + input buffer #8: + timeUs = 1000000408000 + contents = length 418, hash CCC08A16 + input buffer #9: + timeUs = 1000000443000 + contents = length 418, hash 2A6EE863 + input buffer #10: + timeUs = 1000000477829 + contents = length 418, hash D69A9251 + input buffer #11: + timeUs = 1000000512000 + contents = length 418, hash BCFB758D + input buffer #12: + timeUs = 1000000547000 + contents = length 418, hash 11B66799 + input buffer #13: + timeUs = 1000000581829 + contents = length 418, hash C824D392 + input buffer #14: + timeUs = 1000000617000 + contents = length 418, hash C167D872 + input buffer #15: + timeUs = 1000000652000 + contents = length 418, hash 4221C855 + input buffer #16: + timeUs = 1000000687000 + contents = length 418, hash 4D4FF934 + input buffer #17: + timeUs = 1000000721829 + contents = length 418, hash 984AA025 + input buffer #18: + timeUs = 1000000757000 + contents = length 418, hash BB788B46 + input buffer #19: + timeUs = 1000000791000 + contents = length 418, hash 9EFBFD97 + input buffer #20: + timeUs = 1000000826000 + contents = length 418, hash DF1A460C + input buffer #21: + timeUs = 1000000860829 + contents = length 
418, hash 2BDB56A + input buffer #22: + timeUs = 1000000896000 + contents = length 418, hash CA230060 + input buffer #23: + timeUs = 1000000931000 + contents = length 418, hash D2F19F41 + input buffer #24: + timeUs = 1000000965000 + contents = length 418, hash AF392D79 + input buffer #25: + timeUs = 1000000999829 + contents = length 418, hash C5D7F2A3 + input buffer #26: + timeUs = 1000001035000 + contents = length 418, hash 733A35AE + input buffer #27: + timeUs = 1000001069829 + contents = length 418, hash DE46E5D3 + input buffer #28: + timeUs = 1000001104000 + contents = length 418, hash 56AB8D37 + input buffer #29: + timeUs = 0 + flags = 4 + contents = length 0, hash 1 + outputBuffers: + count = 29 + output buffer #0: + timeUs = 1000000129000 + size = 0 + rendered = false + output buffer #1: + timeUs = 1000000164000 + size = 0 + rendered = false + output buffer #2: + timeUs = 1000000198829 + size = 0 + rendered = false + output buffer #3: + timeUs = 1000000233000 + size = 0 + rendered = false + output buffer #4: + timeUs = 1000000268000 + size = 0 + rendered = false + output buffer #5: + timeUs = 1000000303000 + size = 0 + rendered = false + output buffer #6: + timeUs = 1000000337829 + size = 0 + rendered = false + output buffer #7: + timeUs = 1000000373000 + size = 0 + rendered = false + output buffer #8: + timeUs = 1000000408000 + size = 0 + rendered = false + output buffer #9: + timeUs = 1000000443000 + size = 0 + rendered = false + output buffer #10: + timeUs = 1000000477829 + size = 0 + rendered = false + output buffer #11: + timeUs = 1000000512000 + size = 0 + rendered = false + output buffer #12: + timeUs = 1000000547000 + size = 0 + rendered = false + output buffer #13: + timeUs = 1000000581829 + size = 0 + rendered = false + output buffer #14: + timeUs = 1000000617000 + size = 0 + rendered = false + output buffer #15: + timeUs = 1000000652000 + size = 0 + rendered = false + output buffer #16: + timeUs = 1000000687000 + size = 0 + rendered = false + 
output buffer #17: + timeUs = 1000000721829 + size = 0 + rendered = false + output buffer #18: + timeUs = 1000000757000 + size = 0 + rendered = false + output buffer #19: + timeUs = 1000000791000 + size = 0 + rendered = false + output buffer #20: + timeUs = 1000000826000 + size = 0 + rendered = false + output buffer #21: + timeUs = 1000000860829 + size = 0 + rendered = false + output buffer #22: + timeUs = 1000000896000 + size = 0 + rendered = false + output buffer #23: + timeUs = 1000000931000 + size = 0 + rendered = false + output buffer #24: + timeUs = 1000000965000 + size = 0 + rendered = false + output buffer #25: + timeUs = 1000000999829 + size = 0 + rendered = false + output buffer #26: + timeUs = 1000001035000 + size = 0 + rendered = false + output buffer #27: + timeUs = 1000001069829 + size = 0 + rendered = false + output buffer #28: + timeUs = 1000001104000 + size = 0 + rendered = false +MediaCodecAdapter (exotest.video.avc): + inputBuffers: + count = 31 + input buffer #0: + timeUs = 1000000067000 + contents = length 36477, hash F0F36CFE + input buffer #1: + timeUs = 1000000134000 + contents = length 5341, hash 40B85E2 + input buffer #2: + timeUs = 1000000100000 + contents = length 596, hash 357B4D92 + input buffer #3: + timeUs = 1000000267000 + contents = length 7704, hash A39EDA06 + input buffer #4: + timeUs = 1000000200000 + contents = length 989, hash 2813C72D + input buffer #5: + timeUs = 1000000167000 + contents = length 721, hash C50D1C73 + input buffer #6: + timeUs = 1000000234000 + contents = length 519, hash 65FE1911 + input buffer #7: + timeUs = 1000000400000 + contents = length 6160, hash E1CAC0EC + input buffer #8: + timeUs = 1000000334000 + contents = length 953, hash 7160C661 + input buffer #9: + timeUs = 1000000300000 + contents = length 620, hash 7A7AE07C + input buffer #10: + timeUs = 1000000367000 + contents = length 405, hash 5CC7F4E7 + input buffer #11: + timeUs = 1000000500000 + contents = length 4852, hash 9DB6979D + input buffer 
#12: + timeUs = 1000000467000 + contents = length 547, hash E31A6979 + input buffer #13: + timeUs = 1000000434000 + contents = length 570, hash FEC40D00 + input buffer #14: + timeUs = 1000000634000 + contents = length 5525, hash 7C478F7E + input buffer #15: + timeUs = 1000000567000 + contents = length 1082, hash DA07059A + input buffer #16: + timeUs = 1000000534000 + contents = length 807, hash 93478E6B + input buffer #17: + timeUs = 1000000600000 + contents = length 744, hash 9A8E6026 + input buffer #18: + timeUs = 1000000767000 + contents = length 4732, hash C73B23C0 + input buffer #19: + timeUs = 1000000700000 + contents = length 1004, hash 8A19A228 + input buffer #20: + timeUs = 1000000667000 + contents = length 794, hash 8126022C + input buffer #21: + timeUs = 1000000734000 + contents = length 645, hash F08300E5 + input buffer #22: + timeUs = 1000000900000 + contents = length 2684, hash 727FE378 + input buffer #23: + timeUs = 1000000834000 + contents = length 787, hash 419A7821 + input buffer #24: + timeUs = 1000000800000 + contents = length 649, hash 5C159346 + input buffer #25: + timeUs = 1000000867000 + contents = length 509, hash F912D655 + input buffer #26: + timeUs = 1000001034000 + contents = length 1226, hash 29815C21 + input buffer #27: + timeUs = 1000000967000 + contents = length 898, hash D997AD0A + input buffer #28: + timeUs = 1000000934000 + contents = length 476, hash A0423645 + input buffer #29: + timeUs = 1000001000000 + contents = length 486, hash DDF32CBB + input buffer #30: + timeUs = 0 + flags = 4 + contents = length 0, hash 1 + outputBuffers: + count = 30 + output buffer #0: + timeUs = 1000000067000 + size = 36477 + rendered = true + output buffer #1: + timeUs = 1000000134000 + size = 5341 + rendered = true + output buffer #2: + timeUs = 1000000100000 + size = 596 + rendered = true + output buffer #3: + timeUs = 1000000267000 + size = 7704 + rendered = true + output buffer #4: + timeUs = 1000000200000 + size = 989 + rendered = true + 
output buffer #5: + timeUs = 1000000167000 + size = 721 + rendered = true + output buffer #6: + timeUs = 1000000234000 + size = 519 + rendered = true + output buffer #7: + timeUs = 1000000400000 + size = 6160 + rendered = true + output buffer #8: + timeUs = 1000000334000 + size = 953 + rendered = true + output buffer #9: + timeUs = 1000000300000 + size = 620 + rendered = true + output buffer #10: + timeUs = 1000000367000 + size = 405 + rendered = true + output buffer #11: + timeUs = 1000000500000 + size = 4852 + rendered = true + output buffer #12: + timeUs = 1000000467000 + size = 547 + rendered = true + output buffer #13: + timeUs = 1000000434000 + size = 570 + rendered = true + output buffer #14: + timeUs = 1000000634000 + size = 5525 + rendered = true + output buffer #15: + timeUs = 1000000567000 + size = 1082 + rendered = true + output buffer #16: + timeUs = 1000000534000 + size = 807 + rendered = true + output buffer #17: + timeUs = 1000000600000 + size = 744 + rendered = true + output buffer #18: + timeUs = 1000000767000 + size = 4732 + rendered = true + output buffer #19: + timeUs = 1000000700000 + size = 1004 + rendered = true + output buffer #20: + timeUs = 1000000667000 + size = 794 + rendered = true + output buffer #21: + timeUs = 1000000734000 + size = 645 + rendered = true + output buffer #22: + timeUs = 1000000900000 + size = 2684 + rendered = true + output buffer #23: + timeUs = 1000000834000 + size = 787 + rendered = true + output buffer #24: + timeUs = 1000000800000 + size = 649 + rendered = true + output buffer #25: + timeUs = 1000000867000 + size = 509 + rendered = true + output buffer #26: + timeUs = 1000001034000 + size = 1226 + rendered = true + output buffer #27: + timeUs = 1000000967000 + size = 898 + rendered = true + output buffer #28: + timeUs = 1000000934000 + size = 476 + rendered = true + output buffer #29: + timeUs = 1000001000000 + size = 486 + rendered = true +AudioSink: + buffer count = 29 + config: + pcmEncoding = 2 + 
channelCount = 1 + sampleRate = 44100 + buffer #0: + time = 1000000129000 + data = 1 + buffer #1: + time = 1000000164000 + data = 1 + buffer #2: + time = 1000000198829 + data = 1 + buffer #3: + time = 1000000233000 + data = 1 + buffer #4: + time = 1000000268000 + data = 1 + buffer #5: + time = 1000000303000 + data = 1 + buffer #6: + time = 1000000337829 + data = 1 + buffer #7: + time = 1000000373000 + data = 1 + buffer #8: + time = 1000000408000 + data = 1 + buffer #9: + time = 1000000443000 + data = 1 + buffer #10: + time = 1000000477829 + data = 1 + buffer #11: + time = 1000000512000 + data = 1 + buffer #12: + time = 1000000547000 + data = 1 + buffer #13: + time = 1000000581829 + data = 1 + buffer #14: + time = 1000000617000 + data = 1 + buffer #15: + time = 1000000652000 + data = 1 + buffer #16: + time = 1000000687000 + data = 1 + buffer #17: + time = 1000000721829 + data = 1 + buffer #18: + time = 1000000757000 + data = 1 + buffer #19: + time = 1000000791000 + data = 1 + buffer #20: + time = 1000000826000 + data = 1 + buffer #21: + time = 1000000860829 + data = 1 + buffer #22: + time = 1000000896000 + data = 1 + buffer #23: + time = 1000000931000 + data = 1 + buffer #24: + time = 1000000965000 + data = 1 + buffer #25: + time = 1000000999829 + data = 1 + buffer #26: + time = 1000001035000 + data = 1 + buffer #27: + time = 1000001069829 + data = 1 + buffer #28: + time = 1000001104000 + data = 1