@@ -567,10 +567,8 @@ VideoDecoder::FrameOutput VideoDecoder::getNextFrame() {
567
567
568
568
VideoDecoder::FrameOutput VideoDecoder::getNextFrameInternal (
569
569
std::optional<torch::Tensor> preAllocatedOutputTensor) {
570
- AVFrameStream avFrameStream = decodeAVFrame ([this ](AVFrame* avFrame) {
571
- StreamInfo& activeStreamInfo = streamInfos_[activeStreamIndex_];
572
- return avFrame->pts >= activeStreamInfo.discardFramesBeforePts ;
573
- });
570
+ AVFrameStream avFrameStream = decodeAVFrame (
571
+ [this ](AVFrame* avFrame) { return avFrame->pts >= cursor_; });
574
572
return convertAVFrameToFrameOutput (avFrameStream, preAllocatedOutputTensor);
575
573
}
576
574
@@ -842,7 +840,9 @@ VideoDecoder::FrameBatchOutput VideoDecoder::getFramesPlayedInRange(
842
840
// --------------------------------------------------------------------------
843
841
844
842
void VideoDecoder::setCursorPtsInSeconds (double seconds) {
845
- desiredPtsSeconds_ = seconds;
843
+ cursorWasJustSet_ = true ;
844
+ cursor_ =
845
+ secondsToClosestPts (seconds, streamInfos_[activeStreamIndex_].timeBase );
846
846
}
847
847
848
848
/*
@@ -870,25 +870,25 @@ I P P P I P P P I P P I P P I P
870
870
871
871
(2) is more efficient than (1) if there is an I frame between x and y.
872
872
*/
873
- bool VideoDecoder::canWeAvoidSeeking (int64_t targetPts ) const {
873
+ bool VideoDecoder::canWeAvoidSeeking () const {
874
874
int64_t lastDecodedAvFramePts =
875
875
streamInfos_.at (activeStreamIndex_).lastDecodedAvFramePts ;
876
- if (targetPts < lastDecodedAvFramePts) {
876
+ if (cursor_ < lastDecodedAvFramePts) {
877
877
// We can never skip a seek if we are seeking backwards.
878
878
return false ;
879
879
}
880
- if (lastDecodedAvFramePts == targetPts ) {
880
+ if (lastDecodedAvFramePts == cursor_ ) {
881
881
// We are seeking to the exact same frame as we are currently at. Without
882
882
// caching we have to rewind back and decode the frame again.
883
883
// TODO: https://github.com/pytorch-labs/torchcodec/issues/84 we could
884
884
// implement caching.
885
885
return false ;
886
886
}
887
887
// We are seeking forwards.
888
- // We can only skip a seek if both lastDecodedAvFramePts and targetPts share
889
- // the same keyframe.
888
+ // We can only skip a seek if both lastDecodedAvFramePts and
889
+ // cursor_ share the same keyframe.
890
890
int lastDecodedAvFrameIndex = getKeyFrameIndexForPts (lastDecodedAvFramePts);
891
- int targetKeyFrameIndex = getKeyFrameIndexForPts (targetPts );
891
+ int targetKeyFrameIndex = getKeyFrameIndexForPts (cursor_ );
892
892
return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
893
893
lastDecodedAvFrameIndex == targetKeyFrameIndex;
894
894
}
@@ -900,16 +900,14 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
900
900
validateActiveStream (AVMEDIA_TYPE_VIDEO);
901
901
StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
902
902
903
- int64_t desiredPts =
904
- secondsToClosestPts (*desiredPtsSeconds_, streamInfo.timeBase );
905
- streamInfo.discardFramesBeforePts = desiredPts;
906
-
907
903
decodeStats_.numSeeksAttempted ++;
908
- if (canWeAvoidSeeking (desiredPts )) {
904
+ if (canWeAvoidSeeking ()) {
909
905
decodeStats_.numSeeksSkipped ++;
910
906
return ;
911
907
}
912
908
909
+ int64_t desiredPts = cursor_;
910
+
913
911
// For some encodings like H265, FFMPEG sometimes seeks past the point we
914
912
// set as the max_ts. So we use our own index to give it the exact pts of
915
913
// the key frame that we want to seek to.
@@ -948,10 +946,9 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
948
946
949
947
resetDecodeStats ();
950
948
951
- // Seek if needed.
952
- if (desiredPtsSeconds_.has_value ()) {
949
+ if (cursorWasJustSet_) {
953
950
maybeSeekToBeforeDesiredPts ();
954
- desiredPtsSeconds_ = std::nullopt ;
951
+ cursorWasJustSet_ = false ;
955
952
}
956
953
957
954
StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
0 commit comments