diff --git a/format/mp4/boxes.go b/format/mp4/boxes.go index e388019f0..d7fbc2de5 100644 --- a/format/mp4/boxes.go +++ b/format/mp4/boxes.go @@ -591,13 +591,14 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) { size := d.FieldU32("size") dataFormat := d.FieldUTF8("type", 4, dataFormatNames, scalar.ActualTrimSpace) subType := "" - if t := ctx.currentTrack(); t != nil { - t.sampleDescriptions = append(t.sampleDescriptions, sampleDescription{ + track := ctx.currentTrack() + if track != nil { + track.sampleDescriptions = append(track.sampleDescriptions, sampleDescription{ dataFormat: dataFormat, }) - if t.seenHdlr { - subType = t.subType + if track.seenHdlr { + subType = track.subType } else { // TODO: seems to be ffmpeg mov.c, where is this documented in specs? // no hdlr box found, guess using dataFormat @@ -617,7 +618,6 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) { switch subType { case "soun", "vide": - version := d.FieldU16("version") d.FieldU16("revision_level") d.FieldU32("max_packet_size") // TODO: vendor for some subtype? @@ -626,9 +626,10 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) { case "soun": // AudioSampleEntry // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html#//apple_ref/doc/uid/TP40000939-CH205-SW1 + var numAudioChannels uint64 switch version { case 0: - d.FieldU16("num_audio_channels") + numAudioChannels = d.FieldU16("num_audio_channels") d.FieldU16("sample_size") d.FieldU16("compression_id") d.FieldU16("packet_size") @@ -637,7 +638,7 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) { decodeBoxes(ctx, d) } case 1: - d.FieldU16("num_audio_channels") + numAudioChannels = d.FieldU16("num_audio_channels") d.FieldU16("sample_size") d.FieldU16("compression_id") d.FieldU16("packet_size") @@ -657,7 +658,7 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) { d.FieldU32("always_65536") d.FieldU32("size_of_struct_only") d.FieldF64("audio_sample_rate") - d.FieldU32("num_audio_channels") + numAudioChannels = d.FieldU32("num_audio_channels") d.FieldU32("always_7f000000") d.FieldU32("const_bits_per_channel") d.FieldU32("format_specific_flags") @@ -669,6 +670,9 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) { default: d.FieldRawLen("data", d.BitsLeft()) } + if track != nil { + track.stsdNumAudioChannels = numAudioChannels + } case "vide": // VideoSampleEntry // TODO: version 0 and 1 same? @@ -1832,6 +1836,87 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) { d.FieldRawLen("uid", 128) } }) + case "pcmC": + d.FieldU8("version") + d.FieldU24("flags") + d.FieldU8("format_flags") + d.FieldU8("sample_size") + case "chnl": + version := d.FieldU8("version") + d.FieldU24("flags") + + if version == 0 { + hasObjects := false + hasChannels := false + d.FieldStruct("stream_structure", func(d *decode.D) { + d.FieldRawLen("unused", 6) + hasObjects = d.FieldBool("objects") + hasChannels = d.FieldBool("channels") + }) + if hasChannels { + definedLayout := d.FieldU8("defined_layout") + if definedLayout == 0 { + track := ctx.currentTrack() + if track == nil { + d.FieldRawLen("rest", d.BitsLeft()) + break + } + d.FieldArray("channels", func(d *decode.D) { + for i := 0; i < int(track.stsdNumAudioChannels); i++ { + d.FieldStruct("channel", func(d *decode.D) { + speakerPosition := d.FieldU8("speaker_position") + if speakerPosition == 126 { + d.FieldS16("azimuth") + d.FieldS8("elevation") + } + }) + } + }) + } else { + d.FieldU64("omitted_channels_map") + } + } + if hasObjects { + d.FieldU8("object_count") + } + } else { + hasChannels := false + d.FieldStruct("stream_structure", func(d *decode.D) { + d.FieldRawLen("unused", 2) + d.FieldBool("objects") + hasChannels = d.FieldBool("channels") + }) + d.FieldU4("format_ordering") + d.FieldU8("base_channel_count") + if hasChannels { + definedLayout := d.FieldU8("defined_layout") + if definedLayout == 0 { + layoutChannelCount := d.FieldU8("layout_channel_count") + d.FieldArray("channels", func(d *decode.D) { + for i := 0; i < int(layoutChannelCount); i++ { + d.FieldStruct("channel", func(d *decode.D) { + speakerPosition := d.FieldU8("speaker_position") + if speakerPosition == 126 { + d.FieldS16("azimuth") + d.FieldS8("elevation") + } + }) + } + }) + } else { + d.FieldRawLen("reserved", 4) + d.FieldU3("channel_order_definition") + omittedChannelsPresent := d.FieldBool("omitted_channels_present") + if omittedChannelsPresent { + d.FieldU64("omitted_channels_map") + } + } + } + // if hasObjects { + // // ISO/IEC 14496-12:2022: + // // > object_count is derived from baseChannelCount + // } + } default: // there are at least 4 ways to encode udta metadata in mov/mp4 files. diff --git a/format/mp4/mp4.go b/format/mp4/mp4.go index 30f36ea0d..33241c2b2 100644 --- a/format/mp4/mp4.go +++ b/format/mp4/mp4.go @@ -136,20 +136,21 @@ type stsz struct { } type track struct { - seenHdlr bool - fragment bool - id int - sampleDescriptions []sampleDescription - subType string - stco []int64 - stsc []stsc - stsz []stsz - formatInArg any - objectType int // if data format is "mp4a" - defaultIVSize int - moofs []*moof // for fmp4 - dref bool - drefURL string + seenHdlr bool + fragment bool + id int + sampleDescriptions []sampleDescription + subType string + stco []int64 + stsc []stsc + stsz []stsz + formatInArg any + objectType int // if data format is "mp4a" + defaultIVSize int + moofs []*moof // for fmp4 + dref bool + drefURL string + stsdNumAudioChannels uint64 } type pathEntry struct { diff --git a/format/mp4/testdata/chnl-ver1 b/format/mp4/testdata/chnl-ver1 new file mode 100644 index 000000000..ab871ee98 Binary files /dev/null and b/format/mp4/testdata/chnl-ver1 differ diff --git a/format/mp4/testdata/chnl-ver1.fqtest b/format/mp4/testdata/chnl-ver1.fqtest new file mode 100644 index 000000000..a284f3963 --- /dev/null +++ b/format/mp4/testdata/chnl-ver1.fqtest @@ -0,0 +1,18 @@ +$ fq -o force=true -d mp4 dv chnl-ver1 + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: chnl-ver1 (mp4) 0x0-0x10 (16) + | | | boxes[0:1]: 0x0-0x10 (16) + | | | [0]{}: box 0x0-0x10 (16) +0x00|00 00 00 10 |.... | size: 16 0x0-0x4 (4) +0x00| 63 68 6e 6c | chnl | type: "chnl" 0x4-0x8 (4) +0x00| 01 | . | version: 1 0x8-0x9 (1) +0x00| 00 00 00 | ... | flags: 0 0x9-0xc (3) + | | | stream_structure{}: 0xc-0xc.4 (0.4) +0x00| 11 | . | unused: raw bits 0xc-0xc.2 (0.2) +0x00| 11 | . | objects: false 0xc.2-0xc.3 (0.1) +0x00| 11 | . | channels: true 0xc.3-0xc.4 (0.1) +0x00| 11 | . | format_ordering: 1 0xc.4-0xd (0.4) +0x00| 02 | . | base_channel_count: 2 0xd-0xe (1) +0x00| 02 | . | defined_layout: 2 0xe-0xf (1) +0x00| 00| .| reserved: raw bits 0xf-0xf.4 (0.4) +0x00| 00| .| channel_order_definition: 0 0xf.4-0xf.7 (0.3) +0x00| 00| .| omitted_channels_present: false 0xf.7-0x10 (0.1) diff --git a/format/mp4/testdata/pcmC b/format/mp4/testdata/pcmC new file mode 100644 index 000000000..11c70cc25 Binary files /dev/null and b/format/mp4/testdata/pcmC differ diff --git a/format/mp4/testdata/pcmC.fqtest b/format/mp4/testdata/pcmC.fqtest new file mode 100644 index 000000000..2e2dc801f --- /dev/null +++ b/format/mp4/testdata/pcmC.fqtest @@ -0,0 +1,10 @@ +$ fq -o force=true -d mp4 dv pcmC + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: pcmC (mp4) 0x0-0xe (14) + | | | boxes[0:1]: 0x0-0xe (14) + | | | [0]{}: box 0x0-0xe (14) +0x0|00 00 00 0e |.... | size: 14 0x0-0x4 (4) +0x0| 70 63 6d 43 | pcmC | type: "pcmC" 0x4-0x8 (4) +0x0| 00 | . | version: 0 0x8-0x9 (1) +0x0| 00 00 00 | ... | flags: 0 0x9-0xc (3) +0x0| 01 | . | format_flags: 1 0xc-0xd (1) +0x0| 18| | .| | sample_size: 24 0xd-0xe (1)