mp4: Decode pcmC and chnl boxes

wader · wader · commit 775713403f4c · 2025-02-17T13:08:06.000+01:00
diff --git a/format/mp4/boxes.go b/format/mp4/boxes.go
@@ -591,13 +591,14 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
 				size := d.FieldU32("size")
 				dataFormat := d.FieldUTF8("type", 4, dataFormatNames, scalar.ActualTrimSpace)
 				subType := ""
-				if t := ctx.currentTrack(); t != nil {
-					t.sampleDescriptions = append(t.sampleDescriptions, sampleDescription{
+				track := ctx.currentTrack()
+				if track != nil {
+					track.sampleDescriptions = append(track.sampleDescriptions, sampleDescription{
 						dataFormat: dataFormat,
 					})
 
-					if t.seenHdlr {
-						subType = t.subType
+					if track.seenHdlr {
+						subType = track.subType
 					} else {
 						// TODO: seems to be ffmpeg mov.c, where is this documented in specs?
 						// no hdlr box found, guess using dataFormat
@@ -617,7 +618,6 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
 
 					switch subType {
 					case "soun", "vide":
-
 						version := d.FieldU16("version")
 						d.FieldU16("revision_level")
 						d.FieldU32("max_packet_size") // TODO: vendor for some subtype?
@@ -626,9 +626,10 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
 						case "soun":
 							// AudioSampleEntry
 							// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html#//apple_ref/doc/uid/TP40000939-CH205-SW1
+							var numAudioChannels uint64
 							switch version {
 							case 0:
-								d.FieldU16("num_audio_channels")
+								numAudioChannels = d.FieldU16("num_audio_channels")
 								d.FieldU16("sample_size")
 								d.FieldU16("compression_id")
 								d.FieldU16("packet_size")
@@ -637,7 +638,7 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
 									decodeBoxes(ctx, d)
 								}
 							case 1:
-								d.FieldU16("num_audio_channels")
+								numAudioChannels = d.FieldU16("num_audio_channels")
 								d.FieldU16("sample_size")
 								d.FieldU16("compression_id")
 								d.FieldU16("packet_size")
@@ -657,7 +658,7 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
 								d.FieldU32("always_65536")
 								d.FieldU32("size_of_struct_only")
 								d.FieldF64("audio_sample_rate")
-								d.FieldU32("num_audio_channels")
+								numAudioChannels = d.FieldU32("num_audio_channels")
 								d.FieldU32("always_7f000000")
 								d.FieldU32("const_bits_per_channel")
 								d.FieldU32("format_specific_flags")
@@ -669,6 +670,9 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
 							default:
 								d.FieldRawLen("data", d.BitsLeft())
 							}
+							if track != nil {
+								track.stsdNumAudioChannels = numAudioChannels
+							}
 						case "vide":
 							// VideoSampleEntry
 							// TODO: version 0 and 1 same?
@@ -1832,6 +1836,88 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
 				d.FieldRawLen("uid", 128)
 			}
 		})
+	case "pcmC":
+		d.FieldU8("version")
+		d.FieldU24("flags")
+		d.FieldU8("format_flags")
+		d.FieldU8("sample_size")
+	case "chnl":
+		version := d.FieldU8("version")
+		d.FieldU24("flags")
+
+		if version == 0 {
+			hasObjects := false
+			hasChannels := false
+			d.FieldStruct("stream_structure", func(d *decode.D) {
+				d.FieldRawLen("unused", 6)
+				hasObjects = d.FieldBool("objects")
+				hasChannels = d.FieldBool("channels")
+			})
+			if hasChannels {
+				definedLayout := d.FieldU8("defined_layout")
+				if definedLayout == 0 {
+					track := ctx.currentTrack()
+					if track == nil {
+						d.FieldRawLen("rest", d.BitsLeft())
+						break
+					}
+					d.FieldArray("channels", func(d *decode.D) {
+						for i := 0; i < int(track.stsdNumAudioChannels); i++ {
+							d.FieldStruct("channel", func(d *decode.D) {
+								speakerPosition := d.FieldU8("speaker_position")
+								if speakerPosition == 126 {
+									d.FieldS16("azimuth")
+									d.FieldS8("elevation")
+								}
+							})
+						}
+					})
+				} else {
+					d.FieldU64("omitted_channels_map")
+				}
+			}
+			if hasObjects {
+				d.FieldU8("object_count")
+			}
+		} else {
+			hasObjects := false
+			hasChannels := false
+			d.FieldStruct("stream_structure", func(d *decode.D) {
+				d.FieldRawLen("unused", 2)
+				hasObjects = d.FieldBool("objects")
+				hasChannels = d.FieldBool("channels")
+			})
+			d.FieldU4("format_ordering")
+			d.FieldU8("base_channel_count")
+			if hasChannels {
+				definedLayout := d.FieldU8("defined_layout")
+				if definedLayout == 0 {
+					layoutChannelCount := d.FieldU8("layout_channel_count")
+					d.FieldArray("channels", func(d *decode.D) {
+						for i := 0; i < int(layoutChannelCount); i++ {
+							d.FieldStruct("channel", func(d *decode.D) {
+								speakerPosition := d.FieldU8("speaker_position")
+								if speakerPosition == 126 {
+									d.FieldS16("azimuth")
+									d.FieldS8("elevation")
+								}
+							})
+						}
+					})
+				} else {
+					d.FieldRawLen("reserved", 4)
+					d.FieldU3("channel_order_definition")
+					omittedChannelsPresent := d.FieldBool("omitted_channels_present")
+					if omittedChannelsPresent {
+						d.FieldU64("omitted_channels_map")
+					}
+				}
+			}
+			if hasObjects {
+				// ISO/IEC 14496-12:2022:
+				// > object_count is derived from baseChannelCount
+			}
+		}
 
 	default:
 		// there are at least 4 ways to encode udta metadata in mov/mp4 files.
diff --git a/format/mp4/mp4.go b/format/mp4/mp4.go
@@ -136,20 +136,21 @@ type stsz struct {
 }
 
 type track struct {
-	seenHdlr           bool
-	fragment           bool
-	id                 int
-	sampleDescriptions []sampleDescription
-	subType            string
-	stco               []int64
-	stsc               []stsc
-	stsz               []stsz
-	formatInArg        any
-	objectType         int // if data format is "mp4a"
-	defaultIVSize      int
-	moofs              []*moof // for fmp4
-	dref               bool
-	drefURL            string
+	seenHdlr             bool
+	fragment             bool
+	id                   int
+	sampleDescriptions   []sampleDescription
+	subType              string
+	stco                 []int64
+	stsc                 []stsc
+	stsz                 []stsz
+	formatInArg          any
+	objectType           int // if data format is "mp4a"
+	defaultIVSize        int
+	moofs                []*moof // for fmp4
+	dref                 bool
+	drefURL              string
+	stsdNumAudioChannels uint64 // num_audio_channels of the most recently decoded "soun" sample entry; loop bound for the chnl version 0 channels array
 }
 
 type pathEntry struct {
diff --git a/format/mp4/testdata/chnl-ver1 b/format/mp4/testdata/chnl-ver1
diff --git a/format/mp4/testdata/chnl-ver1.fqtest b/format/mp4/testdata/chnl-ver1.fqtest
@@ -0,0 +1,18 @@
+$ fq -o force=true -d mp4 dv chnl-ver1
+    |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: chnl-ver1 (mp4) 0x0-0x10 (16)
+    |                                               |                |  boxes[0:1]: 0x0-0x10 (16)
+    |                                               |                |    [0]{}: box 0x0-0x10 (16)
+0x00|00 00 00 10                                    |....            |      size: 16 0x0-0x4 (4)
+0x00|            63 68 6e 6c                        |    chnl        |      type: "chnl" 0x4-0x8 (4)
+0x00|                        01                     |        .       |      version: 1 0x8-0x9 (1)
+0x00|                           00 00 00            |         ...    |      flags: 0 0x9-0xc (3)
+    |                                               |                |      stream_structure{}: 0xc-0xc.4 (0.4)
+0x00|                                    11         |            .   |        unused: raw bits 0xc-0xc.2 (0.2)
+0x00|                                    11         |            .   |        objects: false 0xc.2-0xc.3 (0.1)
+0x00|                                    11         |            .   |        channels: true 0xc.3-0xc.4 (0.1)
+0x00|                                    11         |            .   |      format_ordering: 1 0xc.4-0xd (0.4)
+0x00|                                       02      |             .  |      base_channel_count: 2 0xd-0xe (1)
+0x00|                                          02   |              . |      defined_layout: 2 0xe-0xf (1)
+0x00|                                             00|               .|      reserved: raw bits 0xf-0xf.4 (0.4)
+0x00|                                             00|               .|      channel_order_definition: 0 0xf.4-0xf.7 (0.3)
+0x00|                                             00|               .|      omitted_channels_present: false 0xf.7-0x10 (0.1)
diff --git a/format/mp4/testdata/pcmC b/format/mp4/testdata/pcmC
diff --git a/format/mp4/testdata/pcmC.fqtest b/format/mp4/testdata/pcmC.fqtest
@@ -0,0 +1,10 @@
+$ fq -o force=true -d mp4 dv pcmC
+   |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: pcmC (mp4) 0x0-0xe (14)
+   |                                               |                |  boxes[0:1]: 0x0-0xe (14)
+   |                                               |                |    [0]{}: box 0x0-0xe (14)
+0x0|00 00 00 0e                                    |....            |      size: 14 0x0-0x4 (4)
+0x0|            70 63 6d 43                        |    pcmC        |      type: "pcmC" 0x4-0x8 (4)
+0x0|                        00                     |        .       |      version: 0 0x8-0x9 (1)
+0x0|                           00 00 00            |         ...    |      flags: 0 0x9-0xc (3)
+0x0|                                    01         |            .   |      format_flags: 1 0xc-0xd (1)
+0x0|                                       18|     |             .| |      sample_size: 24 0xd-0xe (1)