@@ -591,13 +591,14 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
591
591
size := d .FieldU32 ("size" )
592
592
dataFormat := d .FieldUTF8 ("type" , 4 , dataFormatNames , scalar .ActualTrimSpace )
593
593
subType := ""
594
- if t := ctx .currentTrack (); t != nil {
595
- t .sampleDescriptions = append (t .sampleDescriptions , sampleDescription {
594
+ track := ctx .currentTrack ()
595
+ if track != nil {
596
+ track .sampleDescriptions = append (track .sampleDescriptions , sampleDescription {
596
597
dataFormat : dataFormat ,
597
598
})
598
599
599
- if t .seenHdlr {
600
- subType = t .subType
600
+ if track .seenHdlr {
601
+ subType = track .subType
601
602
} else {
602
603
// TODO: seems to be ffmpeg mov.c, where is this documented in specs?
603
604
// no hdlr box found, guess using dataFormat
@@ -617,7 +618,6 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
617
618
618
619
switch subType {
619
620
case "soun" , "vide" :
620
-
621
621
version := d .FieldU16 ("version" )
622
622
d .FieldU16 ("revision_level" )
623
623
d .FieldU32 ("max_packet_size" ) // TODO: vendor for some subtype?
@@ -626,9 +626,10 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
626
626
case "soun" :
627
627
// AudioSampleEntry
628
628
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html#//apple_ref/doc/uid/TP40000939-CH205-SW1
629
+ var numAudioChannels uint64
629
630
switch version {
630
631
case 0 :
631
- d .FieldU16 ("num_audio_channels" )
632
+ numAudioChannels = d .FieldU16 ("num_audio_channels" )
632
633
d .FieldU16 ("sample_size" )
633
634
d .FieldU16 ("compression_id" )
634
635
d .FieldU16 ("packet_size" )
@@ -637,7 +638,7 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
637
638
decodeBoxes (ctx , d )
638
639
}
639
640
case 1 :
640
- d .FieldU16 ("num_audio_channels" )
641
+ numAudioChannels = d .FieldU16 ("num_audio_channels" )
641
642
d .FieldU16 ("sample_size" )
642
643
d .FieldU16 ("compression_id" )
643
644
d .FieldU16 ("packet_size" )
@@ -657,7 +658,7 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
657
658
d .FieldU32 ("always_65536" )
658
659
d .FieldU32 ("size_of_struct_only" )
659
660
d .FieldF64 ("audio_sample_rate" )
660
- d .FieldU32 ("num_audio_channels" )
661
+ numAudioChannels = d .FieldU32 ("num_audio_channels" )
661
662
d .FieldU32 ("always_7f000000" )
662
663
d .FieldU32 ("const_bits_per_channel" )
663
664
d .FieldU32 ("format_specific_flags" )
@@ -669,6 +670,9 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
669
670
default :
670
671
d .FieldRawLen ("data" , d .BitsLeft ())
671
672
}
673
+ if track != nil {
674
+ track .stsdNumAudioChannels = numAudioChannels
675
+ }
672
676
case "vide" :
673
677
// VideoSampleEntry
674
678
// TODO: version 0 and 1 same?
@@ -1832,6 +1836,88 @@ func decodeBox(ctx *decodeContext, d *decode.D, typ string) {
1832
1836
d .FieldRawLen ("uid" , 128 )
1833
1837
}
1834
1838
})
1839
+ case "pcmC" :
1840
+ d .FieldU8 ("version" )
1841
+ d .FieldU24 ("flags" )
1842
+ d .FieldU8 ("format_flags" )
1843
+ d .FieldU8 ("sample_size" )
1844
+ case "chnl" :
1845
+ version := d .FieldU8 ("version" )
1846
+ d .FieldU24 ("flags" )
1847
+
1848
+ if version == 0 {
1849
+ hasObjects := false
1850
+ hasChannels := false
1851
+ d .FieldStruct ("stream_structure" , func (d * decode.D ) {
1852
+ d .FieldRawLen ("unused" , 6 )
1853
+ hasObjects = d .FieldBool ("objects" )
1854
+ hasChannels = d .FieldBool ("channels" )
1855
+ })
1856
+ if hasChannels {
1857
+ definedLayout := d .FieldU8 ("defined_layout" )
1858
+ if definedLayout == 0 {
1859
+ track := ctx .currentTrack ()
1860
+ if track == nil {
1861
+ d .FieldRawLen ("rest" , d .BitsLeft ())
1862
+ break
1863
+ }
1864
+ d .FieldArray ("channels" , func (d * decode.D ) {
1865
+ for i := 0 ; i < int (track .stsdNumAudioChannels ); i ++ {
1866
+ d .FieldStruct ("channel" , func (d * decode.D ) {
1867
+ speakerPosition := d .FieldU8 ("speaker_position" )
1868
+ if speakerPosition == 126 {
1869
+ d .FieldS16 ("azimuth" )
1870
+ d .FieldS8 ("elevation" )
1871
+ }
1872
+ })
1873
+ }
1874
+ })
1875
+ } else {
1876
+ d .FieldU64 ("omitted_channels_map" )
1877
+ }
1878
+ }
1879
+ if hasObjects {
1880
+ d .FieldU8 ("object_count" )
1881
+ }
1882
+ } else {
1883
+ hasObjects := false
1884
+ hasChannels := false
1885
+ d .FieldStruct ("stream_structure" , func (d * decode.D ) {
1886
+ d .FieldRawLen ("unused" , 2 )
1887
+ hasObjects = d .FieldBool ("objects" )
1888
+ hasChannels = d .FieldBool ("channels" )
1889
+ })
1890
+ d .FieldU4 ("format_ordering" )
1891
+ d .FieldU8 ("base_channel_count" )
1892
+ if hasChannels {
1893
+ definedLayout := d .FieldU8 ("defined_layout" )
1894
+ if definedLayout == 0 {
1895
+ layoutChannelCount := d .FieldU8 ("layout_channel_count" )
1896
+ d .FieldArray ("channels" , func (d * decode.D ) {
1897
+ for i := 0 ; i < int (layoutChannelCount ); i ++ {
1898
+ d .FieldStruct ("channel" , func (d * decode.D ) {
1899
+ speakerPosition := d .FieldU8 ("speaker_position" )
1900
+ if speakerPosition == 126 {
1901
+ d .FieldS16 ("azimuth" )
1902
+ d .FieldS8 ("elevation" )
1903
+ }
1904
+ })
1905
+ }
1906
+ })
1907
+ } else {
1908
+ d .FieldRawLen ("reserved" , 4 )
1909
+ d .FieldU3 ("channel_order_definition" )
1910
+ omittedChannelsPresent := d .FieldBool ("omitted_channels_present" )
1911
+ if omittedChannelsPresent {
1912
+ d .FieldU64 ("omitted_channels_map" )
1913
+ }
1914
+ }
1915
+ }
1916
+ if hasObjects {
1917
+ // ISO/IEC 14496-12:2022:
1918
+ // > object_count is derived from baseChannelCount
1919
+ }
1920
+ }
1835
1921
1836
1922
default :
1837
1923
// there are at least 4 ways to encode udta metadata in mov/mp4 files.
0 commit comments