@@ -11,30 +11,39 @@ import (
 	"path"
 	"sync"
 
+	"github.com/chocolatkey/gzran"
 	"github.com/pkg/errors"
 )
 
 type gozipArchiveEntry struct {
 	file          *zip.File
 	minimizeReads bool
+
+	gi gzran.Index
+	gm sync.Mutex
 }
 
-func (e gozipArchiveEntry) Path() string {
+func (e *gozipArchiveEntry) Path() string {
 	return path.Clean(e.file.Name)
 }
 
-func (e gozipArchiveEntry) Length() uint64 {
+func (e *gozipArchiveEntry) Length() uint64 {
 	return e.file.UncompressedSize64
 }
 
-func (e gozipArchiveEntry) CompressedLength() uint64 {
+func (e *gozipArchiveEntry) CompressedLength() uint64 {
 	if e.file.Method == zip.Store {
 		return 0
 	}
 	return e.file.CompressedSize64
 }
 
-func (e gozipArchiveEntry) CompressedAs(compressionMethod CompressionMethod) bool {
+func (e *gozipArchiveEntry) CRC32Checksum() *uint32 {
+	c := e.file.CRC32
+	return &c
+}
+
+func (e *gozipArchiveEntry) CompressedAs(compressionMethod CompressionMethod) bool {
 	if compressionMethod != CompressionMethodDeflate {
 		return false
 	}
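The new CRC32Checksum accessor surfaces the CRC-32 value that the ZIP central directory already records for each member. Below is a minimal sketch of how a caller might use it; it assumes the package's Entry interface also exposes the new method, verifyEntry is a hypothetical helper, and it relies on ZIP using the IEEE CRC-32 polynomial implemented by hash/crc32.

// Hypothetical helper, not part of this change. Read(0, 0) returns the whole
// entry (see Read below), which is then checked against the stored CRC-32.
// Requires "errors" and "hash/crc32"; Entry is this package's entry interface.
func verifyEntry(entry Entry) error {
	data, err := entry.Read(0, 0)
	if err != nil {
		return err
	}
	if sum := entry.CRC32Checksum(); sum != nil && crc32.ChecksumIEEE(data) != *sum {
		return errors.New("CRC-32 mismatch")
	}
	return nil
}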
@@ -45,11 +54,11 @@ func (e gozipArchiveEntry) CompressedAs(compressionMethod CompressionMethod) boo
 // It's especially useful when trying to stream the ZIP from a remote file, e.g.
 // cloud storage. It's only enabled when trying to read the entire file and compression
 // is enabled. Care needs to be taken to cover every edge case.
-func (e gozipArchiveEntry) couldMinimizeReads() bool {
+func (e *gozipArchiveEntry) couldMinimizeReads() bool {
 	return e.minimizeReads && e.CompressedLength() > 0
 }
 
-func (e gozipArchiveEntry) Read(start int64, end int64) ([]byte, error) {
+func (e *gozipArchiveEntry) Read(start int64, end int64) ([]byte, error) {
 	if end < start {
 		return nil, errors.New("range not satisfiable")
 	}
@@ -73,14 +82,60 @@ func (e gozipArchiveEntry) Read(start int64, end int64) ([]byte, error) {
 	}
 
 	if minimizeReads {
-		compressedData := make([]byte, e.file.CompressedSize64)
-		_, err := io.ReadFull(f, compressedData)
-		if err != nil {
-			return nil, err
+		// If the uncompressed size is smaller than 1MB, it's not worth
+		// using deflate random access, because the state itself takes memory.
+		// We also skip the zrand logic if the entire file is being requested,
+		// because that means the client probably won't need a partial range.
+		if e.file.UncompressedSize64 < ZRandCutoff || (start == 0 && (end == 0 || end == int64(e.file.UncompressedSize64-1))) {
+			compressedData := make([]byte, e.file.CompressedSize64)
+			_, err := io.ReadFull(f, compressedData)
+			if err != nil {
+				return nil, err
+			}
+			frdr := flate.NewReader(bytes.NewReader(compressedData))
+			defer frdr.Close()
+			f = frdr
+		} else {
+			e.gm.Lock()
+			var lastCompressedOffset int64
+			for _, v := range e.gi {
+				if v.CompressedOffset > lastCompressedOffset && v.UncompressedOffset <= start {
+					lastCompressedOffset = v.CompressedOffset
+				}
+			}
+			e.gm.Unlock()
+
+			compressedData := make([]byte, e.file.CompressedSize64)
+			f.(io.Seeker).Seek(lastCompressedOffset, io.SeekStart)
+			_, err := io.ReadFull(f, compressedData[lastCompressedOffset:])
+			if err != nil {
+				return nil, err
+			}
+
+			// This special reader lets us restore the decompressor state at known offsets,
+			// which is useful when a client has already requested previous parts of the file,
+			// such as when a web browser requests subsequent byte ranges for media playback.
+			fzr, err := gzran.NewDReader(bytes.NewReader(compressedData)) // Default interval = 1MB, same as current ZRandCutoff
+			if err != nil {
+				return nil, err
+			}
+			// Note: if an implementor uses the same publication instance for all clients,
+			// this code will lock all clients. This could be problematic and should be
+			// mitigated in a future version. Getting this far is pretty rare though,
+			// and mainly applies to multimedia that is natively streamed by web browsers and
+			// was also inconveniently compressed by the original author of the ZIP.
+			e.gm.Lock()
+			defer e.gm.Unlock()
+			defer func() {
+				e.gi = fzr.Index
+			}()
+			defer fzr.Close()
+			if len(e.gi) > 0 {
+				fzr.Index = e.gi
+			}
+
+			f = fzr
 		}
-		frdr := flate.NewReader(bytes.NewReader(compressedData))
-		defer frdr.Close()
-		f = frdr
 	}
 
 	if start == 0 && end == 0 {
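In the else branch above, the entry caches a gzran.Index across calls, and the loop over e.gi selects the furthest checkpoint whose uncompressed offset is at or before the requested start, so inflation can resume from that compressed offset instead of from byte zero. The same lookup as a standalone sketch, using only the field names visible in the diff (the helper name is illustrative):

// nearestCheckpoint returns the compressed-stream offset of the latest cached
// checkpoint that begins at or before the requested uncompressed offset.
// When no checkpoint qualifies, it returns 0 and the caller reads the deflate
// stream from its beginning, exactly as the code above does.
func nearestCheckpoint(idx gzran.Index, start int64) int64 {
	var lastCompressedOffset int64
	for _, p := range idx {
		if p.CompressedOffset > lastCompressedOffset && p.UncompressedOffset <= start {
			lastCompressedOffset = p.CompressedOffset
		}
	}
	return lastCompressedOffset
}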
@@ -92,21 +147,25 @@ func (e gozipArchiveEntry) Read(start int64, end int64) ([]byte, error) {
 		return data, nil
 	}
 	if start > 0 {
-		_, err := io.CopyN(io.Discard, f, start)
+		if skr, ok := f.(io.Seeker); ok {
+			_, err = skr.Seek(start, io.SeekStart)
+		} else {
+			_, err = io.CopyN(io.Discard, f, start)
+		}
 		if err != nil {
 			return nil, err
 		}
 	}
 	data := make([]byte, end-start+1)
-	n, err := f.Read(data)
-	if n > 0 && err == io.EOF {
+	n, err := io.ReadFull(f, data)
+	if n > 0 && err == io.ErrUnexpectedEOF {
 		// Not EOF error if some data was read
 		err = nil
 	}
 	return data[:n], err
 }
 
-func (e gozipArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64, error) {
+func (e *gozipArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64, error) {
 	if end < start {
 		return -1, errors.New("range not satisfiable")
 	}
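Two smaller fixes ride along in this hunk: skipping to start now prefers Seek when the reader supports it (the zran path relies on this so it never inflates the unread prefix), and the final copy switches from a single f.Read to io.ReadFull. A flate reader may return fewer bytes than the buffer size from one Read call without any error, so the old code could silently truncate a range; io.ReadFull keeps reading until the buffer is full, and a short final range surfaces as io.ErrUnexpectedEOF, which is then cleared because partial data at the end of an entry is still valid. A reduced illustration of that pattern (not project code, requires "io"):

// readWindow fills len(buf) bytes from r unless the stream ends first, in
// which case whatever was read is returned without an error.
func readWindow(r io.Reader, buf []byte) ([]byte, error) {
	n, err := io.ReadFull(r, buf)
	if n > 0 && err == io.ErrUnexpectedEOF {
		err = nil
	}
	return buf[:n], err
}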
@@ -157,7 +216,7 @@ func (e gozipArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64, e
 	return n, err
 }
 
-func (e gozipArchiveEntry) StreamCompressed(w io.Writer) (int64, error) {
+func (e *gozipArchiveEntry) StreamCompressed(w io.Writer) (int64, error) {
 	if e.file.Method != zip.Deflate {
 		return -1, errors.New("not a compressed resource")
 	}
@@ -169,7 +228,7 @@ func (e gozipArchiveEntry) StreamCompressed(w io.Writer) (int64, error) {
 	return io.Copy(w, f)
 }
 
-func (e gozipArchiveEntry) StreamCompressedGzip(w io.Writer) (int64, error) {
+func (e *gozipArchiveEntry) StreamCompressedGzip(w io.Writer) (int64, error) {
 	if e.file.Method != zip.Deflate {
 		return -1, errors.New("not a compressed resource")
 	}
@@ -205,7 +264,7 @@ func (e gozipArchiveEntry) StreamCompressedGzip(w io.Writer) (int64, error) {
 	return int64(n) + nn + int64(nnn), nil
 }
 
-func (e gozipArchiveEntry) ReadCompressed() ([]byte, error) {
+func (e *gozipArchiveEntry) ReadCompressed() ([]byte, error) {
 	if e.file.Method != zip.Deflate {
 		return nil, errors.New("not a compressed resource")
 	}
@@ -223,7 +282,7 @@ func (e gozipArchiveEntry) ReadCompressed() ([]byte, error) {
 	return compressedData, nil
 }
 
-func (e gozipArchiveEntry) ReadCompressedGzip() ([]byte, error) {
+func (e *gozipArchiveEntry) ReadCompressedGzip() ([]byte, error) {
 	if e.file.Method != zip.Deflate {
 		return nil, errors.New("not a compressed resource")
 	}
@@ -280,7 +339,7 @@ func (a *gozipArchive) Entries() []Entry {
 
 		aentry, ok := a.cachedEntries.Load(f.Name)
 		if !ok {
-			aentry = gozipArchiveEntry{
+			aentry = &gozipArchiveEntry{
 				file:          f,
 				minimizeReads: a.minimizeReads,
 			}
@@ -307,7 +366,7 @@ func (a *gozipArchive) Entry(p string) (Entry, error) {
 	for _, f := range a.zip.File {
 		fp := path.Clean(f.Name)
 		if fp == cpath {
-			aentry := gozipArchiveEntry{
+			aentry := &gozipArchiveEntry{
 				file:          f,
 				minimizeReads: a.minimizeReads,
 			}
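The switch from value to pointer receivers and literals is what makes the cached index usable: gi and gm now live on the single entry instance stored in cachedEntries, so a later ranged read on the same entry sees the index built earlier. A hypothetical usage sketch, assuming an archive opened with minimizeReads enabled, an Archive interface shaped like *gozipArchive above, and a deflate-compressed member larger than ZRandCutoff under the made-up name "video/clip.webm":

// warmAndReuse issues two ranged reads against the same cached entry. Because
// entries are now pointers, the deflate index built by the first read is still
// attached when the second, later range arrives, so decompression resumes from
// a checkpoint instead of restarting at byte zero.
func warmAndReuse(archive Archive) error {
	entry, err := archive.Entry("video/clip.webm") // hypothetical member name
	if err != nil {
		return err
	}
	if _, err := entry.Read(0, 1<<20-1); err != nil { // builds and caches the index
		return err
	}
	if _, err := entry.Read(8<<20, 9<<20-1); err != nil { // reuses a cached checkpoint
		return err
	}
	return nil
}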