Skip to content

feat: implement Hash and HashSums to perform hashing based on user-provided hashers #215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 2, 2024
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ Filters are methods on an existing pipe that also return a pipe, allowing you to
| [`First`](https://pkg.go.dev/github.com/bitfield/script#Pipe.First) | first N lines of input |
| [`Freq`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Freq) | frequency count of unique input lines, most frequent first |
| [`Get`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Get) | response to HTTP GET on supplied URL |
| [`HashSums`](https://pkg.go.dev/github.com/bitfield/script#Pipe.HashSums) | hashes of each listed file |
| [`Join`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Join) | replace all newlines with spaces |
| [`JQ`](https://pkg.go.dev/github.com/bitfield/script#Pipe.JQ) | result of `jq` query |
| [`Last`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Last) | last N lines of input|
Expand All @@ -340,6 +341,7 @@ Sinks are methods that return some data from a pipe, ending the pipeline and ext
| ---- | ----------- | ------- |
| [`AppendFile`](https://pkg.go.dev/github.com/bitfield/script#Pipe.AppendFile) | appended to file, creating if it doesn't exist | bytes written, error |
| [`Bytes`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Bytes) | | data as `[]byte`, error |
| [`Hash`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Hash) | | hash, error |
| [`CountLines`](https://pkg.go.dev/github.com/bitfield/script#Pipe.CountLines) | |number of lines, error |
| [`Read`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Read) | given `[]byte` | bytes read, error |
| [`SHA256Sum`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sum) | | SHA-256 hash, error |
Expand Down
62 changes: 39 additions & 23 deletions script.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"hash"
"io"
"math"
"net/http"
Expand Down Expand Up @@ -650,6 +651,38 @@ func (p *Pipe) Get(url string) *Pipe {
return p.Do(req)
}

// Hash feeds everything readable from the pipe into hasher and returns the
// resulting digest as a hex-encoded string. If the pipe already has an error
// status, or reading from it fails, Hash returns an empty string and that
// error; a read failure is also recorded on the pipe via SetError.
func (p *Pipe) Hash(hasher hash.Hash) (string, error) {
	if pipeErr := p.Error(); pipeErr != nil {
		return "", pipeErr
	}
	if _, copyErr := io.Copy(hasher, p); copyErr != nil {
		p.SetError(copyErr)
		return "", copyErr
	}
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest), nil
}

// HashSums reads paths from the pipe, one per line, and produces the
// hex-encoded hash of each corresponding file based on the provided hasher,
// one per line. Any files that cannot be opened or read will be ignored.
//
// The hasher is reset before each file is read, so every output line is the
// digest of that file alone, regardless of any data previously written to the
// hasher.
func (p *Pipe) HashSums(hasher hash.Hash) *Pipe {
	return p.FilterScan(func(line string, w io.Writer) {
		f, err := os.Open(line)
		if err != nil {
			return // skip unopenable files
		}
		defer f.Close()
		// The same hasher value is used for every path, and Sum does not
		// clear its state; without this reset each digest would also cover
		// the contents of all files hashed before it.
		hasher.Reset()
		if _, err := io.Copy(hasher, f); err != nil {
			return // skip unreadable files
		}
		fmt.Fprintln(w, hex.EncodeToString(hasher.Sum(nil)))
	})
}

// Join joins all the lines in the pipe's contents into a single
// space-separated string, which will always end with a newline.
func (p *Pipe) Join() *Pipe {
Expand Down Expand Up @@ -816,36 +849,19 @@ func (p *Pipe) SetError(err error) {

// SHA256Sum returns the hex-encoded SHA-256 hash of the entire contents of the
// pipe, or an error.
//
// Deprecated: SHA256Sum has been superseded by Hash. To get the SHA-256 hash
// of the contents of the pipe, call Hash(sha256.New()).
func (p *Pipe) SHA256Sum() (string, error) {
if p.Error() != nil {
return "", p.Error()
}
hasher := sha256.New()
_, err := io.Copy(hasher, p)
if err != nil {
p.SetError(err)
return "", err
}
return hex.EncodeToString(hasher.Sum(nil)), p.Error()
// NOTE(review): this view shows removed and added diff lines together;
// presumably the statements above are the deleted body and the call below
// is its replacement — confirm against the merged file.
return p.Hash(sha256.New())
}

// SHA256Sums reads paths from the pipe, one per line, and produces the
// hex-encoded SHA-256 hash of each corresponding file, one per line. Any files
// that cannot be opened or read will be ignored.
//
// Deprecated: SHA256Sums has been superseded by HashSums. To get the SHA-256
// hash for each file path in the pipe, call HashSums(sha256.New()).
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// encoding for the paths in the pipe, call `HashSums(sha256.new())`
// hash for each file path in the pipe, call `HashSums(sha256.new())`

func (p *Pipe) SHA256Sums() *Pipe {
return p.FilterScan(func(line string, w io.Writer) {
// line is used as the path of the file to hash.
f, err := os.Open(line)
if err != nil {
return // skip unopenable files
}
defer f.Close()
// A new hasher is created on every call of this function, so no hash
// state carries over from one call to the next.
h := sha256.New()
_, err = io.Copy(h, f)
if err != nil {
return // skip unreadable files
}
fmt.Fprintln(w, hex.EncodeToString(h.Sum(nil)))
})
// NOTE(review): this view shows removed and added diff lines together;
// presumably the FilterScan statement above is the deleted body and the
// call below is its replacement — confirm against the merged file.
return p.HashSums(sha256.New())
}

// Slice returns the pipe's contents as a slice of strings, one element per
Expand Down
117 changes: 116 additions & 1 deletion script_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package script_test
import (
"bufio"
"bytes"
"crypto/sha256"
"crypto/sha512"
"errors"
"fmt"
"hash"
"io"
"log"
"net/http"
Expand Down Expand Up @@ -1127,7 +1130,7 @@ func TestSHA256Sums_OutputsCorrectHashForEachSpecifiedFile(t *testing.T) {
want string
}{
// To get the checksum run: openssl dgst -sha256 <file_name>
{"testdata/sha256Sum.input.txt", "1870478d23b0b4db37735d917f4f0ff9393dd3e52d8b0efa852ab85536ddad8e\n"},
{"testdata/hashSum.input.txt", "1870478d23b0b4db37735d917f4f0ff9393dd3e52d8b0efa852ab85536ddad8e\n"},
{"testdata/hello.txt", "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9\n"},
{"testdata/multiple_files", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\ne3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\ne3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"},
}
Expand Down Expand Up @@ -2013,6 +2016,100 @@ func TestWithStdErr_IsConcurrencySafeAfterExec(t *testing.T) {
}
}

// TestHash_OutputsCorrectHash checks that Hash returns the expected
// hex-encoded digest of the pipe's contents for each supplied hasher.
func TestHash_OutputsCorrectHash(t *testing.T) {
	t.Parallel()
	type testCase struct {
		name   string
		input  string
		hasher hash.Hash
		want   string
	}
	cases := []testCase{
		{
			name:   "for no data",
			input:  "",
			hasher: sha256.New(),
			want:   "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
		},
		{
			name:   "for short string with SHA 256 hasher",
			input:  "hello, world",
			hasher: sha256.New(),
			want:   "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
		},
		{
			name:   "for short string with SHA 512 hasher",
			input:  "hello, world",
			hasher: sha512.New(),
			want:   "8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9",
		},
		{
			name:   "for string containing newline with SHA 256 hasher",
			input:  "The tao that can be told\nis not the eternal Tao",
			hasher: sha256.New(),
			want:   "788542cb92d37f67e187992bdb402fdfb68228a1802947f74c6576e04790a688",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			sum, err := script.Echo(c.input).Hash(c.hasher)
			if err != nil {
				t.Fatal(err)
			}
			if c.want != sum {
				t.Errorf("want %q, got %q", c.want, sum)
			}
		})
	}
}

// TestHashSums_OutputsCorrectHashForEachSpecifiedFile lists the files under
// each test path and checks that HashSums emits the expected hex-encoded
// digest for each of them, one per line.
func TestHashSums_OutputsCorrectHashForEachSpecifiedFile(t *testing.T) {
	t.Parallel()
	type testCase struct {
		testFileName string
		hasher       hash.Hash
		want         string
	}
	// To get a checksum run: openssl dgst -sha256 <file_name>
	// (or -sha512 for the SHA-512 case).
	cases := []testCase{
		{
			testFileName: "testdata/hashSum.input.txt",
			hasher:       sha256.New(),
			want:         "1870478d23b0b4db37735d917f4f0ff9393dd3e52d8b0efa852ab85536ddad8e\n",
		},
		{
			testFileName: "testdata/hashSum.input.txt",
			hasher:       sha512.New(),
			want:         "3543bd0d68129e860598ccabcee1beb6bb90d91105cea74a8e555588634ec6f6d6d02033139972da2dc4929b1fb61bd24c91c8e82054e9ae865cf7f70909be8c\n",
		},
		{
			testFileName: "testdata/hello.txt",
			hasher:       sha256.New(),
			want:         "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9\n",
		},
		{
			testFileName: "testdata/multiple_files",
			hasher:       sha256.New(),
			want:         "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\ne3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\ne3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n",
		},
	}
	for _, c := range cases {
		sums := script.ListFiles(c.testFileName).HashSums(c.hasher)
		got, err := sums.String()
		if err != nil {
			t.Fatal(err)
		}
		if c.want != got {
			t.Errorf("%q: want %q, got %q", c.testFileName, c.want, got)
		}
	}
}

// TestHash_ReturnsErrorGivenReadErrorOnPipe checks that Hash reports an error
// when the pipe's underlying reader fails.
func TestHash_ReturnsErrorGivenReadErrorOnPipe(t *testing.T) {
	t.Parallel()
	brokenReader := iotest.ErrReader(errors.New("oh no"))
	_, err := script.NewPipe().WithReader(brokenReader).Hash(sha256.New())
	if err == nil {
		// t.Fatal(nil) would only print "<nil>"; say what was expected.
		t.Fatal("want error when reading from broken pipe, got nil")
	}
}

func ExampleArgs() {
script.Args().Stdout()
// prints command-line arguments
Expand Down Expand Up @@ -2276,6 +2373,24 @@ func ExamplePipe_Get() {
// You said: hello
}

// ExamplePipe_Hash prints the hex-encoded SHA-512 digest of a short string
// placed in the pipe.
func ExamplePipe_Hash() {
	p := script.Echo("hello world")
	digest, err := p.Hash(sha512.New())
	if err != nil {
		panic(err)
	}
	fmt.Println(digest)
	// Output:
	// 309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f
}

// ExamplePipe_HashSums prints one hex-encoded SHA-256 digest per file listed
// under testdata/multiple_files.
func ExamplePipe_HashSums() {
	files := script.ListFiles("testdata/multiple_files")
	files.HashSums(sha256.New()).Stdout()
	// Output:
	// e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
	// e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
	// e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
}

func ExamplePipe_Join() {
script.Echo("hello\nworld\n").Join().Stdout()
// Output:
Expand Down
File renamed without changes.
Loading