From 6d247017bd353027ef809bcaa825363c1c14a75d Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Sun, 3 Mar 2024 13:32:34 -1000 Subject: [PATCH 01/12] Implement Base.one for StringView I have a lot of `S where S<:AbstractString` code which uses `one(S)` for the empty string to compare against. I didn't realize until I went to add this that `typemin` also works, but hey, that makes the implementation that much easier, and StringViews gives me a zero-copy drop-in replacement for a string buffer with this addition. --- src/StringViews.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/StringViews.jl b/src/StringViews.jl index d72c428..2ff9191 100644 --- a/src/StringViews.jl +++ b/src/StringViews.jl @@ -86,6 +86,7 @@ Base.:(==)(s1::StringViewAndSub, s2::StringAndSub) = s2 == s1 Base.typemin(::Type{StringView{Vector{UInt8}}}) = StringView(Vector{UInt8}(undef,0)) Base.typemin(::T) where {T<:StringView} = typemin(T) +Base.one(::T) where {T<:StringView} = typemin(T) if VERSION < v"1.10.0-DEV.1007" # JuliaLang/julia#47880 Base.isvalid(s::DenseStringViewAndSub) = ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) ≠ 0 From 1a22be907d128f60acccdf59939a8817f580abec Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Mon, 4 Mar 2024 14:32:43 -1000 Subject: [PATCH 02/12] Update runtests.jl And a test for identity equality --- test/runtests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index c0eaeca..6dde0fc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -158,6 +158,8 @@ end @test Base.typemin(s) isa StringView{Vector{UInt8}} @test Base.typemin(s) == "" + @test Base.one(s) == Base.typemin(s) == "" + @test isascii(s) @test !isascii(StringView("fööbār")) From 9f4766d4adda0faee30e8e6d15ec5c6928c69e2a Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Thu, 7 Mar 2024 10:17:21 -1000 Subject: [PATCH 03/12] Union type signature for Base.one Co-authored-by: Steven G. 
Johnson --- src/StringViews.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StringViews.jl b/src/StringViews.jl index 2ff9191..33297b7 100644 --- a/src/StringViews.jl +++ b/src/StringViews.jl @@ -86,7 +86,7 @@ Base.:(==)(s1::StringViewAndSub, s2::StringAndSub) = s2 == s1 Base.typemin(::Type{StringView{Vector{UInt8}}}) = StringView(Vector{UInt8}(undef,0)) Base.typemin(::T) where {T<:StringView} = typemin(T) -Base.one(::T) where {T<:StringView} = typemin(T) +Base.one(::Union{T,Type{T}}) where {T<:StringView} = typemin(T) if VERSION < v"1.10.0-DEV.1007" # JuliaLang/julia#47880 Base.isvalid(s::DenseStringViewAndSub) = ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) ≠ 0 From 766348ac4c626ce1bd2a8ae9d010453a6eff3788 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Thu, 7 Mar 2024 10:17:36 -1000 Subject: [PATCH 04/12] Update test/runtests.jl Co-authored-by: Steven G. Johnson --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 6dde0fc..fefc605 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -158,7 +158,7 @@ end @test Base.typemin(s) isa StringView{Vector{UInt8}} @test Base.typemin(s) == "" - @test Base.one(s) == Base.typemin(s) == "" + @test one(s) == one(typeof(s)) == typemin(s) == "" @test isascii(s) @test !isascii(StringView("fööbār")) From cbc0e6f62476d0cdb061bcb1e4bf8b06a00706d9 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Thu, 7 Mar 2024 10:19:05 -1000 Subject: [PATCH 05/12] Adds Base.oneunit Also defined for Strings --- src/StringViews.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/StringViews.jl b/src/StringViews.jl index 33297b7..fa6915f 100644 --- a/src/StringViews.jl +++ b/src/StringViews.jl @@ -87,6 +87,7 @@ Base.:(==)(s1::StringViewAndSub, s2::StringAndSub) = s2 == s1 Base.typemin(::Type{StringView{Vector{UInt8}}}) = StringView(Vector{UInt8}(undef,0)) Base.typemin(::T) where {T<:StringView} = typemin(T) Base.one(::Union{T,Type{T}}) 
where {T<:StringView} = typemin(T) +Base.oneunit(::Union{T,Type{T}}) where {T<:StringView} = typemin(T) if VERSION < v"1.10.0-DEV.1007" # JuliaLang/julia#47880 Base.isvalid(s::DenseStringViewAndSub) = ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) ≠ 0 From db46ed119fb6c61949e1be27d2e1db83af178ef2 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Thu, 7 Mar 2024 10:21:01 -1000 Subject: [PATCH 06/12] Test for oneunit --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index fefc605..07f69e8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -159,6 +159,7 @@ end @test Base.typemin(s) == "" @test one(s) == one(typeof(s)) == typemin(s) == "" + @test oneunit(s) == oneunit(typeof(s)) == one(s) == "" @test isascii(s) @test !isascii(StringView("fööbār")) From 5ca7fe37cd748d6945c1e0c5b81d17db59479ea7 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Mon, 18 Mar 2024 11:52:51 -0400 Subject: [PATCH 07/12] Adds convert for data type of StringView As well as a custom typemin for StringView{Base.CodeUnits{etc}}. This makes the Base implementation of typemin function for both s and su in the test suite, and I would consider returning the data field for an attempt to convert a StringView to its underlying data to be legitimate. 
--- .vscode/settings.json | 3 +++ Manifest.toml | 7 +++++++ src/StringViews.jl | 3 ++- test/runtests.jl | 4 +++- 4 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 Manifest.toml diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d7cc418 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "julia.environmentPath": "/Users/atman/orb/opp/StringViews.jl" +} \ No newline at end of file diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..66c946c --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,7 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.2" +manifest_format = "2.0" +project_hash = "b49e9b1483a70ece1f89d014cdbe39fe7ec7e18e" + +[deps] diff --git a/src/StringViews.jl b/src/StringViews.jl index fa6915f..0f48773 100644 --- a/src/StringViews.jl +++ b/src/StringViews.jl @@ -58,6 +58,7 @@ Base.unsafe_convert(::Type{Ptr{UInt8}}, s::DenseStringViewAndSub) = pointer(s) Base.unsafe_convert(::Type{Ptr{Int8}}, s::DenseStringViewAndSub) = convert(Ptr{Int8}, pointer(s)) Base.cconvert(::Type{Ptr{UInt8}}, s::DenseStringViewAndSub) = s Base.cconvert(::Type{Ptr{Int8}}, s::DenseStringViewAndSub) = s +Base.convert(::Type{T}, s::StringView{T}) where {T<:AbstractVector{UInt8}} = s.data Base.sizeof(s::StringView) = length(s.data) Base.ncodeunits(s::StringView) = length(s.data) @@ -85,9 +86,9 @@ end Base.:(==)(s1::StringViewAndSub, s2::StringAndSub) = s2 == s1 Base.typemin(::Type{StringView{Vector{UInt8}}}) = StringView(Vector{UInt8}(undef,0)) +Base.typemin(::Type{StringView{Base.CodeUnits{UInt8, String}}}) = StringView("") Base.typemin(::T) where {T<:StringView} = typemin(T) Base.one(::Union{T,Type{T}}) where {T<:StringView} = typemin(T) -Base.oneunit(::Union{T,Type{T}}) where {T<:StringView} = typemin(T) if VERSION < v"1.10.0-DEV.1007" # JuliaLang/julia#47880 Base.isvalid(s::DenseStringViewAndSub) = 
ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) ≠ 0 diff --git a/test/runtests.jl b/test/runtests.jl index 07f69e8..66c4e18 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -157,9 +157,11 @@ end @test Base.typemin(s) isa StringView{Vector{UInt8}} @test Base.typemin(s) == "" - @test one(s) == one(typeof(s)) == typemin(s) == "" @test oneunit(s) == oneunit(typeof(s)) == one(s) == "" + @test one(su) == one(typeof(su)) == typemin(su) == "" + @test oneunit(su) == oneunit(typeof(su)) == one(su) == "" + @test isascii(s) @test !isascii(StringView("fööbār")) From 08034b0c26536adaf8cb0d172ac646abc426e021 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Mon, 18 Mar 2024 13:08:50 -0400 Subject: [PATCH 08/12] Remove settings.json --- .vscode/settings.json | 3 --- test/runtests.jl | 1 - 2 files changed, 4 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index d7cc418..e69de29 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +0,0 @@ -{ - "julia.environmentPath": "/Users/atman/orb/opp/StringViews.jl" -} \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 66c4e18..200eb7c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -162,7 +162,6 @@ end @test one(su) == one(typeof(su)) == typemin(su) == "" @test oneunit(su) == oneunit(typeof(su)) == one(su) == "" - @test isascii(s) @test !isascii(StringView("fööbār")) From 3eb4ad993a1f37328fa39394f1361ec2536af192 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Mon, 18 Mar 2024 13:09:55 -0400 Subject: [PATCH 09/12] remove settings.json entirely :/ --- .vscode/settings.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index e69de29..0000000 From 64230661bcc99521618b1b16f601dba80da38c7a Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Tue, 26 Mar 2024 16:53:15 -0400 Subject: [PATCH 10/12] Removes Manifest.toml --- 
Manifest.toml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index 66c946c..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,7 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.10.2" -manifest_format = "2.0" -project_hash = "b49e9b1483a70ece1f89d014cdbe39fe7ec7e18e" - -[deps] From 83796dc108b4ab2b0a4c6f78a4c257ef68ddc3dd Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Wed, 27 Mar 2024 19:07:53 -0400 Subject: [PATCH 11/12] Remove convert, add StringView{S} constructor --- Manifest.toml | 7 +++++++ src/StringViews.jl | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..66c946c --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,7 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.2" +manifest_format = "2.0" +project_hash = "b49e9b1483a70ece1f89d014cdbe39fe7ec7e18e" + +[deps] diff --git a/src/StringViews.jl b/src/StringViews.jl index 0f48773..20688ff 100644 --- a/src/StringViews.jl +++ b/src/StringViews.jl @@ -39,6 +39,7 @@ Base.Vector{UInt8}(s::StringViewAndSub) = Vector{UInt8}(codeunits(s)) Base.Array{UInt8}(s::StringViewAndSub) = Vector{UInt8}(s) Base.String(s::StringViewAndSub) = String(copyto!(Base.StringVector(ncodeunits(s)), codeunits(s))) StringView(s::StringView) = s +StringView{S}(s::StringView{S}) where {S<:AbstractVector{UInt8}} = s StringView(s::String) = StringView(codeunits(s)) # iobuffer constructor (note that buf.data is always 1-based) @@ -58,7 +59,6 @@ Base.unsafe_convert(::Type{Ptr{UInt8}}, s::DenseStringViewAndSub) = pointer(s) Base.unsafe_convert(::Type{Ptr{Int8}}, s::DenseStringViewAndSub) = convert(Ptr{Int8}, pointer(s)) Base.cconvert(::Type{Ptr{UInt8}}, s::DenseStringViewAndSub) = s Base.cconvert(::Type{Ptr{Int8}}, s::DenseStringViewAndSub) = s 
-Base.convert(::Type{T}, s::StringView{T}) where {T<:AbstractVector{UInt8}} = s.data Base.sizeof(s::StringView) = length(s.data) Base.ncodeunits(s::StringView) = length(s.data) From 185c6203c4af23c988fe93253d1d9acefcf6e57c Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Tue, 2 Apr 2024 13:22:51 -0400 Subject: [PATCH 12/12] Remove Manifest.toml --- Manifest.toml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index 66c946c..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,7 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.10.2" -manifest_format = "2.0" -project_hash = "b49e9b1483a70ece1f89d014cdbe39fe7ec7e18e" - -[deps]