Skip to content

Reorganize base/string.jl, base/utf*, test/strings.jl, test/unicode.jl #11925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 10, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions base/hashing2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,15 @@ end
## hashing Float16s ##

# Hash a Float16 by widening it to Float64 and hashing that value.
function hash(x::Float16, h::UInt)
    return hash(Float64(x), h)
end

## hashing strings ##

# Select the C hashing routine and the seed constant that match the width of UInt.
if UInt === UInt64
    const memhash = :memhash_seed
    const memhash_seed = 0x71e729fd56419c81
else
    const memhash = :memhash32_seed
    const memhash_seed = 0x56419c81
end

# Hash the bytes of a byte string (or a substring of one).
# `h` is offset by `memhash_seed`; the low 32 bits of that value seed the C
# routine named by `memhash`, and the full value is added to the result.
function hash{T<:ByteString}(s::Union{T,SubString{T}}, h::UInt)
    seeded = h + memhash_seed
    # note: use pointer(s) here (see #6058).
    digest = ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32),
                   pointer(s), sizeof(s), seeded % UInt32)
    return digest + seeded
end
# Generic strings are hashed through their `bytestring` conversion.
function hash(s::AbstractString, h::UInt)
    return hash(bytestring(s), h)
end
199 changes: 199 additions & 0 deletions base/parse.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

## string to integer functions ##

# Convert a single digit character `c` to an integer of type `T` in the given
# `base` (2 to 62). For bases up to 36 lowercase letters have the same value as
# uppercase ones (starting at 10); for larger bases lowercase letters start at 36.
# Throws ArgumentError for an out-of-range base, for a character that is not
# a digit or ASCII letter, and for a digit that is not valid in `base`.
function parse{T<:Integer}(::Type{T}, c::Char, base::Integer=36)
    if !(2 <= base <= 62)
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    end
    lower_offset::Int = base <= 36 ? 10 : 36
    if '0' <= c <= '9'
        d = c - '0'
    elseif 'A' <= c <= 'Z'
        d = c - 'A' + 10
    elseif 'a' <= c <= 'z'
        d = c - 'a' + lower_offset
    else
        throw(ArgumentError("invalid digit: $(repr(c))"))
    end
    if d >= base
        throw(ArgumentError("invalid base $base digit $(repr(c))"))
    end
    return convert(T, d)
end

# Read the character of `s` at index `startpos`.
# Returns (character, index of the following character, index just read).
# When `startpos` is not within 1:endpos, returns (Char(0), 0, 0).
function parseint_next(s::AbstractString, startpos::Int, endpos::Int)
    if startpos <= 0 || startpos > endpos
        return Char(0), 0, 0
    end
    c, following = next(s, startpos)
    return c, following, startpos
end

# Consume everything that may precede the digits of an integer literal:
# leading whitespace, an optional sign (only when `signed`), more whitespace
# and, when `base == 0`, an optional `0b` / `0o` / `0x` prefix.
# Returns (sign, base, index of the first digit character); returns (0, 0, 0)
# when the input runs out before any non-space character is found.
function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)
    c, i, j = parseint_next(s, startpos, endpos)

    # skip leading whitespace
    while isspace(c)
        c, i, j = parseint_next(s, i, endpos)
    end
    j == 0 && return 0, 0, 0

    sgn = 1
    if signed && (c == '-' || c == '+')
        if c == '-'
            sgn = -1
        end
        c, i, j = parseint_next(s, i, endpos)
    end

    # whitespace is also allowed between the sign and the digits
    while isspace(c)
        c, i, j = parseint_next(s, i, endpos)
    end
    j == 0 && return 0, 0, 0

    if base == 0
        # auto-detect the base from a prefix; plain decimal otherwise
        base = 10
        if c == '0' && !done(s, i)
            c, i = next(s, i)
            if c == 'b'
                base = 2
            elseif c == 'o'
                base = 8
            elseif c == 'x'
                base = 16
            end
            if base != 10
                # a prefix was recognised: move on to the first digit after it
                c, i, j = parseint_next(s, i, endpos)
            end
        end
    end
    return sgn, base, j
end

# Parse the bytes `startpos:endpos` of `sbuff` as a Bool.
# Only the exact spellings "true" and "false" are recognised.
# Returns Nullable(true) / Nullable(false) on a match. Otherwise returns an
# empty Nullable{Bool} when `raise` is false, or throws ArgumentError when
# `raise` is true.
function tryparse_internal{S<:ByteString}(::Type{Bool}, sbuff::S, startpos::Int, endpos::Int, raise::Bool)
    len = endpos - startpos + 1
    p = pointer(sbuff) + startpos - 1
    if len == 4 && 0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "true", 4)
        return Nullable(true)
    end
    if len == 5 && 0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "false", 5)
        return Nullable(false)
    end
    # The buffer argument is named `sbuff`; the message must use it. Referring to
    # an undefined `s` made this path fail with UndefVarError instead of
    # the intended ArgumentError.
    raise && throw(ArgumentError("invalid Bool representation: $(repr(SubString(sbuff,startpos,endpos)))"))
    return Nullable{Bool}()
end

# Overflow-checked addition: returns Nullable{T}(n1 + n2), or an empty
# Nullable{T} when the sum would fall outside typemin(T):typemax(T).
function safe_add{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        overflows = n1 > typemax(T) - n2
    else
        overflows = n1 < typemin(T) - n2
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 + n2)
end
# Overflow-checked multiplication: returns Nullable{T}(n1 * n2), or an empty
# Nullable{T} when the product would fall outside typemin(T):typemax(T).
function safe_mul{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        overflows = n1 > div(typemax(T), n2) || n1 < div(typemin(T), n2)
    elseif n2 < -1
        overflows = n1 > div(typemin(T), n2) || n1 < div(typemax(T), n2)
    else
        # n2 is 0 or -1: only typemin(T) * -1 cannot be represented
        overflows = n2 == -1 && n1 == typemin(T)
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 * n2)
end

# Core integer parser behind the integer `parse` and `tryparse` methods.
#
# Parses the characters of `s` between indices `startpos` and `endpos` as an
# integer of type `T`. `base` is handed to `parseint_preamble`, which replaces
# a `base` of 0 with a detected one. `a` is the value added to `c-'a'` to obtain
# the digit value of a lowercase letter.
#
# Returns `Nullable{T}(n)` on success. On failure it returns an empty
# `Nullable{T}` when `raise` is false; when `raise` is true it throws
# ArgumentError for malformed input and OverflowError when the checked
# arithmetic in the second loop overflows.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, a::Int, raise::Bool)
# empty result returned on every non-raising failure path
_n = Nullable{T}()
# sign, effective base, and index of the first digit character (0 if there is none)
sgn, base, i = parseint_preamble(T<:Signed, base, s, startpos, endpos)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
return _n
end
# read the first digit; `i` becomes the index of the character after it (0 if nothing could be read)
c, i = parseint_next(s,i,endpos)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
return _n
end

base = convert(T,base)
# While n <= m, n*base + d cannot exceed typemax(T) for any digit d < base,
# so the first loop accumulates with plain (unchecked) arithmetic.
m::T = div(typemax(T)-base+1,base)
n::T = 0
while n <= m
# characters that are not digits or ASCII letters map to `base`, which the check below rejects
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
if d >= base
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
return _n
end
n *= base
n += d
# the whole range has been consumed: apply the sign and finish
if i > endpos
n *= sgn
return Nullable{T}(n)
end
c, i = next(s,i)
# whitespace ends the digits
isspace(c) && break
end
# From here on the sign is folded into `n` and into every digit, and each
# step goes through the overflow-checked helpers safe_mul / safe_add.
(T <: Signed) && (n *= sgn)
while !isspace(c)
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
if d >= base
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
return _n
end
(T <: Signed) && (d *= sgn)

safe_n = safe_mul(n, base)
isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))
if isnull(safe_n)
raise && throw(OverflowError())
return _n
end
n = get(safe_n)
(i > endpos) && return Nullable{T}(n)
c, i = next(s,i)
end
# after the digits, only whitespace may follow up to `endpos`
while i <= endpos
c, i = next(s,i)
if !isspace(c)
raise && throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
return _n
end
end
return Nullable{T}(n)
end
# Whole-string form: parse all of `s`, from start(s) to endof(s).
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool)
    return tryparse_internal(T, s, start(s), endof(s), base, raise)
end

# Range form: pick the lowercase-letter digit offset from the base
# (10 for bases up to 36, 36 above that) and delegate to the core parser.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, raise::Bool)
    lowercase_offset = base <= 36 ? 10 : 36
    return tryparse_internal(T, s, startpos, endpos, base, lowercase_offset, raise)
end
# Parse `s` as an integer of type `T` in the given `base` (2 to 62) without
# raising on malformed input; an out-of-range base is still an ArgumentError.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Int)
    if !(2 <= base <= 62)
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    end
    return tryparse_internal(T, s, Int(base), false)
end

# Without an explicit base, pass 0 so that the base is detected from the input.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString)
    return tryparse_internal(T, s, 0, false)
end

# Parse `s` as an integer of type `T` in the given `base` (2 to 62).
# Throws ArgumentError for an out-of-range base or malformed input, and
# OverflowError when the value does not fit in `T`.
function parse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    # tryparse_internal only has methods taking `base::Int`; convert here so that
    # other Integer types (e.g. 0x10 or Int8(2)) do not end in a MethodError.
    # The range check above guarantees the conversion is exact.
    get(tryparse_internal(T, s, Int(base), true))
end
# Parse `s` as an integer of type `T`, detecting the base from the input
# (base 0), and raise on failure.
function parse{T<:Integer}(::Type{T}, s::AbstractString)
    parsed = tryparse_internal(T, s, 0, true)
    return get(parsed)
end

## stringifying integers more efficiently ##

# Signed machine integers are stringified directly via `dec`.
function string(x::Union{Int8,Int16,Int32,Int64,Int128})
    return dec(x)
end

## string to float functions ##

# Float64 from a byte string: hand the whole buffer to the C routine.
function tryparse(::Type{Float64}, s::ByteString)
    return ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t),
                 s, 0, sizeof(s))
end

# Float64 from a substring of a byte string: pass the parent buffer together
# with the substring's `offset` and `endof` fields.
function tryparse{T<:ByteString}(::Type{Float64}, s::SubString{T})
    return ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t),
                 s.string, s.offset, s.endof)
end

# Float32 from a byte string.
function tryparse(::Type{Float32}, s::ByteString)
    return ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t),
                 s, 0, sizeof(s))
end

# Float32 from a substring of a byte string.
function tryparse{T<:ByteString}(::Type{Float32}, s::SubString{T})
    return ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t),
                 s.string, s.offset, s.endof)
end

# Any other string type is first converted with `bytestring`.
function tryparse{T<:Union{Float32,Float64}}(::Type{T}, s::AbstractString)
    return tryparse(T, bytestring(s))
end

# Parse `s` as a floating-point number of type `T`.
# Throws ArgumentError when `tryparse` reports failure.
function parse{T<:FloatingPoint}(::Type{T}, s::AbstractString)
    result = tryparse(T, s)
    if isnull(result)
        throw(ArgumentError("invalid number format $(repr(s)) for $T"))
    end
    return get(result)
end

# Convert a string to a Float64 by parsing it.
function float(x::AbstractString)
    return parse(Float64, x)
end

# Convert an array of strings element-wise, writing into a new array whose
# element type is that of `float(0)`.
function float{S<:AbstractString}(a::AbstractArray{S})
    dest = similar(a, typeof(float(0)))
    return map!(float, dest, a)
end

## interface to parser ##

# Parse one expression from `str` starting at index `pos`.
# Returns (expr, end_pos), where end_pos is the 1-based position reported by
# the parser. With `raise=true` a parse error or empty input throws ParseError;
# with `raise=false` empty input yields `Expr(:error, "end of input")` and an
# error expression produced by the parser is returned as is.
function parse(str::AbstractString, pos::Int; greedy::Bool=true, raise::Bool=true)
    bstr = bytestring(str)
    greedy_flag = greedy ? 1 : 0
    # C is zero-based, Julia is 1-based: shift the position on the way in and out
    ex, stop = ccall(:jl_parse_string, Any,
                     (Ptr{UInt8}, Csize_t, Int32, Int32),
                     bstr, sizeof(bstr), pos-1, greedy_flag)
    if raise && isa(ex,Expr) && ex.head === :error
        throw(ParseError(ex.args[1]))
    end
    if ex == ()
        # the parser returns () when there was nothing to parse
        if raise
            throw(ParseError("end of input"))
        end
        ex = Expr(:error, "end of input")
    end
    return ex, stop+1
end

# Parse `str` as exactly one expression.
# An error expression from the positional parser is returned unchanged.
# Input left over after the first expression is an error: ParseError when
# `raise` is true, otherwise an `Expr(:error, ...)` result.
function parse(str::AbstractString; raise::Bool=true)
    ex, pos = parse(str, start(str), greedy=true, raise=raise)
    if isa(ex,Expr) && ex.head === :error
        return ex
    elseif done(str, pos)
        return ex
    elseif raise
        throw(ParseError("extra token after end of expression"))
    else
        return Expr(:error, "extra token after end of expression")
    end
end
167 changes: 167 additions & 0 deletions base/shell.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

## shell-like command parsing ##

# Split a shell-like command string into arguments.
#
# Each argument is collected as a vector of pieces: literal substrings and,
# when `interp` is true, escaped expressions parsed after a `$`.
# Returns a tuple (result, last_parse):
#   - with `interp` false, `result` is the vector of argument piece-vectors;
#   - with `interp` true, `result` is an `Expr(:tuple, ...)` holding one
#     `Expr(:tuple, pieces...)` per argument;
#   - `last_parse` is the index range `stpos:j` recorded for the most recent
#     `$` interpolation, or `0:-1` when there was none.
# Calls `error` for unterminated quotes, a dangling backslash, and a `$` that is
# followed by a space or by the end of the command.
function shell_parse(raw::AbstractString, interp::Bool)
s = lstrip(raw)
# Strip trailing delimiters, but keep a trailing space that is escaped by a
# backslash (i.e. when the string ends with "\\ ").
r = RevString(s)
i = start(r)
c_old = nothing
while !done(r,i)
c, j = next(r,i)
if c == '\\' && c_old == ' '
i -= 1
break
elseif !(c in _default_delims)
break
end
i = j
c_old = c
end
s = s[1:end-i+1]

last_parse = 0:-1
isempty(s) && return interp ? (Expr(:tuple,:()),last_parse) : ([],last_parse)

in_single_quotes = false
in_double_quotes = false

# `args` collects the finished arguments; `arg` holds the pieces of the one being built
args::Vector{Any} = []
arg::Vector{Any} = []
# `i` marks the start of the pending literal piece, `j` is the scan position
i = start(s)
j = i

# add a piece to the current argument, dropping empty strings
function update_arg(x)
if !isa(x,AbstractString) || !isempty(x)
push!(arg, x)
end
end
# finish the current argument (an argument with no pieces becomes "") and start a new one
function append_arg()
if isempty(arg); arg = Any["",]; end
push!(args, arg)
arg = []
end

while !done(s,j)
c, k = next(s,j)
if !in_single_quotes && !in_double_quotes && isspace(c)
# unquoted whitespace ends the current argument; skip the rest of the whitespace run
update_arg(s[i:j-1])
append_arg()
j = k
while !done(s,j)
c, k = next(s,j)
if !isspace(c)
i = j
break
end
j = k
end
elseif interp && !in_single_quotes && c == '$'
# interpolation: flush the pending literal, then parse one expression right after the `$`
update_arg(s[i:j-1]); i = k; j = k
if done(s,k)
error("\$ right before end of command")
end
if isspace(s[k])
error("space not allowed right after \$")
end
stpos = j
ex, j = parse(s,j,greedy=false)
last_parse = stpos:j
update_arg(esc(ex)); i = j
else
if !in_double_quotes && c == '\''
# toggle single quoting; the quote character itself is not kept
in_single_quotes = !in_single_quotes
update_arg(s[i:j-1]); i = k
elseif !in_single_quotes && c == '"'
# toggle double quoting; the quote character itself is not kept
in_double_quotes = !in_double_quotes
update_arg(s[i:j-1]); i = k
elseif c == '\\'
if in_double_quotes
if done(s,k)
error("unterminated double quote")
end
# inside double quotes a backslash only escapes `"` and `$`; otherwise it stays literal
if s[k] == '"' || s[k] == '$'
update_arg(s[i:j-1]); i = k
c, k = next(s,k)
end
elseif !in_single_quotes
# outside quotes a backslash escapes the next character (inside single quotes it is literal)
if done(s,k)
error("dangling backslash")
end
update_arg(s[i:j-1]); i = k
c, k = next(s,k)
end
end
j = k
end
end

if in_single_quotes; error("unterminated single quote"); end
if in_double_quotes; error("unterminated double quote"); end

# flush the final literal piece and the final argument
update_arg(s[i:end])
append_arg()

if !interp
return (args,last_parse)
end

# construct an expression
ex = Expr(:tuple)
for arg in args
push!(ex.args, Expr(:tuple, arg...))
end
(ex,last_parse)
end
# Interpolation is enabled by default.
function shell_parse(s::AbstractString)
    return shell_parse(s, true)
end

# Split a command string into its arguments without interpolation, joining the
# pieces of every argument into a single string.
function shell_split(s::AbstractString)
    pieces_per_arg = shell_parse(s, false)[1]
    return AbstractString[string(pieces...) for pieces in pieces_per_arg]
end

# Print `word` to `io`, quoted so that it reads back as a single shell word:
#   - an empty word is printed as '' ;
#   - a word without whitespace, backslash, quote or `$` characters is printed as is;
#   - a word with such characters but no single quote is wrapped in single quotes;
#   - a word containing a single quote is wrapped in double quotes, with `"` and
#     `$` preceded by a backslash.
function print_shell_word(io::IO, word::AbstractString)
    isempty(word) && print(io, "''")
    needs_quoting = false
    contains_single = false
    for ch in word
        if ch == '\''
            contains_single = true
            needs_quoting = true
        elseif isspace(ch) || ch == '\\' || ch == '"' || ch == '$'
            needs_quoting = true
        end
    end
    if contains_single
        print(io, '"')
        for ch in word
            (ch == '"' || ch == '$') && print(io, '\\')
            print(io, ch)
        end
        print(io, '"')
    elseif needs_quoting
        print(io, '\'', word, '\'')
    else
        print(io, word)
    end
end

# Print a command and its arguments to `io` as shell words separated by single
# spaces; each word is quoted by `print_shell_word`.
function print_shell_escaped(io::IO, cmd::AbstractString, args::AbstractString...)
    print_shell_word(io, cmd)
    for word in args
        print(io, ' ')
        print_shell_word(io, word)
    end
    return nothing
end

# With no words at all there is nothing to print.
function print_shell_escaped(io::IO)
    return nothing
end

# Return the shell-escaped form of `args` as a string.
function shell_escape(args::AbstractString...)
    return sprint(print_shell_escaped, args...)
end
Loading