Skip to content

Commit 38c6925

Browse files
committed
Reorganize base/string.jl, base/utf*, test/strings.jl, test/unicode.jl
The monolithic string.jl has been split up into several files, and the test files in strings.jl and unicode.jl have been made to correspond with the files of the same names in base. This will prevent a lot of manual merging that was previously necessary. Merge Sub/Rev/Rep/RopeStrings into strings/types.jl, for base and test
1 parent e3dfa56 commit 38c6925

35 files changed

+3919
-3956
lines changed

base/hashing2.jl

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,3 +166,15 @@ end
166166
## hashing Float16s ##
167167

168168
# Hash a Float16 by widening it to Float64 and hashing that value with the same seed `h`.
hash(x::Float16, h::UInt) = hash(Float64(x), h)
169+
170+
## hashing strings ##
171+
172+
# Symbol of the C hashing routine handed to `ccall` by the string hash:
# `:memhash_seed` when UInt is UInt64, `:memhash32_seed` otherwise.
const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
173+
# Constant mixed into the incoming hash state before string bytes are hashed;
# the wider literal is used when UInt is UInt64, the narrower one otherwise.
const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81
174+
175+
# Hash the bytes of a byte string (or a substring of one).
#
# The incoming state `h` is first offset by `memhash_seed`; the low 32 bits of
# that value seed the C routine named by `memhash`, and the offset state is
# added back to the routine's result.
function hash{T<:ByteString}(s::Union{T,SubString{T}}, h::UInt)
    seeded = h + memhash_seed
    # note: the data pointer must come from pointer(s) here (see #6058).
    digest = ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32),
                   pointer(s), sizeof(s), seeded % UInt32)
    return digest + seeded
end
180+
# Fallback for other string types: convert to a byte string with `bytestring`
# and hash that.
hash(s::AbstractString, h::UInt) = hash(bytestring(s), h)

base/parse.jl

Lines changed: 199 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,199 @@
1+
# This file is a part of Julia. License is MIT: http://julialang.org/license
2+
3+
## string to integer functions ##
4+
5+
# Convert a single digit character `c` to its value in the given `base`,
# returned as type `T`.
#
# '0'-'9' map to 0-9 and 'A'-'Z' to 10-35. Lowercase letters map to 10-35 when
# base <= 36 and to 36-61 for larger bases.
# Throws ArgumentError for a base outside 2..62, for a character that is not a
# digit or ASCII letter, and for a digit that is too large for the base.
function parse{T<:Integer}(::Type{T}, c::Char, base::Integer=36)
    if !(2 <= base <= 62)
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    end
    lower_offset::Int = base <= 36 ? 10 : 36
    if '0' <= c <= '9'
        digit = c - '0'
    elseif 'A' <= c <= 'Z'
        digit = c - 'A' + 10
    elseif 'a' <= c <= 'z'
        digit = c - 'a' + lower_offset
    else
        throw(ArgumentError("invalid digit: $(repr(c))"))
    end
    if digit >= base
        throw(ArgumentError("invalid base $base digit $(repr(c))"))
    end
    return convert(T, digit)
end
14+
15+
# Read the character of `s` at index `startpos`.
#
# Returns `(char, index_after_char, index_of_char)`. When `startpos` is not
# within 1..endpos the sentinel `(Char(0), 0, 0)` is returned instead.
function parseint_next(s::AbstractString, startpos::Int, endpos::Int)
    if !(0 < startpos <= endpos)
        return Char(0), 0, 0
    end
    ch, following = next(s, startpos)
    return ch, following, startpos
end
21+
22+
# Scan the leading part of an integer literal in s[startpos:endpos]:
# whitespace, an optional sign (only when `signed` is true), more whitespace,
# and, when `base == 0`, an optional 0b / 0o / 0x prefix that selects the base
# (10 when there is no such prefix).
#
# Returns `(sgn, base, j)`, where `sgn` is 1 or -1, `base` is the effective
# base and `j` is the index of the first digit character. `j == 0` means the
# range was exhausted before a digit was reached; when that happens before the
# base is determined the result is `(0, 0, 0)`.
function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)
    c, i, j = parseint_next(s, startpos, endpos)

    # parseint_next yields Char(0) once the range is exhausted, which is not
    # whitespace, so these loops terminate.
    while isspace(c)
        c, i, j = parseint_next(s,i,endpos)
    end
    (j == 0) && (return 0, 0, 0)

    sgn = 1
    if signed
        if c == '-' || c == '+'
            (c == '-') && (sgn = -1)
            c, i, j = parseint_next(s,i,endpos)
        end
    end

    while isspace(c)
        c, i, j = parseint_next(s,i,endpos)
    end
    (j == 0) && (return 0, 0, 0)

    if base == 0
        # Only peek at the character after a leading '0' when it lies inside
        # the requested range; looking past `endpos` would let text outside
        # the range turn a lone "0" into an unfinished 0x/0b/0o literal.
        if c == '0' && i <= endpos
            c, i = next(s,i)
            base = c=='b' ? 2 : c=='o' ? 8 : c=='x' ? 16 : 10
            if base != 10
                # Skip the prefix letter; j now points at the first digit
                # after it (or becomes 0 if nothing follows).
                c, i, j = parseint_next(s,i,endpos)
            end
        else
            base = 10
        end
    end
    return sgn, base, j
end
56+
57+
# Try to parse the bytes sbuff[startpos:endpos] as a Bool.
#
# Returns Nullable(true) for exactly "true", Nullable(false) for exactly
# "false", and otherwise an empty Nullable{Bool}. With `raise == true` an
# ArgumentError is thrown instead of returning the empty value.
function tryparse_internal{S<:ByteString}(::Type{Bool}, sbuff::S, startpos::Int, endpos::Int, raise::Bool)
    len = endpos-startpos+1
    p = pointer(sbuff)+startpos-1
    # The length is checked first so memcmp never reads beyond the range.
    (len == 4) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "true", 4)) && (return Nullable(true))
    (len == 5) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "false", 5)) && (return Nullable(false))
    # The message must quote `sbuff`; there is no variable named `s` in this method.
    raise && throw(ArgumentError("invalid Bool representation: $(repr(SubString(sbuff,startpos,endpos)))"))
    Nullable{Bool}()
end
65+
66+
# Add two integers of the same type, returning an empty Nullable{T} when the
# sum would fall outside typemin(T)..typemax(T).
function safe_add{T<:Integer}(n1::T, n2::T)
    overflows = (n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))
    return overflows ? Nullable{T}() : Nullable{T}(n1 + n2)
end
67+
# Multiply two integers of the same type, returning an empty Nullable{T} when
# the product would fall outside typemin(T)..typemax(T).
function safe_mul{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        overflows = (n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))
    elseif n2 < -1
        overflows = (n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))
    else
        # n2 is 0 or -1; the only overflowing case is negating typemin(T).
        overflows = (n2 == -1) && n1 == typemin(T)
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 * n2)
end
70+
71+
# Core integer parser for s[startpos:endpos].
#
# `base` is the requested base (0 lets parseint_preamble detect it), `a` is
# the value assigned to lowercase 'a' (lowercase letters map to a, a+1, ...),
# and `raise` selects between throwing and returning an empty Nullable{T} on
# failure. On success the parsed value is returned wrapped in Nullable{T}.
# Whitespace may follow the digits; anything else after it is an error.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, a::Int, raise::Bool)
    sgn, base, i = parseint_preamble(T<:Signed, base, s, startpos, endpos)
    if i == 0
        if raise
            throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
        end
        return Nullable{T}()
    end
    c, i = parseint_next(s,i,endpos)
    if i == 0
        if raise
            throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
        end
        return Nullable{T}()
    end

    base = convert(T,base)
    m::T = div(typemax(T)-base+1,base)
    n::T = 0

    # Fast path: while n <= m, n*base + d cannot exceed typemax(T), so the
    # digits are accumulated without overflow checks.
    while n <= m
        d::T = if '0' <= c <= '9'
            c-'0'
        elseif 'A' <= c <= 'Z'
            c-'A'+10
        elseif 'a' <= c <= 'z'
            c-'a'+a
        else
            base   # not a digit character: forces the d >= base failure below
        end
        if d >= base
            if raise
                throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
            end
            return Nullable{T}()
        end
        n = n * base + d
        if i > endpos
            n *= sgn
            return Nullable{T}(n)
        end
        c, i = next(s,i)
        if isspace(c)
            break
        end
    end
    if T <: Signed
        n *= sgn
    end

    # Slow path: remaining digits are folded in with overflow-checked
    # arithmetic. The sign is already applied to n, so each digit is signed too.
    while !isspace(c)
        d::T = if '0' <= c <= '9'
            c-'0'
        elseif 'A' <= c <= 'Z'
            c-'A'+10
        elseif 'a' <= c <= 'z'
            c-'a'+a
        else
            base
        end
        if d >= base
            if raise
                throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
            end
            return Nullable{T}()
        end
        if T <: Signed
            d *= sgn
        end

        safe_n = safe_mul(n, base)
        if !isnull(safe_n)
            safe_n = safe_add(get(safe_n), d)
        end
        if isnull(safe_n)
            if raise
                throw(OverflowError())
            end
            return Nullable{T}()
        end
        n = get(safe_n)
        if i > endpos
            return Nullable{T}(n)
        end
        c, i = next(s,i)
    end

    # Only whitespace may remain after the number.
    while i <= endpos
        c, i = next(s,i)
        if !isspace(c)
            if raise
                throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
            end
            return Nullable{T}()
        end
    end
    return Nullable{T}(n)
end
134+
# Parse the whole of `s`, from its first index to its last.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool)
    return tryparse_internal(T, s, start(s), endof(s), base, raise)
end

# Parse s[startpos:endpos], choosing the value of lowercase 'a' from the base:
# 10 for bases up to 36, 36 for larger bases.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, raise::Bool)
    lowercase_start = base <= 36 ? 10 : 36
    return tryparse_internal(T, s, startpos, endpos, base, lowercase_start, raise)
end
138+
# Parse `s` as an integer of type T in the given base, returning an empty
# Nullable{T} on malformed input. An out-of-range base still throws.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Int)
    if !(2 <= base <= 62)
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    end
    return tryparse_internal(T, s, base, false)
end

# Parse `s` as an integer of type T; base 0 asks the parser to detect the
# base from the literal's prefix.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString)
    return tryparse_internal(T, s, 0, false)
end
141+
142+
# Parse `s` as an integer of type T in the given base, throwing on failure.
#
# `base` may be any Integer type. It is validated against 2..62 and then
# converted to Int, because tryparse_internal only has methods taking an Int
# base; passing e.g. a UInt8 or Int32 straight through raised a MethodError.
function parse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    get(tryparse_internal(T, s, convert(Int, base), true))
end
146+
# Parse `s` as an integer of type T. Base 0 lets the preamble choose the base
# from a 0b/0o/0x prefix (10 otherwise); raise=true makes failures throw.
parse{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))
147+
148+
## stringifying integers more efficiently ##
149+
150+
# Signed built-in integers are converted to strings directly through `dec`.
string(x::Union{Int8,Int16,Int32,Int64,Int128}) = dec(x)
151+
152+
## string to float functions ##
153+
154+
# Try to parse a byte string as a Float64 via the runtime's
# jl_try_substrtod, passing offset 0 and the string's byte size.
function tryparse(::Type{Float64}, s::ByteString)
    return ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t),
                 s, 0, sizeof(s))
end

# Same for a substring of a byte string: the parent string is passed along
# with the substring's `offset` and `endof` fields.
function tryparse{T<:ByteString}(::Type{Float64}, s::SubString{T})
    return ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t),
                 s.string, s.offset, s.endof)
end
156+
157+
# Try to parse a byte string as a Float32 via the runtime's
# jl_try_substrtof, passing offset 0 and the string's byte size.
function tryparse(::Type{Float32}, s::ByteString)
    return ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t),
                 s, 0, sizeof(s))
end

# Same for a substring of a byte string: the parent string is passed along
# with the substring's `offset` and `endof` fields.
function tryparse{T<:ByteString}(::Type{Float32}, s::SubString{T})
    return ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t),
                 s.string, s.offset, s.endof)
end
159+
160+
# Fallback for other string types: convert to a byte string first, then
# dispatch to the ByteString methods.
tryparse{T<:Union{Float32,Float64}}(::Type{T}, s::AbstractString) = tryparse(T, bytestring(s))
161+
162+
# Parse `s` as a floating-point number of type T.
# Throws ArgumentError when `tryparse` cannot interpret the string.
function parse{T<:FloatingPoint}(::Type{T}, s::AbstractString)
    parsed = tryparse(T, s)
    if isnull(parsed)
        throw(ArgumentError("invalid number format $(repr(s)) for $T"))
    end
    return get(parsed)
end
166+
167+
# Convert a string to a Float64 by parsing it.
float(x::AbstractString) = parse(Float64,x)
168+
169+
# Apply `float` to every string of an array; the result is written into a
# `similar` array whose element type is typeof(float(0)).
float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)
170+
171+
## interface to parser ##
172+
173+
# Parse one Julia expression from `str`, starting at index `pos`.
#
# Returns `(ex, next_pos)`, where `next_pos` is the parser's zero-based end
# position converted back to a one-based Julia index.
# When the parser produces an `Expr(:error, ...)` and `raise` is true, a
# ParseError carrying its first argument is thrown; with `raise` false the
# error expression is returned as-is. When the parser yields `()`, this
# either throws ParseError("end of input") or returns
# `Expr(:error, "end of input")`, depending on `raise`.
function parse(str::AbstractString, pos::Int; greedy::Bool=true, raise::Bool=true)
    bstr = bytestring(str)
    greedy_flag = greedy ? 1 : 0
    ex, pos = ccall(:jl_parse_string, Any,
                    (Ptr{UInt8}, Csize_t, Int32, Int32),
                    bstr, sizeof(bstr), pos-1, greedy_flag)
    if raise && isa(ex,Expr) && ex.head === :error
        throw(ParseError(ex.args[1]))
    end
    if ex == ()
        if raise
            throw(ParseError("end of input"))
        end
        ex = Expr(:error, "end of input")
    end
    return ex, pos+1  # C is zero-based, Julia is 1-based
end
188+
189+
# Parse the whole of `str` as a single Julia expression.
#
# An error expression from the positional parser is returned unchanged.
# Input left over after the first expression either throws ParseError or is
# reported as `Expr(:error, ...)`, depending on `raise`.
function parse(str::AbstractString; raise::Bool=true)
    ex, pos = parse(str, start(str), greedy=true, raise=raise)
    parse_failed = isa(ex,Expr) && ex.head === :error
    if parse_failed || done(str, pos)
        return ex
    end
    if raise
        throw(ParseError("extra token after end of expression"))
    end
    return Expr(:error, "extra token after end of expression")
end

base/shell.jl

Lines changed: 167 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,167 @@
1+
# This file is a part of Julia. License is MIT: http://julialang.org/license
2+
3+
## shell-like command parsing ##
4+
5+
# Split a shell-like command string into words.
#
# Returns a 2-tuple `(words, last_parse)`. `last_parse` is the index range
# covered by the most recent `$` interpolation, or 0:-1 when there was none.
# With `interp == true`, `$` starts a Julia expression (except inside single
# quotes) and `words` is an Expr(:tuple) holding one Expr(:tuple, parts...)
# per word. With `interp == false`, `$` is not treated specially and `words`
# is the plain vector of per-word part vectors.
# Calls `error` for unterminated quotes, a dangling backslash, or a `$` that
# is followed by whitespace or the end of the command.
function shell_parse(raw::AbstractString, interp::Bool)
6+
s = lstrip(raw)
7+
# Strip trailing delimiters, but keep the space when the string ends with "\\ "
8+
r = RevString(s)
9+
i = start(r)
10+
# c_old is the character seen on the previous iteration of the backwards
# scan, i.e. the one that follows `c` in the original string.
c_old = nothing
11+
while !done(r,i)
12+
c, j = next(r,i)
13+
if c == '\\' && c_old == ' '
14+
# A backslash right before a trailing space: back up one position so
# that space is not trimmed.
i -= 1
15+
break
16+
elseif !(c in _default_delims)
17+
break
18+
end
19+
i = j
20+
c_old = c
21+
end
22+
# Drop the trailing characters consumed by the scan above.
s = s[1:end-i+1]
23+
24+
last_parse = 0:-1
25+
# Nothing left after trimming: return an empty result of the requested form.
isempty(s) && return interp ? (Expr(:tuple,:()),last_parse) : ([],last_parse)
26+
27+
in_single_quotes = false
28+
in_double_quotes = false
29+
30+
# args collects the finished words; arg collects the parts of the word
# currently being built.
args::Vector{Any} = []
31+
arg::Vector{Any} = []
32+
# i is the start of the pending literal run, j is the scan position.
i = start(s)
33+
j = i
34+
35+
# Add a part to the current word. Empty strings are skipped; non-string
# parts are always kept.
function update_arg(x)
36+
if !isa(x,AbstractString) || !isempty(x)
37+
push!(arg, x)
38+
end
39+
end
40+
# Finish the current word (an empty word becomes [""]) and start a new one.
function append_arg()
41+
if isempty(arg); arg = Any["",]; end
42+
push!(args, arg)
43+
arg = []
44+
end
45+
46+
while !done(s,j)
47+
# c is the character at j, k the index just after it.
c, k = next(s,j)
48+
if !in_single_quotes && !in_double_quotes && isspace(c)
49+
# Unquoted whitespace ends the current word; then skip the rest of the
# whitespace run.
update_arg(s[i:j-1])
50+
append_arg()
51+
j = k
52+
while !done(s,j)
53+
c, k = next(s,j)
54+
if !isspace(c)
55+
i = j
56+
break
57+
end
58+
j = k
59+
end
60+
elseif interp && !in_single_quotes && c == '$'
61+
# Interpolation: flush the literal text before the `$`, then parse one
# Julia expression (non-greedily) starting right after it.
update_arg(s[i:j-1]); i = k; j = k
62+
if done(s,k)
63+
error("\$ right before end of command")
64+
end
65+
if isspace(s[k])
66+
error("space not allowed right after \$")
67+
end
68+
stpos = j
69+
ex, j = parse(s,j,greedy=false)
70+
last_parse = stpos:j
71+
update_arg(esc(ex)); i = j
72+
else
73+
if !in_double_quotes && c == '\''
74+
# Toggle single-quote mode; the quote character itself is dropped.
in_single_quotes = !in_single_quotes
75+
update_arg(s[i:j-1]); i = k
76+
elseif !in_single_quotes && c == '"'
77+
# Toggle double-quote mode; the quote character itself is dropped.
in_double_quotes = !in_double_quotes
78+
update_arg(s[i:j-1]); i = k
79+
elseif c == '\\'
80+
# Inside double quotes a backslash only escapes `"` and `$`. Outside
# quotes it escapes whatever character follows. Inside single quotes
# it is left alone.
if in_double_quotes
81+
if done(s,k)
82+
error("unterminated double quote")
83+
end
84+
if s[k] == '"' || s[k] == '$'
85+
update_arg(s[i:j-1]); i = k
86+
c, k = next(s,k)
87+
end
88+
elseif !in_single_quotes
89+
if done(s,k)
90+
error("dangling backslash")
91+
end
92+
update_arg(s[i:j-1]); i = k
93+
c, k = next(s,k)
94+
end
95+
end
96+
j = k
97+
end
98+
end
99+
100+
if in_single_quotes; error("unterminated single quote"); end
101+
if in_double_quotes; error("unterminated double quote"); end
102+
103+
# Flush the final literal run and close the last word.
update_arg(s[i:end])
104+
append_arg()
105+
106+
if !interp
107+
return (args,last_parse)
108+
end
109+
110+
# construct an expression
111+
ex = Expr(:tuple)
112+
for arg in args
113+
push!(ex.args, Expr(:tuple, arg...))
114+
end
115+
(ex,last_parse)
116+
end
117+
# Convenience method: parse with interpolation enabled.
shell_parse(s::AbstractString) = shell_parse(s,true)
118+
119+
# Split a command string into its words without interpolation.
# Each word's parts, as produced by `shell_parse(s, false)`, are joined into
# a single string.
function shell_split(s::AbstractString)
    words = shell_parse(s,false)[1]
    return AbstractString[string(parts...) for parts in words]
end
127+
128+
# Print `word` to `io`, quoted so that it reads back as a single shell word.
#
# An empty word is printed as ''. A word without whitespace, backslash, quote
# or `$` characters is printed unchanged. A word with such characters but no
# single quote is wrapped in single quotes. Otherwise it is wrapped in double
# quotes with `"` and `$` backslash-escaped.
function print_shell_word(io::IO, word::AbstractString)
    if isempty(word)
        print(io, "''")
    end
    has_single = false
    has_special = false
    for ch in word
        if ch == '\''
            has_single = true
            has_special = true
        elseif isspace(ch) || ch == '\\' || ch == '"' || ch == '$'
            has_special = true
        end
    end
    if has_special && has_single
        print(io, '"')
        for ch in word
            (ch == '"' || ch == '$') && print(io, '\\')
            print(io, ch)
        end
        print(io, '"')
    elseif has_special
        print(io, '\'', word, '\'')
    else
        print(io, word)
    end
    return nothing
end
157+
158+
# Print a command and its arguments to `io` as shell words separated by
# single spaces, quoting each one with `print_shell_word`.
function print_shell_escaped(io::IO, cmd::AbstractString, args::AbstractString...)
    print_shell_word(io, cmd)
    for word in args
        print(io, ' ')
        print_shell_word(io, word)
    end
    return nothing
end
165+
# With no words there is nothing to print.
print_shell_escaped(io::IO) = nothing
166+
167+
# Return, as a string, what `print_shell_escaped` prints for the given words.
shell_escape(args::AbstractString...) = sprint(print_shell_escaped, args...)

0 commit comments

Comments
 (0)