|
| 1 | +# This file is a part of Julia. License is MIT: http://julialang.org/license |
| 2 | + |
| 3 | +## string to integer functions ## |
| 4 | + |
# Convert the single digit character `c` into its numeric value, returned as a `T`.
#
# '0'-'9' map to 0-9 and 'A'-'Z' map to 10-35. Lowercase 'a'-'z' map to 10-35 when
# `base <= 36`, and to 36-61 for larger bases.
#
# Throws an ArgumentError when `base` is outside 2:62, when `c` is not one of the
# characters above, or when the digit value is not smaller than `base`.
function parse{T<:Integer}(::Type{T}, c::Char, base::Integer=36)
    if !(2 <= base <= 62)
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    end
    # value assigned to 'a': case-insensitive up to base 36, distinct digits above
    lowercase_offset::Int = base <= 36 ? 10 : 36
    if '0' <= c <= '9'
        digit = c - '0'
    elseif 'A' <= c <= 'Z'
        digit = c - 'A' + 10
    elseif 'a' <= c <= 'z'
        digit = c - 'a' + lowercase_offset
    else
        throw(ArgumentError("invalid digit: $(repr(c))"))
    end
    if digit >= base
        throw(ArgumentError("invalid base $base digit $(repr(c))"))
    end
    return convert(T, digit)
end
| 14 | + |
# Fetch the character of `s` at index `startpos`, provided 0 < startpos <= endpos.
#
# Returns `(char, next_index, index_of_char)`. When `startpos` is outside that range
# the sentinel `(Char(0), 0, 0)` is returned instead, so callers can detect the end
# of input by testing for a zero index.
function parseint_next(s::AbstractString, startpos::Int, endpos::Int)
    if startpos <= 0 || startpos > endpos
        return Char(0), 0, 0
    end
    ch, following = next(s, startpos)
    return ch, following, startpos
end
| 21 | + |
# Scan everything that may precede the digits of an integer in `s`, looking only at
# indices `startpos` through `endpos`:
#   * leading whitespace,
#   * an optional '+' or '-' (recognised only when `signed` is true),
#   * whitespace after the sign,
#   * when `base == 0`, an optional "0b", "0o" or "0x" prefix selecting base 2, 8 or
#     16; without such a prefix the base becomes 10.
#
# Returns `(sgn, base, j)`: `sgn` is 1 or -1, `base` is the base to parse with, and
# `j` is the index of the first digit character. `j == 0` means the range ran out
# before a digit position was found; `(0, 0, 0)` is returned when that happens
# before or right after the sign.
function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)
    c, i, j = parseint_next(s, startpos, endpos)

    while isspace(c)
        c, i, j = parseint_next(s,i,endpos)
    end
    (j == 0) && (return 0, 0, 0)

    sgn = 1
    if signed
        if c == '-' || c == '+'
            (c == '-') && (sgn = -1)
            c, i, j = parseint_next(s,i,endpos)
        end
    end

    while isspace(c)
        c, i, j = parseint_next(s,i,endpos)
    end
    (j == 0) && (return 0, 0, 0)

    if base == 0
        # Look for a base prefix only while the next index is still inside the
        # requested range. Testing against the end of the whole string instead would
        # read a character beyond `endpos` and could mistake it for a prefix letter.
        if c == '0' && i <= endpos
            c, i = next(s,i)
            base = c=='b' ? 2 : c=='o' ? 8 : c=='x' ? 16 : 10
            if base != 10
                # a prefix was consumed: the digits start after it
                c, i, j = parseint_next(s,i,endpos)
            end
        else
            base = 10
        end
    end
    return sgn, base, j
end
| 56 | + |
# Try to interpret the bytes of `sbuff` from `startpos` through `endpos` as a Bool.
#
# The range must be exactly the bytes "true" or "false" (compared with memcmp);
# surrounding whitespace is not accepted. Returns `Nullable(true)` or
# `Nullable(false)` on a match. Otherwise throws an ArgumentError when `raise` is
# true, or returns an empty `Nullable{Bool}`.
function tryparse_internal{S<:ByteString}(::Type{Bool}, sbuff::S, startpos::Int, endpos::Int, raise::Bool)
    len = endpos-startpos+1
    p = pointer(sbuff)+startpos-1
    (len == 4) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "true", 4)) && (return Nullable(true))
    (len == 5) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "false", 5)) && (return Nullable(false))
    # The message must be built from `sbuff`; there is no variable `s` in this method.
    raise && throw(ArgumentError("invalid Bool representation: $(repr(SubString(sbuff,startpos,endpos)))"))
    Nullable{Bool}()
end
| 65 | + |
# Overflow-checked addition: returns `Nullable{T}(n1 + n2)`, or an empty
# `Nullable{T}` when the sum would fall outside typemin(T):typemax(T).
function safe_add{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        overflows = n1 > (typemax(T) - n2)
    else
        overflows = n1 < (typemin(T) - n2)
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 + n2)
end
# Overflow-checked multiplication: returns `Nullable{T}(n1 * n2)`, or an empty
# `Nullable{T}` when the product would fall outside typemin(T):typemax(T).
function safe_mul{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        overflows = (n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))
    elseif n2 < -1
        overflows = (n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))
    else
        # n2 is 0 or -1 here; only typemin(T) * -1 cannot be represented
        overflows = (n2 == -1) && (n1 == typemin(T))
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 * n2)
end
| 70 | + |
# Parse an integer of type `T` from `s`, looking only at indices `startpos` through
# `endpos`. `base` may be 0 to let parseint_preamble pick the base from a prefix, and
# `a` is the numeric value assigned to the letter 'a' (lowercase digits start there).
#
# Accepted layout: whatever parseint_preamble consumes, then digits, then optional
# trailing whitespace. On success the value is returned wrapped in a `Nullable{T}`.
# On failure an empty `Nullable{T}` is returned, unless `raise` is true, in which
# case an ArgumentError (malformed input) or OverflowError (value does not fit in
# `T`) is thrown.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, a::Int, raise::Bool)
    failure = Nullable{T}()

    sgn, base, i = parseint_preamble(T<:Signed, base, s, startpos, endpos)
    if i == 0
        if raise
            throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
        end
        return failure
    end

    c, i = parseint_next(s,i,endpos)
    if i == 0
        if raise
            throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
        end
        return failure
    end

    base = convert(T,base)
    # As long as n <= m, n*base + digit cannot exceed typemax(T).
    m::T = div(typemax(T)-base+1,base)
    n::T = 0

    # Phase 1: accumulate the magnitude with plain arithmetic while that is safe.
    while n <= m
        local d::T
        if '0' <= c <= '9'
            d = c - '0'
        elseif 'A' <= c <= 'Z'
            d = c - 'A' + 10
        elseif 'a' <= c <= 'z'
            d = c - 'a' + a
        else
            d = base    # not a digit character: force the range check below to fail
        end
        if d >= base
            if raise
                throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
            end
            return failure
        end
        n = n * base + d
        if i > endpos
            n *= sgn
            return Nullable{T}(n)
        end
        c, i = next(s,i)
        if isspace(c)
            break
        end
    end

    # From here on `n` carries its sign, so the checked arithmetic below can reach
    # typemin(T) for signed types.
    if T <: Signed
        n *= sgn
    end

    # Phase 2: remaining digits, each step checked for overflow.
    while !isspace(c)
        local d::T
        if '0' <= c <= '9'
            d = c - '0'
        elseif 'A' <= c <= 'Z'
            d = c - 'A' + 10
        elseif 'a' <= c <= 'z'
            d = c - 'a' + a
        else
            d = base
        end
        if d >= base
            if raise
                throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
            end
            return failure
        end
        if T <: Signed
            d *= sgn
        end

        scaled = safe_mul(n, base)
        total = isnull(scaled) ? scaled : safe_add(get(scaled), d)
        if isnull(total)
            if raise
                throw(OverflowError())
            end
            return failure
        end
        n = get(total)
        if i > endpos
            return Nullable{T}(n)
        end
        c, i = next(s,i)
    end

    # Phase 3: nothing but whitespace may follow the digits.
    while i <= endpos
        c, i = next(s,i)
        if !isspace(c)
            if raise
                throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
            end
            return failure
        end
    end
    return Nullable{T}(n)
end
# Convenience method: parse the whole of `s`, from `start(s)` through `endof(s)`.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool)
    first_index = start(s)
    last_index = endof(s)
    return tryparse_internal(T, s, first_index, last_index, base, raise)
end
# Convenience method: pick the value assigned to the letter 'a' from the base (10
# when `base <= 36`, otherwise 36) and forward to the seven-argument method.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, raise::Bool)
    lowercase_offset = base <= 36 ? 10 : 36
    return tryparse_internal(T, s, startpos, endpos, base, lowercase_offset, raise)
end
# Try to parse `s` as an integer of type `T` written in `base`. Returns a
# `Nullable{T}` that is empty when `s` is not a valid number.
#
# `base` may be any Integer, matching `parse(T, s, base)`, and is converted to Int
# once it has been validated. Throws an ArgumentError when `base` is outside 2:62;
# malformed input never throws.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
    if !(2 <= base <= 62)
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    end
    return tryparse_internal(T, s, Int(base), false)
end
# Try to parse `s` as an integer of type `T`. Base 0 is passed on, which lets the
# parser choose the base from the text. Returns an empty `Nullable{T}` instead of
# throwing when `s` is not a valid number.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString)
    return tryparse_internal(T, s, 0, false)
end
| 141 | + |
# Parse `s` as an integer of type `T` written in `base` and return the value.
#
# Throws an ArgumentError when `base` is outside 2:62 or when `s` is malformed, and
# an OverflowError when the value does not fit in `T`.
function parse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    # tryparse_internal only has methods for `base::Int`; convert so that other
    # Integer types (e.g. UInt8(16)) do not end in a MethodError.
    get(tryparse_internal(T, s, Int(base), true))
end
# Parse `s` as an integer of type `T`. Base 0 is passed on, which lets the parser
# choose the base from the text. Errors are raised rather than returned.
function parse{T<:Integer}(::Type{T}, s::AbstractString)
    parsed = tryparse_internal(T, s, 0, true)
    return get(parsed)
end
| 147 | + |
| 148 | +## stringifying integers more efficiently ## |
| 149 | + |
# Signed built-in integers are converted to a string by calling `dec` directly.
function string(x::Union{Int8,Int16,Int32,Int64,Int128})
    return dec(x)
end
| 151 | + |
| 152 | +## string to float functions ## |
| 153 | + |
# Try to parse a Float64 from a byte string by handing the string, offset 0 and
# `sizeof(s)` to the runtime routine `jl_try_substrtod`. The result is a
# `Nullable{Float64}`.
function tryparse(::Type{Float64}, s::ByteString)
    return ccall(:jl_try_substrtod, Nullable{Float64},
                 (Ptr{UInt8}, Csize_t, Csize_t),
                 s, 0, sizeof(s))
end

# Same as above for a SubString of a byte string: the parent string is passed
# together with the substring's `offset` and `endof` fields, so no copy is made.
function tryparse{T<:ByteString}(::Type{Float64}, s::SubString{T})
    return ccall(:jl_try_substrtod, Nullable{Float64},
                 (Ptr{UInt8}, Csize_t, Csize_t),
                 s.string, s.offset, s.endof)
end
| 156 | + |
# Try to parse a Float32 from a byte string by handing the string, offset 0 and
# `sizeof(s)` to the runtime routine `jl_try_substrtof`. The result is a
# `Nullable{Float32}`.
function tryparse(::Type{Float32}, s::ByteString)
    return ccall(:jl_try_substrtof, Nullable{Float32},
                 (Ptr{UInt8}, Csize_t, Csize_t),
                 s, 0, sizeof(s))
end

# Same as above for a SubString of a byte string: the parent string is passed
# together with the substring's `offset` and `endof` fields, so no copy is made.
function tryparse{T<:ByteString}(::Type{Float32}, s::SubString{T})
    return ccall(:jl_try_substrtof, Nullable{Float32},
                 (Ptr{UInt8}, Csize_t, Csize_t),
                 s.string, s.offset, s.endof)
end
| 159 | + |
# Fallback for any other string type: convert to a byte string first, then use the
# byte-string method for the requested float type.
function tryparse{T<:Union{Float32,Float64}}(::Type{T}, s::AbstractString)
    bytes = bytestring(s)
    return tryparse(T, bytes)
end
| 161 | + |
# Parse `s` as a floating-point number of type `T`. Throws an ArgumentError when
# `tryparse` reports that `s` is not a valid number.
function parse{T<:FloatingPoint}(::Type{T}, s::AbstractString)
    result = tryparse(T, s)
    if isnull(result)
        throw(ArgumentError("invalid number format $(repr(s)) for $T"))
    end
    return get(result)
end
| 166 | + |
# Converting a string to a float means parsing it as a Float64.
function float(x::AbstractString)
    return parse(Float64, x)
end
| 168 | + |
# Element-wise version for arrays of strings: the result has the same shape as `a`
# and the element type of `float(0)`.
function float{S<:AbstractString}(a::AbstractArray{S})
    dest = similar(a, typeof(float(0)))
    return map!(float, dest, a)
end
| 170 | + |
| 171 | +## interface to parser ## |
| 172 | + |
# Parse one expression out of `str`, starting at index `pos`, by calling the
# runtime's `jl_parse_string`. `greedy` is forwarded to the parser as a 1/0 flag.
#
# Returns `(expr, next_pos)`, where `next_pos` is the 1-based position reported by
# the parser. When the parser yields an `Expr` with head `:error`, or yields `()`
# (reported as "end of input"), a ParseError is thrown if `raise` is true;
# otherwise an `Expr(:error, ...)` is returned in place of the expression.
function parse(str::AbstractString, pos::Int; greedy::Bool=true, raise::Bool=true)
    bstr = bytestring(str)
    greedy_flag = greedy ? 1 : 0
    # C is zero-based, Julia is 1-based: shift the position on the way in and out
    ex, zero_based_pos = ccall(:jl_parse_string, Any,
                               (Ptr{UInt8}, Csize_t, Int32, Int32),
                               bstr, sizeof(bstr), pos-1, greedy_flag)
    if raise && isa(ex,Expr) && ex.head === :error
        throw(ParseError(ex.args[1]))
    end
    if ex == ()
        if raise
            throw(ParseError("end of input"))
        end
        ex = Expr(:error, "end of input")
    end
    return ex, zero_based_pos+1
end
| 188 | + |
# Parse all of `str` as a single expression and return it.
#
# Anything left over after the first expression is an error: a ParseError is
# thrown when `raise` is true, otherwise an `Expr(:error, ...)` is returned. An
# error expression produced by the positional method is returned unchanged.
function parse(str::AbstractString; raise::Bool=true)
    ex, pos = parse(str, start(str), greedy=true, raise=raise)
    is_error = isa(ex,Expr) && ex.head === :error
    if is_error || done(str, pos)
        return ex
    end
    if raise
        throw(ParseError("extra token after end of expression"))
    end
    return Expr(:error, "extra token after end of expression")
end
0 commit comments