diff --git a/base/hashing2.jl b/base/hashing2.jl
index becf7b9bed07d..a2653ca556a49 100644
--- a/base/hashing2.jl
+++ b/base/hashing2.jl
@@ -166,3 +166,15 @@ end
 ## hashing Float16s ##
 
 hash(x::Float16, h::UInt) = hash(Float64(x), h)
+
+## hashing strings ##
+
+const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed  # C symbol to call, chosen by the width of UInt
+const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81  # constant added to the incoming hash before hashing string bytes
+
+function hash{T<:ByteString}(s::Union{T,SubString{T}}, h::UInt)  # hash the bytes of a byte string, or of a substring of one, mixed with h
+    h += memhash_seed  # offset the caller's hash state by the string seed
+    # note: use pointer(s) here (see #6058).
+    ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), pointer(s), sizeof(s), h % UInt32) + h  # hash sizeof(s) bytes, passing h truncated to UInt32, then add h back
+end
+hash(s::AbstractString, h::UInt) = hash(bytestring(s), h)  # generic strings are converted with bytestring first
diff --git a/base/parse.jl b/base/parse.jl
new file mode 100644
index 0000000000000..c1f916d980441
--- /dev/null
+++ b/base/parse.jl
@@ -0,0 +1,199 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+## string to integer functions ##
+
+function parse{T<:Integer}(::Type{T}, c::Char, base::Integer=36)  # convert one digit character to a T in the given base (2..62)
+    a::Int = (base <= 36 ? 10 : 36)  # value of 'a': 10 (same as 'A') for bases up to 36, otherwise 36 so lowercase follows 'Z'
+    2 <= base <= 62 || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
+    d = '0' <= c <= '9' ? c-'0'    :
+        'A' <= c <= 'Z' ? c-'A'+10 :
+        'a' <= c <= 'z' ? c-'a'+a  : throw(ArgumentError("invalid digit: $(repr(c))"))
+    d < base || throw(ArgumentError("invalid base $base digit $(repr(c))"))  # digit value must be representable in this base
+    convert(T, d)  # return the digit value as the requested integer type
+end
+
+function parseint_next(s::AbstractString, startpos::Int, endpos::Int)  # read the character of s at startpos, limited to indices <= endpos
+    (0 < startpos <= endpos) || (return Char(0), 0, 0)  # out of range: return the sentinel (NUL character, 0, 0)
+    j = startpos  # remember the index of the character about to be read
+    c, startpos = next(s,startpos)  # c is the character; startpos becomes the index that follows it
+    c, startpos, j  # (character, index after it, index of it)
+end
+
+function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)  # consume whitespace, optional sign and optional base prefix; returns (sgn, base, index of first digit) or (0, 0, 0)
+    c, i, j = parseint_next(s, startpos, endpos)  # c: current char, i: next index, j: index of c (0 once input is exhausted)
+
+    while isspace(c)  # skip leading whitespace
+        c, i, j = parseint_next(s,i,endpos)
+    end
+    (j == 0) && (return 0, 0, 0)  # ran out of input before any non-space character
+
+    sgn = 1
+    if signed  # a sign character is only consumed when the caller asks for signed parsing
+        if c == '-' || c == '+'
+            (c == '-') && (sgn = -1)
+            c, i, j = parseint_next(s,i,endpos)
+        end
+    end
+
+    while isspace(c)  # whitespace is also allowed between the sign and the digits
+        c, i, j = parseint_next(s,i,endpos)
+    end
+    (j == 0) && (return 0, 0, 0)  # nothing left after the sign/whitespace
+
+    if base == 0  # base 0 means: detect the base from a 0b / 0o / 0x prefix, defaulting to 10
+        if c == '0' && !done(s,i)
+            c, i = next(s,i)  # look at the character after the leading '0'
+            base = c=='b' ? 2 : c=='o' ? 8 : c=='x' ? 16 : 10
+            if base != 10
+                c, i, j = parseint_next(s,i,endpos)  # prefix recognised: move j past it to the first digit
+            end
+        else
+            base = 10
+        end
+    end
+    return sgn, base, j  # j is the index at which digit parsing should start
+end
+
+function tryparse_internal{S<:ByteString}(::Type{Bool}, sbuff::S, startpos::Int, endpos::Int, raise::Bool)  # parse exactly "true" or "false" from bytes startpos:endpos of sbuff
+    len = endpos-startpos+1  # number of bytes in the candidate slice
+    p = pointer(sbuff)+startpos-1  # address of the first byte of the slice
+    (len == 4) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "true", 4)) && (return Nullable(true))
+    (len == 5) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "false", 5)) && (return Nullable(false))
+    raise && throw(ArgumentError("invalid Bool representation: $(repr(SubString(sbuff,startpos,endpos)))"))  # report the offending slice of sbuff (the name `s` is not defined in this method)
+    Nullable{Bool}()  # no match and not raising: return a null result
+end
+
+safe_add{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))) ? Nullable{T}() : Nullable{T}(n1 + n2)  # n1 + n2, or a null Nullable when the sum would leave the range of T
+safe_mul{T<:Integer}(n1::T, n2::T) = ((n2 >   0) ? ((n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))) :  # positive multiplier: compare n1 against the range divided by n2
+                                      (n2 <  -1) ? ((n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))) :  # multiplier below -1: the bounds swap sides
+                                      ((n2 == -1) && n1 == typemin(T))) ? Nullable{T}() : Nullable{T}(n1 * n2)  # n2 == -1 only overflows for typemin(T); n2 == 0 never does
+
+function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, a::Int, raise::Bool)  # parse s[startpos:endpos] as a T; `a` is the digit value of 'a'; returns a Nullable{T}, or throws when raise is true
+    _n = Nullable{T}()  # the null result returned on every failure path when raise is false
+    sgn, base, i = parseint_preamble(T<:Signed, base, s, startpos, endpos)  # sign, resolved base, index of the first digit
+    if i == 0  # preamble found no digits to parse
+        raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
+        return _n
+    end
+    c, i = parseint_next(s,i,endpos)  # first digit character and the index after it
+    if i == 0
+        raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
+        return _n
+    end
+
+    base = convert(T,base)
+    m::T = div(typemax(T)-base+1,base)  # while n <= m, n*base + d (with d < base) cannot exceed typemax(T)
+    n::T = 0
+    while n <= m  # fast path: accumulate digits without overflow checks
+        d::T = '0' <= c <= '9' ? c-'0'    :
+               'A' <= c <= 'Z' ? c-'A'+10 :
+               'a' <= c <= 'z' ? c-'a'+a  : base
+        if d >= base  # non-digit characters were mapped to `base`, so they fail here too
+            raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
+            return _n
+        end
+        n *= base
+        n += d
+        if i > endpos  # consumed the last character: apply the sign and finish
+            n *= sgn
+            return Nullable{T}(n)
+        end
+        c, i = next(s,i)
+        isspace(c) && break  # whitespace ends the digit run
+    end
+    (T <: Signed) && (n *= sgn)  # from here on n carries its sign so the checked arithmetic below sees the real value
+    while !isspace(c)  # slow path: remaining digits go through overflow-checked multiply/add
+        d::T = '0' <= c <= '9' ? c-'0'    :
+        'A' <= c <= 'Z' ? c-'A'+10 :
+            'a' <= c <= 'z' ? c-'a'+a  : base
+        if d >= base
+            raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
+            return _n
+        end
+        (T <: Signed) && (d *= sgn)  # give the digit the same sign as the accumulator
+
+        safe_n = safe_mul(n, base)
+        isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))  # only add if the multiply did not overflow
+        if isnull(safe_n)  # either step overflowed T
+            raise && throw(OverflowError())
+            return _n
+        end
+        n = get(safe_n)
+        (i > endpos) && return Nullable{T}(n)
+        c, i = next(s,i)
+    end
+    while i <= endpos  # after the digits only whitespace is allowed
+        c, i = next(s,i)
+        if !isspace(c)
+            raise && throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
+            return _n
+        end
+    end
+    return Nullable{T}(n)
+end
+tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool) =  # whole-string form
+    tryparse_internal(T,s,start(s),endof(s),base,raise)  # delegates with the full index range of s
+tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, raise::Bool) =  # range form without an explicit value for 'a'
+    tryparse_internal(T, s, startpos, endpos, base, base <= 36 ? 10 : 36, raise)  # 'a' counts as 10 for bases up to 36, otherwise 36
+tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =  # non-throwing parse in an explicit base; still throws for a base outside 2..62
+    2 <= base <= 62 ? tryparse_internal(T,s,Int(base),false) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
+tryparse{T<:Integer}(::Type{T}, s::AbstractString) = tryparse_internal(T,s,0,false)  # base 0: the base is detected from a 0b/0o/0x prefix
+
+function parse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)  # parse s as a T in the given base (2..62), throwing on invalid input
+    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
+    get(tryparse_internal(T, s, Int(base), true))  # tryparse_internal only has methods for base::Int, so convert other Integer types (e.g. 0x10) first
+end
+parse{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))  # base 0 (detected from prefix) with raise=true, so failures throw
+
+## stringifying integers more efficiently ##
+
+string(x::Union{Int8,Int16,Int32,Int64,Int128}) = dec(x)  # signed machine integers are stringified by calling dec directly
+
+## string to float functions ##
+
+tryparse(::Type{Float64}, s::ByteString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s))  # whole string: passes 0 and sizeof(s) — presumably (byte offset, byte length); confirm against the C side
+tryparse{T<:ByteString}(::Type{Float64}, s::SubString{T}) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.endof)  # substring: parent buffer plus the substring's offset and endof fields
+
+tryparse(::Type{Float32}, s::ByteString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s))  # Float32 counterpart of the Float64 method
+tryparse{T<:ByteString}(::Type{Float32}, s::SubString{T}) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.endof)  # Float32 counterpart for substrings
+
+tryparse{T<:Union{Float32,Float64}}(::Type{T}, s::AbstractString) = tryparse(T, bytestring(s))  # other string types are converted with bytestring first
+
+function parse{T<:FloatingPoint}(::Type{T}, s::AbstractString)  # throwing wrapper around tryparse for floating-point types
+    nf = tryparse(T, s)  # Nullable result; null means s could not be parsed
+    isnull(nf) ? throw(ArgumentError("invalid number format $(repr(s)) for $T")) : get(nf)
+end
+
+float(x::AbstractString) = parse(Float64,x)  # a string converts to Float64 via parse, so invalid input throws
+
+float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)  # elementwise conversion into a new array whose eltype is typeof(float(0))
+
+## interface to parser ##
+
+function parse(str::AbstractString, pos::Int; greedy::Bool=true, raise::Bool=true)  # parse one expression from str starting at index pos
+    # returns (expr, end_pos). expr is () in case of parse error.
+    bstr = bytestring(str)  # the C parser is handed a byte string
+    ex, pos = ccall(:jl_parse_string, Any,
+                    (Ptr{UInt8}, Csize_t, Int32, Int32),
+                    bstr, sizeof(bstr), pos-1, greedy ? 1:0)  # pos-1 converts the start index to zero-based
+    if raise && isa(ex,Expr) && is(ex.head,:error)  # the parser reported an error expression
+        throw(ParseError(ex.args[1]))
+    end
+    if ex == ()  # an empty tuple from the parser is treated as end of input
+        raise && throw(ParseError("end of input"))
+        ex = Expr(:error, "end of input")  # with raise=false the failure is returned as an :error expression
+    end
+    ex, pos+1 # C is zero-based, Julia is 1-based
+end
+
+function parse(str::AbstractString; raise::Bool=true)  # parse str as exactly one expression
+    ex, pos = parse(str, start(str), greedy=true, raise=raise)  # parse greedily from the first index
+    if isa(ex,Expr) && ex.head === :error  # only reachable with raise=false: hand the error expression back
+        return ex
+    end
+    if !done(str, pos)  # input remains after the first expression
+        raise && throw(ParseError("extra token after end of expression"))
+        return Expr(:error, "extra token after end of expression")
+    end
+    return ex
+end
diff --git a/base/shell.jl b/base/shell.jl
new file mode 100644
index 0000000000000..cfd7eb26ee690
--- /dev/null
+++ b/base/shell.jl
@@ -0,0 +1,167 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+## shell-like command parsing ##
+
+function shell_parse(raw::AbstractString, interp::Bool)  # split a shell-like command string into words; with interp=true, `$` starts an interpolated Julia expression
+    s = lstrip(raw)
+    #Strips the end but respects the space when the string endswith "\\ "
+    r = RevString(s)  # scan the string from its end
+    i = start(r)
+    c_old = nothing  # previously seen character in the reversed scan
+    while !done(r,i)
+        c, j = next(r,i)
+        if c == '\\' && c_old == ' '  # backslash directly before a trailing space
+            i -= 1  # step back one position so that space is kept
+            break
+        elseif !(c in _default_delims)  # first non-delimiter character: stop trimming
+            break
+        end
+        i = j
+        c_old = c
+    end
+    s = s[1:end-i+1]  # drop the trailing delimiters counted by the reverse scan
+
+    last_parse = 0:-1  # index range of the last `$` interpolation parsed; empty range if there was none
+    isempty(s) && return interp ? (Expr(:tuple,:()),last_parse) : ([],last_parse)
+
+    in_single_quotes = false
+    in_double_quotes = false
+
+    args::Vector{Any} = []  # completed words; each word is a vector of pieces
+    arg::Vector{Any} = []  # pieces of the word currently being built
+    i = start(s)  # start of the pending literal piece
+    j = i  # current scan position
+
+    function update_arg(x)  # append a piece to the current word, skipping empty strings
+        if !isa(x,AbstractString) || !isempty(x)
+            push!(arg, x)
+        end
+    end
+    function append_arg()  # finish the current word and start a new one
+        if isempty(arg); arg = Any["",]; end  # a word with no pieces becomes a single empty string
+        push!(args, arg)
+        arg = []
+    end
+
+    while !done(s,j)
+        c, k = next(s,j)  # c: character at j, k: index after it
+        if !in_single_quotes && !in_double_quotes && isspace(c)  # unquoted whitespace ends the current word
+            update_arg(s[i:j-1])
+            append_arg()
+            j = k
+            while !done(s,j)  # skip the rest of the whitespace run
+                c, k = next(s,j)
+                if !isspace(c)
+                    i = j  # next word starts here
+                    break
+                end
+                j = k
+            end
+        elseif interp && !in_single_quotes && c == '$'  # interpolation (not recognised inside single quotes)
+            update_arg(s[i:j-1]); i = k; j = k
+            if done(s,k)
+                error("\$ right before end of command")
+            end
+            if isspace(s[k])
+                error("space not allowed right after \$")
+            end
+            stpos = j
+            ex, j = parse(s,j,greedy=false)  # parse one Julia expression non-greedily after the `$`
+            last_parse = stpos:j
+            update_arg(esc(ex)); i = j  # the escaped expression becomes a piece of the word
+        else
+            if !in_double_quotes && c == '\''  # toggle single-quote state; the quote itself is dropped
+                in_single_quotes = !in_single_quotes
+                update_arg(s[i:j-1]); i = k
+            elseif !in_single_quotes && c == '"'  # toggle double-quote state; the quote itself is dropped
+                in_double_quotes = !in_double_quotes
+                update_arg(s[i:j-1]); i = k
+            elseif c == '\\'
+                if in_double_quotes  # inside double quotes a backslash only escapes `"` and `$`
+                    if done(s,k)
+                        error("unterminated double quote")
+                    end
+                    if s[k] == '"' || s[k] == '$'
+                        update_arg(s[i:j-1]); i = k  # drop the backslash, keep the escaped character as literal text
+                        c, k = next(s,k)
+                    end
+                elseif !in_single_quotes  # outside quotes a backslash escapes the next character, whatever it is
+                    if done(s,k)
+                        error("dangling backslash")
+                    end
+                    update_arg(s[i:j-1]); i = k
+                    c, k = next(s,k)
+                end
+            end
+            j = k
+        end
+    end
+
+    if in_single_quotes; error("unterminated single quote"); end
+    if in_double_quotes; error("unterminated double quote"); end
+
+    update_arg(s[i:end])  # flush the final literal piece and word
+    append_arg()
+
+    if !interp
+        return (args,last_parse)  # raw pieces when interpolation is disabled
+    end
+
+    # construct an expression
+    ex = Expr(:tuple)
+    for arg in args
+        push!(ex.args, Expr(:tuple, arg...))  # one inner tuple expression per word
+    end
+    (ex,last_parse)
+end
+shell_parse(s::AbstractString) = shell_parse(s,true)  # interpolation is enabled by default
+
+function shell_split(s::AbstractString)  # split s into plain words using shell_parse without interpolation
+    parsed = shell_parse(s,false)[1]  # keep only the words, discarding last_parse
+    args = AbstractString[]
+    for arg in parsed
+       push!(args, string(arg...))  # join the pieces of each word into one string
+    end
+    args
+end
+
+function print_shell_word(io::IO, word::AbstractString)  # print word to io, quoting it only when it contains special characters
+    if isempty(word)
+        print(io, "''")  # an empty word is written as a pair of single quotes
+    end
+    has_single = false  # word contains a single quote
+    has_special = false  # word contains whitespace, a backslash, a quote or `$`
+    for c in word
+        if isspace(c) || c=='\\' || c=='\'' || c=='"' || c=='$'
+            has_special = true
+            if c == '\''
+                has_single = true
+            end
+        end
+    end
+    if !has_special
+        print(io, word)  # nothing special: print verbatim (prints nothing for the empty word handled above)
+    elseif !has_single
+        print(io, '\'', word, '\'')  # no single quote inside, so single-quoting is enough
+    else
+        print(io, '"')  # contains a single quote: use double quotes and backslash-escape `"` and `$`
+        for c in word
+            if c == '"' || c == '$'
+                print(io, '\\')
+            end
+            print(io, c)
+        end
+        print(io, '"')
+    end
+end
+
+function print_shell_escaped(io::IO, cmd::AbstractString, args::AbstractString...)  # print cmd and args as space-separated, shell-quoted words
+    print_shell_word(io, cmd)
+    for arg in args
+        print(io, ' ')  # single space between words
+        print_shell_word(io, arg)
+    end
+end
+print_shell_escaped(io::IO) = nothing  # with no words there is nothing to print
+
+shell_escape(args::AbstractString...) = sprint(print_shell_escaped, args...)  # same output, returned as a string
diff --git a/base/string.jl b/base/string.jl
index f0d90f4f04162..a4976f67cfa14 100644
--- a/base/string.jl
+++ b/base/string.jl
@@ -1,1723 +1,7 @@
 # This file is a part of Julia. License is MIT: http://julialang.org/license
 
-## core text I/O ##
-
-print(io::IO, x) = show(io, x)
-print(io::IO, xs...) = for x in xs print(io, x) end
-
-println(io::IO, xs...) = print(io, xs..., '\n')
-
-print(xs...)   = print(STDOUT, xs...)
-println(xs...) = println(STDOUT, xs...)
-
-## core string functions ##
-
-endof(s::AbstractString) = error("you must implement endof(", typeof(s), ")")
-next(s::AbstractString, i::Int) = error("you must implement next(", typeof(s), ",Int)")
-next(s::DirectIndexString, i::Int) = (s[i],i+1)
-next(s::AbstractString, i::Integer) = next(s,Int(i))
-
-## conversion of general objects to strings ##
-
-function print_to_string(xs...)
-    # specialized for performance reasons
-    s = IOBuffer(Array(UInt8,isa(xs[1],AbstractString) ? endof(xs[1]) : 0), true, true)
-    for x in xs
-        print(s, x)
-    end
-    d = s.data
-    resize!(d,s.size)
-    bytestring(d)
-end
-
-string() = ""
-string(s::AbstractString) = s
-string(xs...) = print_to_string(xs...)
-
-bytestring() = ""
-bytestring(s::Vector{UInt8}) = bytestring(pointer(s),length(s))
-bytestring(s::AbstractString...) = print_to_string(s...)
-
-function bytestring(p::Union{Ptr{UInt8},Ptr{Int8}})
-    p == C_NULL ? throw(ArgumentError("cannot convert NULL to string")) :
-    ccall(:jl_cstr_to_string, ByteString, (Ptr{UInt8},), p)
-end
-bytestring(s::Cstring) = bytestring(box(Ptr{Cchar}, unbox(Cstring,s)))
-
-function bytestring(p::Union{Ptr{UInt8},Ptr{Int8}},len::Integer)
-    p == C_NULL ? throw(ArgumentError("cannot convert NULL to string")) :
-    ccall(:jl_pchar_to_string, ByteString, (Ptr{UInt8},Int), p, len)
-end
-
-convert(::Type{Vector{UInt8}}, s::AbstractString) = bytestring(s).data
-convert(::Type{Array{UInt8}}, s::AbstractString) = bytestring(s).data
-convert(::Type{ByteString}, s::AbstractString) = bytestring(s)
-convert(::Type{Vector{Char}}, s::AbstractString) = collect(s)
-convert(::Type{Symbol}, s::AbstractString) = symbol(s)
-
-## generic supplied functions ##
-
-start(s::AbstractString) = 1
-done(s::AbstractString,i) = (i > endof(s))
-getindex(s::AbstractString, i::Int) = next(s,i)[1]
-getindex(s::AbstractString, i::Integer) = s[Int(i)]
-getindex(s::AbstractString, x::Real) = s[to_index(x)]
-getindex{T<:Integer}(s::AbstractString, r::UnitRange{T}) = s[Int(first(r)):Int(last(r))]
-# TODO: handle other ranges with stride ±1 specially?
-getindex(s::AbstractString, v::AbstractVector) =
-    sprint(length(v), io->(for i in v write(io,s[i]) end))
-
-symbol(s::AbstractString) = symbol(bytestring(s))
-
-print(io::IO, s::AbstractString) = (write(io, s); nothing)
-write(io::IO, s::AbstractString) = (len = 0; for c in s; len += write(io, c); end; len)
-show(io::IO, s::AbstractString) = print_quoted(io, s)
-
-sizeof(s::AbstractString) = error("type $(typeof(s)) has no canonical binary representation")
-
-eltype{T<:AbstractString}(::Type{T}) = Char
-
-(*)(s1::AbstractString, ss::AbstractString...) = string(s1, ss...)
-(^)(s::AbstractString, r::Integer) = repeat(s,r)
-
-length(s::DirectIndexString) = endof(s)
-function length(s::AbstractString)
-    i = start(s)
-    if done(s,i)
-        return 0
-    end
-    n = 1
-    while true
-        c, j = next(s,i)
-        if done(s,j)
-            return n
-        end
-        n += 1
-        i = j
-    end
-end
-
-isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= endof(s))
-function isvalid(s::AbstractString, i::Integer)
-    i < 1 && return false
-    done(s,i) && return false
-    try
-        next(s,i)
-        true
-    catch
-        false
-    end
-end
-
-prevind(s::DirectIndexString, i::Integer) = i-1
-prevind(s::AbstractArray   , i::Integer) = i-1
-nextind(s::DirectIndexString, i::Integer) = i+1
-nextind(s::AbstractArray   , i::Integer) = i+1
-
-function prevind(s::AbstractString, i::Integer)
-    e = endof(s)
-    if i > e
-        return e
-    end
-    j = i-1
-    while j >= 1
-        if isvalid(s,j)
-            return j
-        end
-        j -= 1
-    end
-    return 0 # out of range
-end
-
-function nextind(s::AbstractString, i::Integer)
-    e = endof(s)
-    if i < 1
-        return 1
-    end
-    if i > e
-        return i+1
-    end
-    for j = i+1:e
-        if isvalid(s,j)
-            return j
-        end
-    end
-    next(s,e)[2] # out of range
-end
-
-checkbounds(s::AbstractString, i::Integer) = start(s) <= i <= endof(s) || throw(BoundsError(s, i))
-checkbounds(s::AbstractString, i::Real) = checkbounds(s, to_index(i))
-checkbounds{T<:Integer}(s::AbstractString, r::Range{T}) = isempty(r) || (minimum(r) >= start(s) && maximum(r) <= endof(s)) || throw(BoundsError(s, r))
-checkbounds{T<:Real}(s::AbstractString, I::AbstractArray{T}) = all(i -> checkbounds(s, i), I)
-
-ind2chr(s::DirectIndexString, i::Integer) = begin checkbounds(s,i); i end
-chr2ind(s::DirectIndexString, i::Integer) = begin checkbounds(s,i); i end
-
-function ind2chr(s::AbstractString, i::Integer)
-    s[i] # throws error if invalid
-    j = 1
-    k = start(s)
-    while true
-        c, l = next(s,k)
-        if i <= k
-            return j
-        end
-        j += 1
-        k = l
-    end
-end
-
-function chr2ind(s::AbstractString, i::Integer)
-    i < start(s) && throw(BoundsError(s, i))
-    j = 1
-    k = start(s)
-    while true
-        c, l = next(s,k)
-        if i == j
-            return k
-        end
-        j += 1
-        k = l
-    end
-end
-
-immutable EachStringIndex{T<:AbstractString}
-    s::T
-end
-eachindex(s::AbstractString) = EachStringIndex(s)
-
-length(e::EachStringIndex) = length(e.s)
-start(e::EachStringIndex) = start(e.s)
-next(e::EachStringIndex, state) = (state, nextind(e.s, state))
-done(e::EachStringIndex, state) = done(e.s, state)
-eltype(e::EachStringIndex) = Int
-
-typealias Chars Union{Char,AbstractVector{Char},Set{Char}}
-
-function search(s::AbstractString, c::Chars, i::Integer)
-    if isempty(c)
-        return 1 <= i <= nextind(s,endof(s)) ? i :
-               throw(BoundsError(s, i))
-    end
-    if i < 1 || i > nextind(s,endof(s))
-        throw(BoundsError(s, i))
-    end
-    while !done(s,i)
-        d, j = next(s,i)
-        if d in c
-            return i
-        end
-        i = j
-    end
-    return 0
-end
-search(s::AbstractString, c::Chars) = search(s,c,start(s))
-
-in(c::Char, s::AbstractString) = (search(s,c)!=0)
-
-function _searchindex(s, t, i)
-    if isempty(t)
-        return 1 <= i <= nextind(s,endof(s)) ? i :
-               throw(BoundsError(s, i))
-    end
-    t1, j2 = next(t,start(t))
-    while true
-        i = search(s,t1,i)
-        if i == 0 return 0 end
-        c, ii = next(s,i)
-        j = j2; k = ii
-        matched = true
-        while !done(t,j)
-            if done(s,k)
-                matched = false
-                break
-            end
-            c, k = next(s,k)
-            d, j = next(t,j)
-            if c != d
-                matched = false
-                break
-            end
-        end
-        if matched
-            return i
-        end
-        i = ii
-    end
-end
-
-function _search_bloom_mask(c)
-    UInt64(1) << (c & 63)
-end
-
-function _searchindex(s::Array, t::Array, i)
-    n = length(t)
-    m = length(s)
-
-    if n == 0
-        return 1 <= i <= m+1 ? max(1, i) : 0
-    elseif m == 0
-        return 0
-    elseif n == 1
-        return search(s, t[1], i)
-    end
-
-    w = m - n
-    if w < 0 || i - 1 > w
-        return 0
-    end
-
-    bloom_mask = UInt64(0)
-    skip = n - 1
-    tlast = t[end]
-    for j in 1:n
-        bloom_mask |= _search_bloom_mask(t[j])
-        if t[j] == tlast && j < n
-            skip = n - j - 1
-        end
-    end
-
-    i -= 1
-    while i <= w
-        if s[i+n] == tlast
-            # check candidate
-            j = 0
-            while j < n - 1
-                if s[i+j+1] != t[j+1]
-                    break
-                end
-                j += 1
-            end
-
-            # match found
-            if j == n - 1
-                return i+1
-            end
-
-            # no match, try to rule out the next character
-            if i < w && bloom_mask & _search_bloom_mask(s[i+n+1]) == 0
-                i += n
-            else
-                i += skip
-            end
-        elseif i < w
-            if bloom_mask & _search_bloom_mask(s[i+n+1]) == 0
-                i += n
-            end
-        end
-        i += 1
-    end
-
-    0
-end
-
-typealias ByteArray Union{Vector{UInt8},Vector{Int8}}
-
-searchindex(s::ByteArray, t::ByteArray, i) = _searchindex(s,t,i)
-searchindex(s::AbstractString, t::AbstractString, i::Integer) = _searchindex(s,t,i)
-searchindex(s::AbstractString, t::AbstractString) = searchindex(s,t,start(s))
-searchindex(s::AbstractString, c::Char, i::Integer) = _searchindex(s,c,i)
-searchindex(s::AbstractString, c::Char) = searchindex(s,c,start(s))
-
-function searchindex(s::ByteString, t::ByteString, i::Integer=1)
-    # Check for fast case of a single byte
-    # (for multi-byte UTF-8 sequences, use searchindex on byte arrays instead)
-    if endof(t) == 1
-        search(s, t[1], i)
-    else
-        searchindex(s.data, t.data, i)
-    end
-end
-
-function search(s::ByteArray, t::ByteArray, i)
-    idx = searchindex(s,t,i)
-    if isempty(t)
-        idx:idx-1
-    else
-        idx:(idx > 0 ? idx + endof(t) - 1 : -1)
-    end
-end
-
-function search(s::AbstractString, t::AbstractString, i::Integer=start(s))
-    idx = searchindex(s,t,i)
-    if isempty(t)
-        idx:idx-1
-    else
-        idx:(idx > 0 ? idx + endof(t) - 1 : -1)
-    end
-end
-
-function rsearch(s::AbstractString, c::Chars)
-    j = search(RevString(s), c)
-    j == 0 && return 0
-    endof(s)-j+1
-end
-
-function rsearch(s::AbstractString, c::Chars, i::Integer)
-    e = endof(s)
-    j = search(RevString(s), c, e-i+1)
-    j == 0 && return 0
-    e-j+1
-end
-
-function _rsearchindex(s, t, i)
-    if isempty(t)
-        return 1 <= i <= nextind(s,endof(s)) ? i :
-               throw(BoundsError(s, i))
-    end
-    t = RevString(t)
-    rs = RevString(s)
-    l = endof(s)
-    t1, j2 = next(t,start(t))
-    while true
-        i = rsearch(s,t1,i)
-        if i == 0 return 0 end
-        c, ii = next(rs,l-i+1)
-        j = j2; k = ii
-        matched = true
-        while !done(t,j)
-            if done(rs,k)
-                matched = false
-                break
-            end
-            c, k = next(rs,k)
-            d, j = next(t,j)
-            if c != d
-                matched = false
-                break
-            end
-        end
-        if matched
-            return nextind(s,l-k+1)
-        end
-        i = l-ii+1
-    end
-end
-
-function _rsearchindex(s::Array, t::Array, k)
-    n = length(t)
-    m = length(s)
-
-    if n == 0
-        return 0 <= k <= m ? max(k, 1) : 0
-    elseif m == 0
-        return 0
-    elseif n == 1
-        return rsearch(s, t[1], k)
-    end
-
-    w = m - n
-    if w < 0 || k <= 0
-        return 0
-    end
-
-    bloom_mask = UInt64(0)
-    skip = n - 1
-    tfirst = t[1]
-    for j in n:-1:1
-        bloom_mask |= _search_bloom_mask(t[j])
-        if t[j] == tfirst && j > 1
-            skip = j - 2
-        end
-    end
-
-    i = min(k - n + 1, w + 1)
-    while i > 0
-        if s[i] == tfirst
-            # check candidate
-            j = 1
-            while j < n
-                if s[i+j] != t[j+1]
-                    break
-                end
-                j += 1
-            end
-
-            # match found
-            if j == n
-                return i
-            end
-
-            # no match, try to rule out the next character
-            if i > 1 && bloom_mask & _search_bloom_mask(s[i-1]) == 0
-                i -= n
-            else
-                i -= skip
-            end
-        elseif i > 1
-            if bloom_mask & _search_bloom_mask(s[i-1]) == 0
-                i -= n
-            end
-        end
-        i -= 1
-    end
-
-    0
-end
-
-rsearchindex(s::ByteArray,t::ByteArray,i) = _rsearchindex(s,t,i)
-rsearchindex(s::AbstractString, t::AbstractString, i::Integer) = _rsearchindex(s,t,i)
-rsearchindex(s::AbstractString, t::AbstractString) = (isempty(s) && isempty(t)) ? 1 : rsearchindex(s,t,endof(s))
-
-function rsearchindex(s::ByteString, t::ByteString)
-    # Check for fast case of a single byte
-    # (for multi-byte UTF-8 sequences, use rsearchindex instead)
-    if endof(t) == 1
-        rsearch(s, t[1])
-    else
-        _rsearchindex(s.data, t.data, length(s.data))
-    end
-end
-
-function rsearchindex(s::ByteString, t::ByteString, i::Integer)
-    # Check for fast case of a single byte
-    # (for multi-byte UTF-8 sequences, use rsearchindex instead)
-    if endof(t) == 1
-        rsearch(s, t[1], i)
-    elseif endof(t) != 0
-        _rsearchindex(s.data, t.data, nextind(s, i)-1)
-    elseif i > sizeof(s)
-        return 0
-    elseif i == 0
-        return 1
-    else
-        return i
-    end
-end
-
-function rsearch(s::ByteArray, t::ByteArray, i::Integer)
-    idx = rsearchindex(s,t,i)
-    if isempty(t)
-        idx:idx-1
-    else
-        idx:(idx > 0 ? idx + endof(t) - 1 : -1)
-    end
-end
-
-function rsearch(s::AbstractString, t::AbstractString, i::Integer=endof(s))
-    idx = rsearchindex(s,t,i)
-    if isempty(t)
-        idx:idx-1
-    else
-        idx:(idx > 0 ? idx + endof(t) - 1 : -1)
-    end
-end
-
-contains(haystack::AbstractString, needle::AbstractString) = searchindex(haystack,needle)!=0
-
-in(::AbstractString, ::AbstractString) = error("use contains(x,y) for string containment")
-
-function cmp(a::AbstractString, b::AbstractString)
-    if a === b
-        return 0
-    end
-    i = start(a)
-    j = start(b)
-    while !done(a,i) && !done(b,i)
-        c, i = next(a,i)
-        d, j = next(b,j)
-        if c != d
-            return c < d ? -1 : +1
-        end
-    end
-    done(a,i) && !done(b,j) ? -1 :
-    !done(a,i) && done(b,j) ? +1 : 0
-end
-
-==(a::AbstractString, b::AbstractString) = cmp(a,b) == 0
-isless(a::AbstractString, b::AbstractString) = cmp(a,b) < 0
-
-# starts with and ends with predicates
-
-function startswith(a::AbstractString, b::AbstractString)
-    i = start(a)
-    j = start(b)
-    while !done(a,i) && !done(b,i)
-        c, i = next(a,i)
-        d, j = next(b,j)
-        if c != d return false end
-    end
-    done(b,i)
-end
-startswith(str::AbstractString, chars::Chars) = !isempty(str) && str[start(str)] in chars
-
-function endswith(a::AbstractString, b::AbstractString)
-    i = endof(a)
-    j = endof(b)
-    a1 = start(a)
-    b1 = start(b)
-    while a1 <= i && b1 <= j
-        c = a[i]
-        d = b[j]
-        if c != d return false end
-        i = prevind(a,i)
-        j = prevind(b,j)
-    end
-    j < b1
-end
-endswith(str::AbstractString, chars::Chars) = !isempty(str) && str[end] in chars
-
-# faster comparisons for byte strings and symbols
-
-cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data)
-cmp(a::Symbol, b::Symbol) = Int(sign(ccall(:strcmp, Int32, (Cstring, Cstring), a, b)))
-
-==(a::ByteString, b::ByteString) = endof(a) == endof(b) && cmp(a,b) == 0
-isless(a::Symbol, b::Symbol) = cmp(a,b) < 0
-
-startswith(a::ByteString, b::ByteString) = startswith(a.data, b.data)
-startswith(a::Vector{UInt8}, b::Vector{UInt8}) =
-    (length(a) >= length(b) && ccall(:strncmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, length(b)) == 0)
-
-# TODO: fast endswith
-
-## character column width function ##
-
-strwidth(s::AbstractString) = (w=0; for c in s; w += charwidth(c); end; w)
-
-isascii(c::Char) = c < Char(0x80)
-isascii(s::AbstractString) = all(isascii, s)
-isascii(s::ASCIIString) = true
-
-## substrings reference original strings ##
-
-immutable SubString{T<:AbstractString} <: AbstractString
-    string::T
-    offset::Int
-    endof::Int
-
-    function SubString(s::T, i::Int, j::Int)
-        if i > endof(s) || j<i
-            return new(s, i-1, 0)
-        else
-            if !isvalid(s,i)
-                throw(ArgumentError("invalid SubString index"))
-            end
-
-            while !isvalid(s,j) && j > i
-                j -= 1
-            end
-
-            o = i-1
-            new(s, o, max(0, j-o))
-        end
-    end
-end
-SubString{T<:AbstractString}(s::T, i::Int, j::Int) = SubString{T}(s, i, j)
-SubString(s::SubString, i::Int, j::Int) = SubString(s.string, s.offset+i, s.offset+j)
-SubString(s::AbstractString, i::Integer, j::Integer) = SubString(s, Int(i), Int(j))
-SubString(s::AbstractString, i::Integer) = SubString(s, i, endof(s))
-
-write{T<:ByteString}(to::AbstractIOBuffer, s::SubString{T}) =
-    s.endof==0 ? 0 : write_sub(to, s.string.data, s.offset + 1, nextind(s, s.endof) - 1)
-
-sizeof(s::SubString{ASCIIString}) = s.endof
-sizeof(s::SubString{UTF8String}) = s.endof == 0 ? 0 : nextind(s, s.endof) - 1
-
-# TODO: length(s::SubString) = ??
-# default implementation will work but it's slow
-# can this be delegated efficiently somehow?
-# that may require additional string interfaces
-length{T<:DirectIndexString}(s::SubString{T}) = endof(s)
-
-function length(s::SubString{UTF8String})
-    return s.endof==0 ? 0 : Int(ccall(:u8_charnum, Csize_t, (Ptr{UInt8}, Csize_t),
-                                      pointer(s), nextind(s, s.endof) - 1))
-end
-
-function next(s::SubString, i::Int)
-    if i < 1 || i > s.endof
-        throw(BoundsError(s, i))
-    end
-    c, i = next(s.string, i+s.offset)
-    c, i-s.offset
-end
-
-function getindex(s::SubString, i::Int)
-    if i < 1 || i > s.endof
-        throw(BoundsError(s, i))
-    end
-    getindex(s.string, i+s.offset)
-end
-
-endof(s::SubString) = s.endof
-
-function isvalid(s::SubString, i::Integer)
-    return (start(s) <= i <= endof(s)) && isvalid(s.string, s.offset+i)
-end
-
-isvalid{T<:DirectIndexString}(s::SubString{T}, i::Integer) = (start(s) <= i <= endof(s))
-
-ind2chr{T<:DirectIndexString}(s::SubString{T}, i::Integer) = begin checkbounds(s,i); i end
-chr2ind{T<:DirectIndexString}(s::SubString{T}, i::Integer) = begin checkbounds(s,i); i end
-
-nextind(s::SubString, i::Integer) = nextind(s.string, i+s.offset)-s.offset
-prevind(s::SubString, i::Integer) = prevind(s.string, i+s.offset)-s.offset
-
-convert{T<:AbstractString}(::Type{SubString{T}}, s::T) = SubString(s, 1, endof(s))
-
-bytestring{T <: ByteString}(p::SubString{T}) = bytestring(p.string.data[1+p.offset:p.offset+nextind(p, p.endof)-1])
-
-function getindex(s::AbstractString, r::UnitRange{Int})
-    if first(r) < 1 || endof(s) < last(r)
-        throw(BoundsError(s, r))
-    end
-    SubString(s, first(r), last(r))
-end
-
-isascii(s::SubString{ASCIIString}) = true
-
-function cmp{T<:ByteString,S<:ByteString}(a::SubString{T}, b::SubString{S})
-    na = sizeof(a)
-    nb = sizeof(b)
-    c = ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
-              pointer(a), pointer(b), min(na,nb))
-    c < 0 ? -1 : c > 0 ? +1 : cmp(na,nb)
-end
-
-## hashing strings ##
-
-const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
-const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81
-
-function hash{T<:ByteString}(s::Union{T,SubString{T}}, h::UInt)
-    h += memhash_seed
-    # note: use pointer(s) here (see #6058).
-    ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), pointer(s), sizeof(s), h % UInt32) + h
-end
-hash(s::AbstractString, h::UInt) = hash(bytestring(s), h)
-
-## efficient representation of repeated strings ##
-
-immutable RepString <: AbstractString
-    string::AbstractString
-    repeat::Integer
-end
-
-function endof(s::RepString)
-    e = endof(s.string)
-    (next(s.string,e)[2]-1) * (s.repeat-1) + e
-end
-length(s::RepString) = length(s.string)*s.repeat
-sizeof(s::RepString) = sizeof(s.string)*s.repeat
-
-function next(s::RepString, i::Int)
-    if i < 1
-        throw(BoundsError(s, i))
-    end
-    e = endof(s.string)
-    sz = next(s.string,e)[2]-1
-
-    r, j = divrem(i-1, sz)
-    j += 1
-
-    if r >= s.repeat || j > e
-        throw(BoundsError(s, i))
-    end
-
-    c, k = next(s.string, j)
-    c, k-j+i
-end
-
-function repeat(s::AbstractString, r::Integer)
-    r <  0 ? throw(ArgumentError("can't repeat a string $r times")) :
-    r == 0 ? "" :
-    r == 1 ? s  :
-    RepString(s,r)
-end
-
-convert(::Type{RepString}, s::AbstractString) = RepString(s,1)
-
-function repeat(s::ByteString, r::Integer)
-    r < 0 && throw(ArgumentError("can't repeat a string $r times"))
-    d = s.data; n = length(d)
-    out = Array(UInt8, n*r)
-    for i=1:r
-        copy!(out, 1+(i-1)*n, d, 1, n)
-    end
-    convert(typeof(s), out)
-end
-
-## reversed strings without data movement ##
-
-immutable RevString{T<:AbstractString} <: AbstractString
-    string::T
-end
-
-endof(s::RevString) = endof(s.string)
-length(s::RevString) = length(s.string)
-sizeof(s::RevString) = sizeof(s.string)
-
-function next(s::RevString, i::Int)
-    n = endof(s); j = n-i+1
-    (s.string[j], n-prevind(s.string,j)+1)
-end
-
-reverse(s::AbstractString) = RevString(s)
-reverse(s::RevString) = s.string
-
-isascii(s::RevString{ASCIIString}) = true
-
-## reverse an index i so that reverse(s)[i] == s[reverseind(s,i)]
-
-reverseind(s::Union{DirectIndexString,SubString{DirectIndexString}}, i::Integer) = length(s) + 1 - i
-reverseind(s::RevString, i::Integer) = endof(s) - i + 1
-lastidx(s::AbstractString) = nextind(s, endof(s)) - 1
-lastidx(s::DirectIndexString) = length(s)
-reverseind(s::SubString, i::Integer) =
-    reverseind(s.string, lastidx(s.string)-s.offset-s.endof+i) - s.offset
-
-## ropes for efficient concatenation, etc. ##
-
-immutable RopeString <: AbstractString
-    head::AbstractString
-    tail::AbstractString
-    depth::Int32
-    endof::Int
-
-    RopeString(h::RopeString, t::RopeString) =
-        strdepth(h.tail) + strdepth(t) < strdepth(h.head) ?
-            RopeString(h.head, RopeString(h.tail, t)) :
-            new(h, t, max(h.depth,t.depth)+1, endof(h)+endof(t))
-
-    RopeString(h::RopeString, t::AbstractString) =
-        strdepth(h.tail) < strdepth(h.head) ?
-            RopeString(h.head, RopeString(h.tail, t)) :
-            new(h, t, h.depth+1, endof(h)+endof(t))
-
-    RopeString(h::AbstractString, t::RopeString) =
-        strdepth(t.head) < strdepth(t.tail) ?
-            RopeString(RopeString(h, t.head), t.tail) :
-            new(h, t, t.depth+1, endof(h)+endof(t))
-
-    RopeString(h::AbstractString, t::AbstractString) =
-        new(h, t, 1, endof(h)+endof(t))
-end
-RopeString(s::AbstractString) = RopeString(s,"")
-
-strdepth(s::AbstractString) = 0
-strdepth(s::RopeString) = s.depth
-
-function next(s::RopeString, i::Int)
-    eh = endof(s.head)
-    if i <= eh
-        return next(s.head, i)
-    else
-        c, j = next(s.tail, i-eh)
-        return c, j+eh
-    end
-end
-
-endof(s::RopeString) = s.endof
-length(s::RopeString) = length(s.head) + length(s.tail)
-write(io::IO, s::RopeString) = (write(io, s.head); write(io, s.tail))
-sizeof(s::RopeString) = sizeof(s.head) + sizeof(s.tail)
-
-## uppercase and lowercase transformations ##
-uppercase(s::AbstractString) = map(uppercase, s)
-lowercase(s::AbstractString) = map(lowercase, s)
-
-function ucfirst(s::AbstractString)
-    isempty(s) || isupper(s[1]) ? s : string(uppercase(s[1]),s[nextind(s,1):end])
-end
-function lcfirst(s::AbstractString)
-    isempty(s) || islower(s[1]) ? s : string(lowercase(s[1]),s[nextind(s,1):end])
-end
-
-## string map, filter, has ##
-
-map_result(s::AbstractString, a::Vector{UInt8}) = UTF8String(a)
-map_result(s::Union{ASCIIString,SubString{ASCIIString}}, a::Vector{UInt8}) = bytestring(a)
-
-function map(f, s::AbstractString)
-    out = IOBuffer(Array(UInt8,endof(s)),true,true)
-    truncate(out,0)
-    for c in s
-        c2 = f(c)
-        if !isa(c2,Char)
-            throw(ArgumentError("map(f,s::AbstractString) requires f to return Char; try map(f,collect(s)) or a comprehension instead"))
-        end
-        write(out, c2::Char)
-    end
-    map_result(s, takebuf_array(out))
-end
-
-function filter(f, s::AbstractString)
-    out = IOBuffer(Array(UInt8,endof(s)),true,true)
-    truncate(out,0)
-    for c in s
-        if f(c)
-            write(out, c)
-        end
-    end
-    takebuf_string(out)
-end
-
-## string promotion rules ##
-
-promote_rule{S<:AbstractString,T<:AbstractString}(::Type{S}, ::Type{T}) = UTF8String
-
-## printing literal quoted string data ##
-
-# this is the inverse of print_unescaped_chars(io, s, "\\\")
-
-function print_quoted_literal(io, s::AbstractString)
-    print(io, '"')
-    for c = s; c == '"' ? print(io, "\\\"") : print(io, c); end
-    print(io, '"')
-end
-
-## string escaping & unescaping ##
-
-escape_nul(s::AbstractString, i::Int) =
-    !done(s,i) && '0' <= next(s,i)[1] <= '7' ? "\\x00" : "\\0"
-
-isxdigit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
-isxdigit(s::AbstractString) = all(isxdigit, s)
-need_full_hex(s::AbstractString, i::Int) = !done(s,i) && isxdigit(next(s,i)[1])
-
-function print_escaped(io, s::AbstractString, esc::AbstractString)
-    i = start(s)
-    while !done(s,i)
-        c, j = next(s,i)
-        c == '\0'       ? print(io, escape_nul(s,j)) :
-        c == '\e'       ? print(io, "\\e") :
-        c == '\\'       ? print(io, "\\\\") :
-        c in esc        ? print(io, '\\', c) :
-        '\a' <= c <= '\r' ? print(io, '\\', "abtnvfr"[Int(c)-6]) :
-        isprint(c)      ? print(io, c) :
-        c <= '\x7f'     ? print(io, "\\x", hex(c, 2)) :
-        c <= '\uffff'   ? print(io, "\\u", hex(c, need_full_hex(s,j) ? 4 : 2)) :
-                          print(io, "\\U", hex(c, need_full_hex(s,j) ? 8 : 4))
-        i = j
-    end
-end
-
-escape_string(s::AbstractString) = sprint(endof(s), print_escaped, s, "\"")
-function print_quoted(io, s::AbstractString)
-    print(io, '"')
-    print_escaped(io, s, "\"\$") #"# work around syntax highlighting problem
-    print(io, '"')
-end
-
-# bare minimum unescaping function unescapes only given characters
-
-function print_unescaped_chars(io, s::AbstractString, esc::AbstractString)
-    if !('\\' in esc)
-        esc = string("\\", esc)
-    end
-    i = start(s)
-    while !done(s,i)
-        c, i = next(s,i)
-        if c == '\\' && !done(s,i) && s[i] in esc
-            c, i = next(s,i)
-        end
-        print(io, c)
-    end
-end
-
-unescape_chars(s::AbstractString, esc::AbstractString) =
-    sprint(endof(s), print_unescaped_chars, s, esc)
-
-# general unescaping of traditional C and Unicode escape sequences
-
-function print_unescaped(io, s::AbstractString)
-    i = start(s)
-    while !done(s,i)
-        c, i = next(s,i)
-        if !done(s,i) && c == '\\'
-            c, i = next(s,i)
-            if c == 'x' || c == 'u' || c == 'U'
-                n = k = 0
-                m = c == 'x' ? 2 :
-                    c == 'u' ? 4 : 8
-                while (k+=1) <= m && !done(s,i)
-                    c, j = next(s,i)
-                    n = '0' <= c <= '9' ? n<<4 + c-'0' :
-                        'a' <= c <= 'f' ? n<<4 + c-'a'+10 :
-                        'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break
-                    i = j
-                end
-                if k == 1
-                    throw(ArgumentError("\\x used with no following hex digits in $(repr(s))"))
-                end
-                if m == 2 # \x escape sequence
-                    write(io, UInt8(n))
-                else
-                    print(io, Char(n))
-                end
-            elseif '0' <= c <= '7'
-                k = 1
-                n = c-'0'
-                while (k+=1) <= 3 && !done(s,i)
-                    c, j = next(s,i)
-                    n = ('0' <= c <= '7') ? n<<3 + c-'0' : break
-                    i = j
-                end
-                if n > 255
-                    throw(ArgumentError("octal escape sequence out of range"))
-                end
-                write(io, UInt8(n))
-            else
-                print(io, c == 'a' ? '\a' :
-                          c == 'b' ? '\b' :
-                          c == 't' ? '\t' :
-                          c == 'n' ? '\n' :
-                          c == 'v' ? '\v' :
-                          c == 'f' ? '\f' :
-                          c == 'r' ? '\r' :
-                          c == 'e' ? '\e' : c)
-            end
-        else
-            print(io, c)
-        end
-    end
-end
-
-unescape_string(s::AbstractString) = sprint(endof(s), print_unescaped, s)
-
-## checking UTF-8 & ACSII validity ##
-
-byte_string_classify(data::Vector{UInt8}) =
-    ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), data, length(data))
-byte_string_classify(s::ByteString) = byte_string_classify(s.data)
-    # 0: neither valid ASCII nor UTF-8
-    # 1: valid ASCII
-    # 2: valid UTF-8
-
-isvalid(::Type{ASCIIString}, s::Union{Vector{UInt8},ByteString}) = byte_string_classify(s) == 1
-isvalid(::Type{UTF8String}, s::Union{Vector{UInt8},ByteString}) = byte_string_classify(s) != 0
-
-## multiline strings ##
-
-function blank_width(c::Char)
-    c == ' '   ? 1 :
-    c == '\t'  ? 8 :
-    throw(ArgumentError("$(repr(c)) not a blank character"))
-end
-
-# width of leading blank space, also check if string is blank
-function indentation(s::AbstractString)
-    count = 0
-    for c in s
-        if c == ' ' || c == '\t'
-            count += blank_width(c)
-        else
-            return count, false
-        end
-    end
-    count, true
-end
-
-function unindent(s::AbstractString, indent::Int)
-    indent == 0 && return s
-    buf = IOBuffer(Array(UInt8,endof(s)), true, true)
-    truncate(buf,0)
-    a = i = start(s)
-    cutting = false
-    cut = 0
-    while !done(s,i)
-        c,i_ = next(s,i)
-        if cutting && (c == ' ' || c == '\t')
-            a = i_
-            cut += blank_width(c)
-            if cut == indent
-                cutting = false
-            elseif cut > indent
-                cutting = false
-                for _ = (indent+1):cut write(buf, ' ') end
-            end
-        elseif c == '\n'
-            print(buf, s[a:i])
-            a = i_
-            cutting = true
-            cut = 0
-        else
-            cutting = false
-        end
-        i = i_
-    end
-    print(buf, s[a:end])
-    takebuf_string(buf)
-end
-
-## core string macros ##
-
-macro b_str(s); :($(unescape_string(s)).data); end
-
-## shell-like command parsing ##
-
-function shell_parse(raw::AbstractString, interp::Bool)
-    s = lstrip(raw)
-    #Strips the end but respects the space when the string endswith "\\ "
-    r = RevString(s)
-    i = start(r)
-    c_old = nothing
-    while !done(r,i)
-        c, j = next(r,i)
-        if c == '\\' && c_old == ' '
-            i -= 1
-            break
-        elseif !(c in _default_delims)
-            break
-        end
-        i = j
-        c_old = c
-    end
-    s = s[1:end-i+1]
-
-    last_parse = 0:-1
-    isempty(s) && return interp ? (Expr(:tuple,:()),last_parse) : ([],last_parse)
-
-    in_single_quotes = false
-    in_double_quotes = false
-
-    args::Vector{Any} = []
-    arg::Vector{Any} = []
-    i = start(s)
-    j = i
-
-    function update_arg(x)
-        if !isa(x,AbstractString) || !isempty(x)
-            push!(arg, x)
-        end
-    end
-    function append_arg()
-        if isempty(arg); arg = Any["",]; end
-        push!(args, arg)
-        arg = []
-    end
-
-    while !done(s,j)
-        c, k = next(s,j)
-        if !in_single_quotes && !in_double_quotes && isspace(c)
-            update_arg(s[i:j-1])
-            append_arg()
-            j = k
-            while !done(s,j)
-                c, k = next(s,j)
-                if !isspace(c)
-                    i = j
-                    break
-                end
-                j = k
-            end
-        elseif interp && !in_single_quotes && c == '$'
-            update_arg(s[i:j-1]); i = k; j = k
-            if done(s,k)
-                error("\$ right before end of command")
-            end
-            if isspace(s[k])
-                error("space not allowed right after \$")
-            end
-            stpos = j
-            ex, j = parse(s,j,greedy=false)
-            last_parse = stpos:j
-            update_arg(esc(ex)); i = j
-        else
-            if !in_double_quotes && c == '\''
-                in_single_quotes = !in_single_quotes
-                update_arg(s[i:j-1]); i = k
-            elseif !in_single_quotes && c == '"'
-                in_double_quotes = !in_double_quotes
-                update_arg(s[i:j-1]); i = k
-            elseif c == '\\'
-                if in_double_quotes
-                    if done(s,k)
-                        error("unterminated double quote")
-                    end
-                    if s[k] == '"' || s[k] == '$'
-                        update_arg(s[i:j-1]); i = k
-                        c, k = next(s,k)
-                    end
-                elseif !in_single_quotes
-                    if done(s,k)
-                        error("dangling backslash")
-                    end
-                    update_arg(s[i:j-1]); i = k
-                    c, k = next(s,k)
-                end
-            end
-            j = k
-        end
-    end
-
-    if in_single_quotes; error("unterminated single quote"); end
-    if in_double_quotes; error("unterminated double quote"); end
-
-    update_arg(s[i:end])
-    append_arg()
-
-    if !interp
-        return (args,last_parse)
-    end
-
-    # construct an expression
-    ex = Expr(:tuple)
-    for arg in args
-        push!(ex.args, Expr(:tuple, arg...))
-    end
-    (ex,last_parse)
-end
-shell_parse(s::AbstractString) = shell_parse(s,true)
-
-function shell_split(s::AbstractString)
-    parsed = shell_parse(s,false)[1]
-    args = AbstractString[]
-    for arg in parsed
-       push!(args, string(arg...))
-    end
-    args
-end
-
-function print_shell_word(io::IO, word::AbstractString)
-    if isempty(word)
-        print(io, "''")
-    end
-    has_single = false
-    has_special = false
-    for c in word
-        if isspace(c) || c=='\\' || c=='\'' || c=='"' || c=='$'
-            has_special = true
-            if c == '\''
-                has_single = true
-            end
-        end
-    end
-    if !has_special
-        print(io, word)
-    elseif !has_single
-        print(io, '\'', word, '\'')
-    else
-        print(io, '"')
-        for c in word
-            if c == '"' || c == '$'
-                print(io, '\\')
-            end
-            print(io, c)
-        end
-        print(io, '"')
-    end
-end
-
-function print_shell_escaped(io::IO, cmd::AbstractString, args::AbstractString...)
-    print_shell_word(io, cmd)
-    for arg in args
-        print(io, ' ')
-        print_shell_word(io, arg)
-    end
-end
-print_shell_escaped(io::IO) = nothing
-
-shell_escape(args::AbstractString...) = sprint(print_shell_escaped, args...)
-
-## interface to parser ##
-
-function parse(str::AbstractString, pos::Int; greedy::Bool=true, raise::Bool=true)
-    # returns (expr, end_pos). expr is () in case of parse error.
-    bstr = bytestring(str)
-    ex, pos = ccall(:jl_parse_string, Any,
-                    (Ptr{UInt8}, Csize_t, Int32, Int32),
-                    bstr, sizeof(bstr), pos-1, greedy ? 1:0)
-    if raise && isa(ex,Expr) && is(ex.head,:error)
-        throw(ParseError(ex.args[1]))
-    end
-    if ex == ()
-        raise && throw(ParseError("end of input"))
-        ex = Expr(:error, "end of input")
-    end
-    ex, pos+1 # C is zero-based, Julia is 1-based
-end
-
-function parse(str::AbstractString; raise::Bool=true)
-    ex, pos = parse(str, start(str), greedy=true, raise=raise)
-    if isa(ex,Expr) && ex.head === :error
-        return ex
-    end
-    if !done(str, pos)
-        raise && throw(ParseError("extra token after end of expression"))
-        return Expr(:error, "extra token after end of expression")
-    end
-    return ex
-end
-
-## miscellaneous string functions ##
-
-function lpad(s::AbstractString, n::Integer, p::AbstractString=" ")
-    m = n - strwidth(s)
-    if m <= 0; return s; end
-    l = strwidth(p)
-    if l==1
-        return bytestring(p^m * s)
-    end
-    q = div(m,l)
-    r = m - q*l
-    i = r != 0 ? chr2ind(p, r) : -1
-    bytestring(p^q*p[1:i]*s)
-end
-
-function rpad(s::AbstractString, n::Integer, p::AbstractString=" ")
-    m = n - strwidth(s)
-    if m <= 0; return s; end
-    l = strwidth(p)
-    if l==1
-        return bytestring(s * p^m)
-    end
-    q = div(m,l)
-    r = m - q*l
-    i = r != 0 ? chr2ind(p, r) : -1
-    bytestring(s*p^q*p[1:i])
-end
-
-lpad(s, n::Integer, p=" ") = lpad(string(s),n,string(p))
-rpad(s, n::Integer, p=" ") = rpad(string(s),n,string(p))
-cpad(s, n::Integer, p=" ") = rpad(lpad(s,div(n+strwidth(s),2),p),n,p)
-
-
-# splitter can be a Char, Vector{Char}, AbstractString, Regex, ...
-# any splitter that provides search(s::AbstractString, splitter)
-split{T<:SubString}(str::T, splitter; limit::Integer=0, keep::Bool=true) = _split(str, splitter, limit, keep, T[])
-split{T<:AbstractString}(str::T, splitter; limit::Integer=0, keep::Bool=true) = _split(str, splitter, limit, keep, SubString{T}[])
-function _split{T<:AbstractString,U<:Array}(str::T, splitter, limit::Integer, keep_empty::Bool, strs::U)
-    i = start(str)
-    n = endof(str)
-    r = search(str,splitter,i)
-    j, k = first(r), nextind(str,last(r))
-    while 0 < j <= n && length(strs) != limit-1
-        if i < k
-            if keep_empty || i < j
-                push!(strs, SubString(str,i,prevind(str,j)))
-            end
-            i = k
-        end
-        if k <= j; k = nextind(str,j) end
-        r = search(str,splitter,k)
-        j, k = first(r), nextind(str,last(r))
-    end
-    if keep_empty || !done(str,i)
-        push!(strs, SubString(str,i))
-    end
-    return strs
-end
-
-# a bit oddball, but standard behavior in Perl, Ruby & Python:
-const _default_delims = [' ','\t','\n','\v','\f','\r']
-split(str::AbstractString) = split(str, _default_delims; limit=0, keep=false)
-
-rsplit{T<:SubString}(str::T, splitter; limit::Integer=0, keep::Bool=true) = _rsplit(str, splitter, limit, keep, T[])
-rsplit{T<:AbstractString}(str::T, splitter   ; limit::Integer=0, keep::Bool=true) = _rsplit(str, splitter, limit, keep, SubString{T}[])
-function _rsplit{T<:AbstractString,U<:Array}(str::T, splitter, limit::Integer, keep_empty::Bool, strs::U)
-    i = start(str)
-    n = endof(str)
-    r = rsearch(str,splitter)
-    j = first(r)-1
-    k = last(r)
-    while((0 <= j < n) && (length(strs) != limit-1))
-        if i <= k
-            (keep_empty || (k < n)) && unshift!(strs, SubString(str,k+1,n))
-            n = j
-        end
-        (k <= j) && (j = prevind(str,j))
-        r = rsearch(str,splitter,j)
-        j = first(r)-1
-        k = last(r)
-    end
-    (keep_empty || (n > 0)) && unshift!(strs, SubString(str,1,n))
-    return strs
-end
-#rsplit(str::AbstractString) = rsplit(str, _default_delims, 0, false)
-
-_replacement(repl, str, j, k) = repl
-_replacement(repl::Function, str, j, k) = repl(SubString(str, j, k))
-
-function replace(str::ByteString, pattern, repl, limit::Integer)
-    n = 1
-    e = endof(str)
-    i = a = start(str)
-    r = search(str,pattern,i)
-    j, k = first(r), last(r)
-    out = IOBuffer()
-    while j != 0
-        if i == a || i <= k
-            write_sub(out, str.data, i, j-i)
-            write(out, _replacement(repl, str, j, k))
-        end
-        if k<j
-            i = j
-            k = nextind(str, j)
-        else
-            i = k = nextind(str, k)
-        end
-        if j > e
-            break
-        end
-        r = search(str,pattern,k)
-        j, k = first(r), last(r)
-        n == limit && break
-        n += 1
-    end
-    write(out, SubString(str,i))
-    takebuf_string(out)
-end
-replace(s::AbstractString, pat, f, n::Integer) = replace(bytestring(s), pat, f, n)
-replace(s::AbstractString, pat, r) = replace(s, pat, r, 0)
-
-function print_joined(io, strings, delim, last)
-    i = start(strings)
-    if done(strings,i)
-        return
-    end
-    str, i = next(strings,i)
-    print(io, str)
-    is_done = done(strings,i)
-    while !is_done
-        str, i = next(strings,i)
-        is_done = done(strings,i)
-        print(io, is_done ? last : delim)
-        print(io, str)
-    end
-end
-
-function print_joined(io, strings, delim)
-    i = start(strings)
-    is_done = done(strings,i)
-    while !is_done
-        str, i = next(strings,i)
-        is_done = done(strings,i)
-        print(io, str)
-        if !is_done
-            print(io, delim)
-        end
-    end
-end
-print_joined(io, strings) = print_joined(io, strings, "")
-
-join(args...) = sprint(print_joined, args...)
-
-chop(s::AbstractString) = s[1:end-1]
-
-function chomp(s::AbstractString)
-    i = endof(s)
-    if (i < 1 || s[i] != '\n') return s end
-    j = prevind(s,i)
-    if (j < 1 || s[j] != '\r') return s[1:i-1] end
-    return s[1:j-1]
-end
-chomp(s::ByteString) =
-    (endof(s) < 1 || s.data[end]   != 0x0a) ? s :
-    (endof(s) < 2 || s.data[end-1] != 0x0d) ? s[1:end-1] : s[1:end-2]
-
-# NOTE: use with caution -- breaks the immutable string convention!
-function chomp!(s::ByteString)
-    if !isempty(s) && s.data[end] == 0x0a
-        n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2
-        ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n)
-    end
-    return s
-end
-chomp!(s::AbstractString) = chomp(s) # copying fallback for other string types
-
-function lstrip(s::AbstractString, chars::Chars=_default_delims)
-    i = start(s)
-    while !done(s,i)
-        c, j = next(s,i)
-        if !(c in chars)
-            return s[i:end]
-        end
-        i = j
-    end
-    ""
-end
-
-function rstrip(s::AbstractString, chars::Chars=_default_delims)
-    r = RevString(s)
-    i = start(r)
-    while !done(r,i)
-        c, j = next(r,i)
-        if !(c in chars)
-            return s[1:end-i+1]
-        end
-        i = j
-    end
-    ""
-end
-
-strip(s::AbstractString) = lstrip(rstrip(s))
-strip(s::AbstractString, chars::Chars) = lstrip(rstrip(s, chars), chars)
-
-## string to integer functions ##
-
-function parse{T<:Integer}(::Type{T}, c::Char, base::Integer=36)
-    a::Int = (base <= 36 ? 10 : 36)
-    2 <= base <= 62 || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
-    d = '0' <= c <= '9' ? c-'0'    :
-        'A' <= c <= 'Z' ? c-'A'+10 :
-        'a' <= c <= 'z' ? c-'a'+a  : throw(ArgumentError("invalid digit: $(repr(c))"))
-    d < base || throw(ArgumentError("invalid base $base digit $(repr(c))"))
-    convert(T, d)
-end
-
-function parseint_next(s::AbstractString, startpos::Int, endpos::Int)
-    (0 < startpos <= endpos) || (return Char(0), 0, 0)
-    j = startpos
-    c, startpos = next(s,startpos)
-    c, startpos, j
-end
-
-function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)
-    c, i, j = parseint_next(s, startpos, endpos)
-
-    while isspace(c)
-        c, i, j = parseint_next(s,i,endpos)
-    end
-    (j == 0) && (return 0, 0, 0)
-
-    sgn = 1
-    if signed
-        if c == '-' || c == '+'
-            (c == '-') && (sgn = -1)
-            c, i, j = parseint_next(s,i,endpos)
-        end
-    end
-
-    while isspace(c)
-        c, i, j = parseint_next(s,i,endpos)
-    end
-    (j == 0) && (return 0, 0, 0)
-
-    if base == 0
-        if c == '0' && !done(s,i)
-            c, i = next(s,i)
-            base = c=='b' ? 2 : c=='o' ? 8 : c=='x' ? 16 : 10
-            if base != 10
-                c, i, j = parseint_next(s,i,endpos)
-            end
-        else
-            base = 10
-        end
-    end
-    return sgn, base, j
-end
-
-function tryparse_internal{S<:ByteString}(::Type{Bool}, sbuff::S, startpos::Int, endpos::Int, raise::Bool)
-    len = endpos-startpos+1
-    p = pointer(sbuff)+startpos-1
-    (len == 4) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "true", 4)) && (return Nullable(true))
-    (len == 5) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "false", 5)) && (return Nullable(false))
-    raise && throw(ArgumentError("invalid Bool representation: $(repr(SubString(s,startpos,endpos)))"))
-    Nullable{Bool}()
-end
-
-safe_add{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))) ? Nullable{T}() : Nullable{T}(n1 + n2)
-safe_mul{T<:Integer}(n1::T, n2::T) = ((n2 >   0) ? ((n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))) :
-                                      (n2 <  -1) ? ((n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))) :
-                                      ((n2 == -1) && n1 == typemin(T))) ? Nullable{T}() : Nullable{T}(n1 * n2)
-
-function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, a::Int, raise::Bool)
-    _n = Nullable{T}()
-    sgn, base, i = parseint_preamble(T<:Signed, base, s, startpos, endpos)
-    if i == 0
-        raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
-        return _n
-    end
-    c, i = parseint_next(s,i,endpos)
-    if i == 0
-        raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
-        return _n
-    end
-
-    base = convert(T,base)
-    m::T = div(typemax(T)-base+1,base)
-    n::T = 0
-    while n <= m
-        d::T = '0' <= c <= '9' ? c-'0'    :
-               'A' <= c <= 'Z' ? c-'A'+10 :
-               'a' <= c <= 'z' ? c-'a'+a  : base
-        if d >= base
-            raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
-            return _n
-        end
-        n *= base
-        n += d
-        if i > endpos
-            n *= sgn
-            return Nullable{T}(n)
-        end
-        c, i = next(s,i)
-        isspace(c) && break
-    end
-    (T <: Signed) && (n *= sgn)
-    while !isspace(c)
-        d::T = '0' <= c <= '9' ? c-'0'    :
-        'A' <= c <= 'Z' ? c-'A'+10 :
-            'a' <= c <= 'z' ? c-'a'+a  : base
-        if d >= base
-            raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
-            return _n
-        end
-        (T <: Signed) && (d *= sgn)
-
-        safe_n = safe_mul(n, base)
-        isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))
-        if isnull(safe_n)
-            raise && throw(OverflowError())
-            return _n
-        end
-        n = get(safe_n)
-        (i > endpos) && return Nullable{T}(n)
-        c, i = next(s,i)
-    end
-    while i <= endpos
-        c, i = next(s,i)
-        if !isspace(c)
-            raise && throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
-            return _n
-        end
-    end
-    return Nullable{T}(n)
-end
-tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool) =
-    tryparse_internal(T,s,start(s),endof(s),base,raise)
-tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, raise::Bool) =
-    tryparse_internal(T, s, startpos, endpos, base, base <= 36 ? 10 : 36, raise)
-tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =
-    2 <= base <= 62 ? tryparse_internal(T,s,Int(base),false) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
-tryparse{T<:Integer}(::Type{T}, s::AbstractString) = tryparse_internal(T,s,0,false)
-
-function parse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
-    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
-    get(tryparse_internal(T, s, base, true))
-end
-parse{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))
-
-## stringifying integers more efficiently ##
-
-string(x::Union{Int8,Int16,Int32,Int64,Int128}) = dec(x)
-
-## string to float functions ##
-
-tryparse(::Type{Float64}, s::ByteString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s))
-tryparse{T<:ByteString}(::Type{Float64}, s::SubString{T}) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.endof)
-
-tryparse(::Type{Float32}, s::ByteString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s))
-tryparse{T<:ByteString}(::Type{Float32}, s::SubString{T}) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.endof)
-
-tryparse{T<:Union{Float32,Float64}}(::Type{T}, s::AbstractString) = tryparse(T, bytestring(s))
-
-function parse{T<:FloatingPoint}(::Type{T}, s::AbstractString)
-    nf = tryparse(T, s)
-    isnull(nf) ? throw(ArgumentError("invalid number format $(repr(s)) for $T")) : get(nf)
-end
-
-float(x::AbstractString) = parse(Float64,x)
-
-float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)
-
-# find the index of the first occurrence of a value in a byte array
-
-function search(a::ByteArray, b::Union{Int8,UInt8}, i::Integer)
-    if i < 1
-        throw(BoundsError(a, i))
-    end
-    n = length(a)
-    if i > n
-        return i == n+1 ? 0 : throw(BoundsError(a, i))
-    end
-    p = pointer(a)
-    q = ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-1, b, n-i+1)
-    q == C_NULL ? 0 : Int(q-p+1)
-end
-function search(a::ByteArray, b::Char, i::Integer)
-    if isascii(b)
-        search(a,UInt8(b),i)
-    else
-        search(a,string(b).data,i).start
-    end
-end
-search(a::ByteArray, b::Union{Int8,UInt8,Char}) = search(a,b,1)
-
-function rsearch(a::ByteArray, b::Union{Int8,UInt8}, i::Integer)
-    if i < 1
-        return i == 0 ? 0 : throw(BoundsError(a, i))
-    end
-    n = length(a)
-    if i > n
-        return i == n+1 ? 0 : throw(BoundsError(a, i))
-    end
-    p = pointer(a)
-    q = ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i)
-    q == C_NULL ? 0 : Int(q-p+1)
-end
-function rsearch(a::ByteArray, b::Char, i::Integer)
-    if isascii(b)
-        rsearch(a,UInt8(b),i)
-    else
-        rsearch(a,string(b).data,i).start
-    end
-end
-rsearch(a::ByteArray, b::Union{Int8,UInt8,Char}) = rsearch(a,b,length(a))
-
-function hex2bytes(s::ASCIIString)
-    len = length(s)
-    iseven(len) || throw(ArgumentError("string length must be even: length($(repr(s))) == $len"))
-    arr = zeros(UInt8, div(len,2))
-    i = j = 0
-    while i < len
-        n = 0
-        c = s[i+=1]
-        n = '0' <= c <= '9' ? c - '0' :
-            'a' <= c <= 'f' ? c - 'a' + 10 :
-            'A' <= c <= 'F' ? c - 'A' + 10 :
-                throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
-        c = s[i+=1]
-        n = '0' <= c <= '9' ? n << 4 + c - '0' :
-            'a' <= c <= 'f' ? n << 4 + c - 'a' + 10 :
-            'A' <= c <= 'F' ? n << 4 + c - 'A' + 10 :
-                throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
-        arr[j+=1] = n
-    end
-    return arr
-end
-
-bytes2hex{T<:UInt8}(arr::Vector{T}) = join([hex(i,2) for i in arr])
-
-function repr(x)
-    s = IOBuffer()
-    showall(s, x)
-    takebuf_string(s)
-end
-
-containsnul(s::AbstractString) = '\0' in s
-containsnul(s::ByteString) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
-containsnul(s::Union{UTF16String,UTF32String}) = findfirst(s.data, 0) != length(s.data)
-
-if sizeof(Cwchar_t) == 2
-    const WString = UTF16String
-    const wstring = utf16
-elseif sizeof(Cwchar_t) == 4
-    const WString = UTF32String
-    const wstring = utf32
-end
-wstring(s::Cwstring) = wstring(box(Ptr{Cwchar_t}, unbox(Cwstring,s)))
-
-# Cwstring is defined in c.jl, but conversion needs to be defined here
-# to have WString
-function unsafe_convert(::Type{Cwstring}, s::WString)
-    if containsnul(s)
-        throw(ArgumentError("embedded NUL chars are not allowed in C strings: $(repr(s))"))
-    end
-    return Cwstring(unsafe_convert(Ptr{Cwchar_t}, s))
-end
-
-# pointer conversions of ASCII/UTF8/UTF16/UTF32 strings:
-pointer(x::Union{ByteString,UTF16String,UTF32String}) = pointer(x.data)
-pointer{T<:ByteString}(x::SubString{T}) = pointer(x.string.data) + x.offset
-pointer(x::ByteString, i::Integer) = pointer(x.data)+(i-1)
-pointer{T<:ByteString}(x::SubString{T}, i::Integer) = pointer(x.string.data) + x.offset + (i-1)
-pointer(x::Union{UTF16String,UTF32String}, i::Integer) = pointer(x)+(i-1)*sizeof(eltype(x.data))
-pointer{T<:Union{UTF16String,UTF32String}}(x::SubString{T}) = pointer(x.string.data) + x.offset*sizeof(eltype(x.data))
-pointer{T<:Union{UTF16String,UTF32String}}(x::SubString{T}, i::Integer) = pointer(x.string.data) + (x.offset + (i-1))*sizeof(eltype(x.data))
-
-# IOBuffer views of a (byte)string:
-IOBuffer(str::ByteString) = IOBuffer(str.data)
-IOBuffer{T<:ByteString}(s::SubString{T}) = IOBuffer(sub(s.string.data, s.offset + 1 : s.offset + sizeof(s)))
+include("strings/types.jl")
+include("strings/basic.jl")
+include("strings/search.jl")
+include("strings/util.jl")
+include("strings/io.jl")
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
new file mode 100644
index 0000000000000..3a1ae110976ae
--- /dev/null
+++ b/base/strings/basic.jl
@@ -0,0 +1,270 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+## core string functions ##
+
+endof(s::AbstractString) = error("you must implement endof(", typeof(s), ")")
+next(s::AbstractString, i::Int) = error("you must implement next(", typeof(s), ",Int)")
+next(s::DirectIndexString, i::Int) = (s[i],i+1)
+next(s::AbstractString, i::Integer) = next(s,Int(i))
+
+string() = ""
+string(s::AbstractString) = s
+
+bytestring() = ""
+bytestring(s::Vector{UInt8}) = bytestring(pointer(s),length(s))
+
+function bytestring(p::Union{Ptr{UInt8},Ptr{Int8}})
+    p == C_NULL ? throw(ArgumentError("cannot convert NULL to string")) :
+    ccall(:jl_cstr_to_string, ByteString, (Ptr{UInt8},), p)
+end
+bytestring(s::Cstring) = bytestring(box(Ptr{Cchar}, unbox(Cstring,s)))
+
+function bytestring(p::Union{Ptr{UInt8},Ptr{Int8}},len::Integer)
+    p == C_NULL ? throw(ArgumentError("cannot convert NULL to string")) :
+    ccall(:jl_pchar_to_string, ByteString, (Ptr{UInt8},Int), p, len)
+end
+
+convert(::Type{Vector{UInt8}}, s::AbstractString) = bytestring(s).data
+convert(::Type{Array{UInt8}}, s::AbstractString) = bytestring(s).data
+convert(::Type{ByteString}, s::AbstractString) = bytestring(s)
+convert(::Type{Vector{Char}}, s::AbstractString) = collect(s)
+convert(::Type{Symbol}, s::AbstractString) = symbol(s)
+
+## generic supplied functions ##
+
+start(s::AbstractString) = 1
+done(s::AbstractString,i) = (i > endof(s))
+getindex(s::AbstractString, i::Int) = next(s,i)[1]
+getindex(s::AbstractString, i::Integer) = s[Int(i)]
+getindex(s::AbstractString, x::Real) = s[to_index(x)]
+getindex{T<:Integer}(s::AbstractString, r::UnitRange{T}) = s[Int(first(r)):Int(last(r))]
+# TODO: handle other ranges with stride ±1 specially?
+getindex(s::AbstractString, v::AbstractVector) =
+    sprint(length(v), io->(for i in v write(io,s[i]) end))
+
+symbol(s::AbstractString) = symbol(bytestring(s))
+
+sizeof(s::AbstractString) = error("type $(typeof(s)) has no canonical binary representation")
+
+eltype{T<:AbstractString}(::Type{T}) = Char
+
+(*)(s1::AbstractString, ss::AbstractString...) = string(s1, ss...)
+
+length(s::DirectIndexString) = endof(s)
+function length(s::AbstractString)  # generic fallback: count characters by iterating over the whole string
+    i = start(s)
+    if done(s,i)
+        return 0
+    end
+    n = 1
+    while true
+        c, j = next(s,i)
+        if done(s,j)  # the character just read was the last one
+            return n
+        end
+        n += 1
+        i = j
+    end
+end
+
+## String comparison functions ##
+
+function cmp(a::AbstractString, b::AbstractString)  # three-way comparison by character: returns -1, 0 or +1
+    if a === b
+        return 0
+    end
+    i = start(a)
+    j = start(b)
+    while !done(a,i) && !done(b,j)  # each string must be tested with its own iteration state
+        c, i = next(a,i)
+        d, j = next(b,j)
+        if c != d
+            return c < d ? -1 : +1
+        end
+    end
+    done(a,i) && !done(b,j) ? -1 :
+    !done(a,i) && done(b,j) ? +1 : 0  # shorter string sorts first when one is a prefix of the other
+end
+
+==(a::AbstractString, b::AbstractString) = cmp(a,b) == 0
+isless(a::AbstractString, b::AbstractString) = cmp(a,b) < 0
+
+# faster comparisons for byte strings and symbols
+
+cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data)
+cmp(a::Symbol, b::Symbol) = Int(sign(ccall(:strcmp, Int32, (Cstring, Cstring), a, b)))
+
+==(a::ByteString, b::ByteString) = endof(a) == endof(b) && cmp(a,b) == 0
+isless(a::Symbol, b::Symbol) = cmp(a,b) < 0
+
+## Generic validation functions ##
+
+isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= endof(s))
+function isvalid(s::AbstractString, i::Integer)  # true if `i` is in range and `next(s,i)` succeeds
+    i < 1 && return false
+    done(s,i) && return false
+    try
+        next(s,i)
+        true
+    catch
+        false  # any exception thrown by `next` is reported as "not a valid index"
+    end
+end
+
+## Generic indexing functions ##
+
+prevind(s::DirectIndexString, i::Integer) = i-1
+prevind(s::AbstractArray   , i::Integer) = i-1
+nextind(s::DirectIndexString, i::Integer) = i+1
+nextind(s::AbstractArray   , i::Integer) = i+1
+
+function prevind(s::AbstractString, i::Integer)  # largest valid index smaller than `i`, or 0 if there is none
+    e = endof(s)
+    if i > e
+        return e  # any index past the end maps to endof(s)
+    end
+    j = i-1
+    while j >= 1  # scan backwards until isvalid accepts an index
+        if isvalid(s,j)
+            return j
+        end
+        j -= 1
+    end
+    return 0 # out of range
+end
+
+function nextind(s::AbstractString, i::Integer)  # smallest valid index greater than `i`
+    e = endof(s)
+    if i < 1
+        return 1  # anything before the start maps to index 1
+    end
+    if i > e
+        return i+1  # already past the end: just step by one
+    end
+    for j = i+1:e  # scan forward until isvalid accepts an index
+        if isvalid(s,j)
+            return j
+        end
+    end
+    next(s,e)[2] # out of range
+end
+
+checkbounds(s::AbstractString, i::Integer) = start(s) <= i <= endof(s) || throw(BoundsError(s, i))
+checkbounds(s::AbstractString, i::Real) = checkbounds(s, to_index(i))
+checkbounds{T<:Integer}(s::AbstractString, r::Range{T}) = isempty(r) || (minimum(r) >= start(s) && maximum(r) <= endof(s)) || throw(BoundsError(s, r))
+checkbounds{T<:Real}(s::AbstractString, I::AbstractArray{T}) = all(i -> checkbounds(s, i), I)
+
+ind2chr(s::DirectIndexString, i::Integer) = begin checkbounds(s,i); i end
+chr2ind(s::DirectIndexString, i::Integer) = begin checkbounds(s,i); i end
+
+function ind2chr(s::AbstractString, i::Integer)  # convert string index `i` to the ordinal number of the character there
+    s[i] # throws error if invalid
+    j = 1
+    k = start(s)
+    while true
+        c, l = next(s,k)
+        if i <= k  # reached (or passed) the requested index; j characters seen so far
+            return j
+        end
+        j += 1
+        k = l
+    end
+end
+
+function chr2ind(s::AbstractString, i::Integer)  # convert character number `i` to the string index where it starts
+    i < start(s) && throw(BoundsError(s, i))
+    j = 1
+    k = start(s)
+    while true
+        c, l = next(s,k)  # NOTE(review): for `i` past the last character this presumably relies on `next` throwing
+        if i == j
+            return k
+        end
+        j += 1
+        k = l
+    end
+end
+
+immutable EachStringIndex{T<:AbstractString}
+    s::T
+end
+eachindex(s::AbstractString) = EachStringIndex(s)
+
+length(e::EachStringIndex) = length(e.s)
+start(e::EachStringIndex) = start(e.s)
+next(e::EachStringIndex, state) = (state, nextind(e.s, state))
+done(e::EachStringIndex, state) = done(e.s, state)
+eltype(e::EachStringIndex) = Int
+
+typealias Chars Union{Char,AbstractVector{Char},Set{Char}}
+
+typealias ByteArray Union{Vector{UInt8},Vector{Int8}}
+
+## character column width function ##
+
+strwidth(s::AbstractString) = (w=0; for c in s; w += charwidth(c); end; w)
+
+isascii(c::Char) = c < Char(0x80)
+isascii(s::AbstractString) = all(isascii, s)
+isascii(s::ASCIIString) = true
+
+## string promotion rules ##
+
+promote_rule{S<:AbstractString,T<:AbstractString}(::Type{S}, ::Type{T}) = UTF8String
+
+isxdigit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
+isxdigit(s::AbstractString) = all(isxdigit, s)
+need_full_hex(s::AbstractString, i::Int) = !done(s,i) && isxdigit(next(s,i)[1])
+
+## checking UTF-8 & ASCII validity ##
+
+byte_string_classify(data::Vector{UInt8}) =
+    ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), data, length(data))
+byte_string_classify(s::ByteString) = byte_string_classify(s.data)
+    # 0: neither valid ASCII nor UTF-8
+    # 1: valid ASCII
+    # 2: valid UTF-8
+
+isvalid(::Type{ASCIIString}, s::Union{Vector{UInt8},ByteString}) = byte_string_classify(s) == 1
+isvalid(::Type{UTF8String}, s::Union{Vector{UInt8},ByteString}) = byte_string_classify(s) != 0
+
+## uppercase and lowercase transformations ##
+uppercase(s::AbstractString) = map(uppercase, s)
+lowercase(s::AbstractString) = map(lowercase, s)
+
+function ucfirst(s::AbstractString)
+    isempty(s) || isupper(s[1]) ? s : string(uppercase(s[1]),s[nextind(s,1):end])
+end
+function lcfirst(s::AbstractString)
+    isempty(s) || islower(s[1]) ? s : string(lowercase(s[1]),s[nextind(s,1):end])
+end
+
+## string map, filter, has ##
+
+map_result(s::AbstractString, a::Vector{UInt8}) = UTF8String(a)
+map_result(s::Union{ASCIIString,SubString{ASCIIString}}, a::Vector{UInt8}) = bytestring(a)
+
+function map(f, s::AbstractString)  # apply `f` to every character; `f` must return a Char
+    out = IOBuffer(Array(UInt8,endof(s)),true,true)
+    truncate(out,0)  # truncate the buffer to length 0 before writing
+    for c in s
+        c2 = f(c)
+        if !isa(c2,Char)
+            throw(ArgumentError("map(f,s::AbstractString) requires f to return Char; try map(f,collect(s)) or a comprehension instead"))
+        end
+        write(out, c2::Char)
+    end
+    map_result(s, takebuf_array(out))  # map_result picks the result string type from the type of `s`
+end
+
+function filter(f, s::AbstractString)
+    out = IOBuffer(Array(UInt8,endof(s)),true,true)
+    truncate(out,0)
+    for c in s
+        if f(c)
+            write(out, c)
+        end
+    end
+    takebuf_string(out)
+end
+
diff --git a/base/strings/io.jl b/base/strings/io.jl
new file mode 100644
index 0000000000000..aade25070336e
--- /dev/null
+++ b/base/strings/io.jl
@@ -0,0 +1,247 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+## core text I/O ##
+
+print(io::IO, x) = show(io, x)
+print(io::IO, xs...) = for x in xs print(io, x) end
+
+println(io::IO, xs...) = print(io, xs..., '\n')
+
+print(xs...)   = print(STDOUT, xs...)
+println(xs...) = println(STDOUT, xs...)
+
+## conversion of general objects to strings ##
+
+function print_to_string(xs...)
+    # specialized for performance reasons
+    s = IOBuffer(Array(UInt8,isa(xs[1],AbstractString) ? endof(xs[1]) : 0), true, true)
+    for x in xs
+        print(s, x)
+    end
+    d = s.data
+    resize!(d,s.size)
+    bytestring(d)
+end
+
+string(xs...) = print_to_string(xs...)
+bytestring(s::AbstractString...) = print_to_string(s...)
+
+print(io::IO, s::AbstractString) = (write(io, s); nothing)
+write(io::IO, s::AbstractString) = (len = 0; for c in s; len += write(io, c); end; len)
+show(io::IO, s::AbstractString) = print_quoted(io, s)
+
+write{T<:ByteString}(to::AbstractIOBuffer, s::SubString{T}) =
+    s.endof==0 ? 0 : write_sub(to, s.string.data, s.offset + 1, nextind(s, s.endof) - 1)
+
+## printing literal quoted string data ##
+
+# this is the inverse of print_unescaped_chars(io, s, "\\\")
+
+function print_quoted_literal(io, s::AbstractString)
+    print(io, '"')
+    for c = s; c == '"' ? print(io, "\\\"") : print(io, c); end
+    print(io, '"')
+end
+
+function repr(x)
+    s = IOBuffer()
+    showall(s, x)
+    takebuf_string(s)
+end
+
+# IOBuffer views of a (byte)string:
+IOBuffer(str::ByteString) = IOBuffer(str.data)
+IOBuffer{T<:ByteString}(s::SubString{T}) = IOBuffer(sub(s.string.data, s.offset + 1 : s.offset + sizeof(s)))
+
+# join is implemented using IO
+function print_joined(io, strings, delim, last)  # print items separated by `delim`, with `last` before the final item
+    i = start(strings)
+    if done(strings,i)
+        return
+    end
+    str, i = next(strings,i)
+    print(io, str)
+    is_done = done(strings,i)
+    while !is_done
+        str, i = next(strings,i)
+        is_done = done(strings,i)  # true when `str` is the final item
+        print(io, is_done ? last : delim)
+        print(io, str)
+    end
+end
+
+function print_joined(io, strings, delim)
+    i = start(strings)
+    is_done = done(strings,i)
+    while !is_done
+        str, i = next(strings,i)
+        is_done = done(strings,i)
+        print(io, str)
+        if !is_done
+            print(io, delim)
+        end
+    end
+end
+print_joined(io, strings) = print_joined(io, strings, "")
+
+join(args...) = sprint(print_joined, args...)
+
+## string escaping & unescaping ##
+
+escape_nul(s::AbstractString, i::Int) =
+    !done(s,i) && '0' <= next(s,i)[1] <= '7' ? "\\x00" : "\\0"
+
+function print_escaped(io, s::AbstractString, esc::AbstractString)
+    i = start(s)
+    while !done(s,i)
+        c, j = next(s,i)
+        c == '\0'       ? print(io, escape_nul(s,j)) :
+        c == '\e'       ? print(io, "\\e") :
+        c == '\\'       ? print(io, "\\\\") :
+        c in esc        ? print(io, '\\', c) :
+        '\a' <= c <= '\r' ? print(io, '\\', "abtnvfr"[Int(c)-6]) :
+        isprint(c)      ? print(io, c) :
+        c <= '\x7f'     ? print(io, "\\x", hex(c, 2)) :
+        c <= '\uffff'   ? print(io, "\\u", hex(c, need_full_hex(s,j) ? 4 : 2)) :
+                          print(io, "\\U", hex(c, need_full_hex(s,j) ? 8 : 4))
+        i = j
+    end
+end
+
+escape_string(s::AbstractString) = sprint(endof(s), print_escaped, s, "\"")
+function print_quoted(io, s::AbstractString)
+    print(io, '"')
+    print_escaped(io, s, "\"\$") #"# work around syntax highlighting problem
+    print(io, '"')
+end
+
+# bare minimum unescaping function unescapes only given characters
+
+function print_unescaped_chars(io, s::AbstractString, esc::AbstractString)
+    if !('\\' in esc)
+        esc = string("\\", esc)
+    end
+    i = start(s)
+    while !done(s,i)
+        c, i = next(s,i)
+        if c == '\\' && !done(s,i) && s[i] in esc
+            c, i = next(s,i)
+        end
+        print(io, c)
+    end
+end
+
+unescape_chars(s::AbstractString, esc::AbstractString) =
+    sprint(endof(s), print_unescaped_chars, s, esc)
+
+# general unescaping of traditional C and Unicode escape sequences
+
+function print_unescaped(io, s::AbstractString)
+    i = start(s)
+    while !done(s,i)
+        c, i = next(s,i)
+        if !done(s,i) && c == '\\'
+            c, i = next(s,i)
+            if c == 'x' || c == 'u' || c == 'U'
+                n = k = 0
+                m = c == 'x' ? 2 :
+                    c == 'u' ? 4 : 8
+                while (k+=1) <= m && !done(s,i)
+                    c, j = next(s,i)
+                    n = '0' <= c <= '9' ? n<<4 + c-'0' :
+                        'a' <= c <= 'f' ? n<<4 + c-'a'+10 :
+                        'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break
+                    i = j
+                end
+                if k == 1
+                    throw(ArgumentError("\\x used with no following hex digits in $(repr(s))"))
+                end
+                if m == 2 # \x escape sequence
+                    write(io, UInt8(n))
+                else
+                    print(io, Char(n))
+                end
+            elseif '0' <= c <= '7'
+                k = 1
+                n = c-'0'
+                while (k+=1) <= 3 && !done(s,i)
+                    c, j = next(s,i)
+                    n = ('0' <= c <= '7') ? n<<3 + c-'0' : break
+                    i = j
+                end
+                if n > 255
+                    throw(ArgumentError("octal escape sequence out of range"))
+                end
+                write(io, UInt8(n))
+            else
+                print(io, c == 'a' ? '\a' :
+                          c == 'b' ? '\b' :
+                          c == 't' ? '\t' :
+                          c == 'n' ? '\n' :
+                          c == 'v' ? '\v' :
+                          c == 'f' ? '\f' :
+                          c == 'r' ? '\r' :
+                          c == 'e' ? '\e' : c)
+            end
+        else
+            print(io, c)
+        end
+    end
+end
+
+unescape_string(s::AbstractString) = sprint(endof(s), print_unescaped, s)
+
+macro b_str(s); :($(unescape_string(s)).data); end
+
+## Count indentation, unindent ##
+
+function blank_width(c::Char)
+    c == ' '   ? 1 :
+    c == '\t'  ? 8 :
+    throw(ArgumentError("$(repr(c)) not a blank character"))
+end
+
+# width of leading blank space, also check if string is blank
+function indentation(s::AbstractString)
+    count = 0
+    for c in s
+        if c == ' ' || c == '\t'
+            count += blank_width(c)
+        else
+            return count, false
+        end
+    end
+    count, true
+end
+
+function unindent(s::AbstractString, indent::Int)
+    indent == 0 && return s
+    buf = IOBuffer(Array(UInt8,endof(s)), true, true)
+    truncate(buf,0)
+    a = i = start(s)
+    cutting = false
+    cut = 0
+    while !done(s,i)
+        c,i_ = next(s,i)
+        if cutting && (c == ' ' || c == '\t')
+            a = i_
+            cut += blank_width(c)
+            if cut == indent
+                cutting = false
+            elseif cut > indent
+                cutting = false
+                for _ = (indent+1):cut write(buf, ' ') end
+            end
+        elseif c == '\n'
+            print(buf, s[a:i])
+            a = i_
+            cutting = true
+            cut = 0
+        else
+            cutting = false
+        end
+        i = i_
+    end
+    print(buf, s[a:end])
+    takebuf_string(buf)
+end
diff --git a/base/strings/search.jl b/base/strings/search.jl
new file mode 100644
index 0000000000000..1bd242d22e4f3
--- /dev/null
+++ b/base/strings/search.jl
@@ -0,0 +1,360 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+function search(s::AbstractString, c::Chars, i::Integer)  # index of the first character at or after `i` that is in `c`, or 0
+    if isempty(c)
+        return 1 <= i <= nextind(s,endof(s)) ? i :
+               throw(BoundsError(s, i))
+    end
+    if i < 1 || i > nextind(s,endof(s))  # the position one past the end is accepted as a start index
+        throw(BoundsError(s, i))
+    end
+    while !done(s,i)
+        d, j = next(s,i)
+        if d in c
+            return i
+        end
+        i = j
+    end
+    return 0
+end
+search(s::AbstractString, c::Chars) = search(s,c,start(s))
+
+in(c::Char, s::AbstractString) = (search(s,c)!=0)
+
+function _searchindex(s, t, i)
+    if isempty(t)
+        return 1 <= i <= nextind(s,endof(s)) ? i :
+               throw(BoundsError(s, i))
+    end
+    t1, j2 = next(t,start(t))
+    while true
+        i = search(s,t1,i)
+        if i == 0 return 0 end
+        c, ii = next(s,i)
+        j = j2; k = ii
+        matched = true
+        while !done(t,j)
+            if done(s,k)
+                matched = false
+                break
+            end
+            c, k = next(s,k)
+            d, j = next(t,j)
+            if c != d
+                matched = false
+                break
+            end
+        end
+        if matched
+            return i
+        end
+        i = ii
+    end
+end
+
+function _search_bloom_mask(c)
+    UInt64(1) << (c & 63)
+end
+
+function _searchindex(s::Array, t::Array, i)
+    n = length(t)
+    m = length(s)
+
+    if n == 0
+        return 1 <= i <= m+1 ? max(1, i) : 0
+    elseif m == 0
+        return 0
+    elseif n == 1
+        return search(s, t[1], i)
+    end
+
+    w = m - n
+    if w < 0 || i - 1 > w
+        return 0
+    end
+
+    bloom_mask = UInt64(0)
+    skip = n - 1
+    tlast = t[end]
+    for j in 1:n
+        bloom_mask |= _search_bloom_mask(t[j])
+        if t[j] == tlast && j < n
+            skip = n - j - 1
+        end
+    end
+
+    i -= 1
+    while i <= w
+        if s[i+n] == tlast
+            # check candidate
+            j = 0
+            while j < n - 1
+                if s[i+j+1] != t[j+1]
+                    break
+                end
+                j += 1
+            end
+
+            # match found
+            if j == n - 1
+                return i+1
+            end
+
+            # no match, try to rule out the next character
+            if i < w && bloom_mask & _search_bloom_mask(s[i+n+1]) == 0
+                i += n
+            else
+                i += skip
+            end
+        elseif i < w
+            if bloom_mask & _search_bloom_mask(s[i+n+1]) == 0
+                i += n
+            end
+        end
+        i += 1
+    end
+
+    0
+end
+
+searchindex(s::ByteArray, t::ByteArray, i) = _searchindex(s,t,i)
+searchindex(s::AbstractString, t::AbstractString, i::Integer) = _searchindex(s,t,i)
+searchindex(s::AbstractString, t::AbstractString) = searchindex(s,t,start(s))
+searchindex(s::AbstractString, c::Char, i::Integer) = _searchindex(s,c,i)
+searchindex(s::AbstractString, c::Char) = searchindex(s,c,start(s))
+
+function searchindex(s::ByteString, t::ByteString, i::Integer=1)
+    # Check for fast case of a single byte
+    # (for multi-byte UTF-8 sequences, use searchindex on byte arrays instead)
+    if endof(t) == 1
+        search(s, t[1], i)
+    else
+        searchindex(s.data, t.data, i)
+    end
+end
+
+function search(s::ByteArray, t::ByteArray, i)
+    idx = searchindex(s,t,i)
+    if isempty(t)
+        idx:idx-1
+    else
+        idx:(idx > 0 ? idx + endof(t) - 1 : -1)
+    end
+end
+
+function search(s::AbstractString, t::AbstractString, i::Integer=start(s))
+    idx = searchindex(s,t,i)
+    if isempty(t)
+        idx:idx-1
+    else
+        idx:(idx > 0 ? idx + endof(t) - 1 : -1)
+    end
+end
+
+function rsearch(s::AbstractString, c::Chars)
+    j = search(RevString(s), c)
+    j == 0 && return 0
+    endof(s)-j+1
+end
+
+function rsearch(s::AbstractString, c::Chars, i::Integer)
+    e = endof(s)
+    j = search(RevString(s), c, e-i+1)
+    j == 0 && return 0
+    e-j+1
+end
+
+function _rsearchindex(s, t, i)
+    if isempty(t)
+        return 1 <= i <= nextind(s,endof(s)) ? i :
+               throw(BoundsError(s, i))
+    end
+    t = RevString(t)
+    rs = RevString(s)
+    l = endof(s)
+    t1, j2 = next(t,start(t))
+    while true
+        i = rsearch(s,t1,i)
+        if i == 0 return 0 end
+        c, ii = next(rs,l-i+1)
+        j = j2; k = ii
+        matched = true
+        while !done(t,j)
+            if done(rs,k)
+                matched = false
+                break
+            end
+            c, k = next(rs,k)
+            d, j = next(t,j)
+            if c != d
+                matched = false
+                break
+            end
+        end
+        if matched
+            return nextind(s,l-k+1)
+        end
+        i = l-ii+1
+    end
+end
+
+function _rsearchindex(s::Array, t::Array, k)
+    n = length(t)
+    m = length(s)
+
+    if n == 0
+        return 0 <= k <= m ? max(k, 1) : 0
+    elseif m == 0
+        return 0
+    elseif n == 1
+        return rsearch(s, t[1], k)
+    end
+
+    w = m - n
+    if w < 0 || k <= 0
+        return 0
+    end
+
+    bloom_mask = UInt64(0)
+    skip = n - 1
+    tfirst = t[1]
+    for j in n:-1:1
+        bloom_mask |= _search_bloom_mask(t[j])
+        if t[j] == tfirst && j > 1
+            skip = j - 2
+        end
+    end
+
+    i = min(k - n + 1, w + 1)
+    while i > 0
+        if s[i] == tfirst
+            # check candidate
+            j = 1
+            while j < n
+                if s[i+j] != t[j+1]
+                    break
+                end
+                j += 1
+            end
+
+            # match found
+            if j == n
+                return i
+            end
+
+            # no match, try to rule out the next character
+            if i > 1 && bloom_mask & _search_bloom_mask(s[i-1]) == 0
+                i -= n
+            else
+                i -= skip
+            end
+        elseif i > 1
+            if bloom_mask & _search_bloom_mask(s[i-1]) == 0
+                i -= n
+            end
+        end
+        i -= 1
+    end
+
+    0
+end
+
+rsearchindex(s::ByteArray,t::ByteArray,i) = _rsearchindex(s,t,i)
+rsearchindex(s::AbstractString, t::AbstractString, i::Integer) = _rsearchindex(s,t,i)
+rsearchindex(s::AbstractString, t::AbstractString) = (isempty(s) && isempty(t)) ? 1 : rsearchindex(s,t,endof(s))
+
+function rsearchindex(s::ByteString, t::ByteString)
+    # Check for fast case of a single byte
+    # (for multi-byte UTF-8 sequences, use rsearchindex instead)
+    if endof(t) == 1
+        rsearch(s, t[1])
+    else
+        _rsearchindex(s.data, t.data, length(s.data))
+    end
+end
+
+function rsearchindex(s::ByteString, t::ByteString, i::Integer)
+    # Check for fast case of a single byte
+    # (for multi-byte UTF-8 sequences, use rsearchindex instead)
+    if endof(t) == 1
+        rsearch(s, t[1], i)
+    elseif endof(t) != 0
+        _rsearchindex(s.data, t.data, nextind(s, i)-1)
+    elseif i > sizeof(s)
+        return 0
+    elseif i == 0
+        return 1
+    else
+        return i
+    end
+end
+
+function rsearch(s::ByteArray, t::ByteArray, i::Integer)
+    idx = rsearchindex(s,t,i)
+    if isempty(t)
+        idx:idx-1
+    else
+        idx:(idx > 0 ? idx + endof(t) - 1 : -1)
+    end
+end
+
+function rsearch(s::AbstractString, t::AbstractString, i::Integer=endof(s))
+    idx = rsearchindex(s,t,i)
+    if isempty(t)
+        idx:idx-1
+    else
+        idx:(idx > 0 ? idx + endof(t) - 1 : -1)
+    end
+end
+
+contains(haystack::AbstractString, needle::AbstractString) = searchindex(haystack,needle)!=0
+
+in(::AbstractString, ::AbstractString) = error("use contains(x,y) for string containment")
+
+# ByteArray optimizations
+
+# find the index of the first occurrence of a value in a byte array
+
+function search(a::ByteArray, b::Union{Int8,UInt8}, i::Integer)  # index of the first byte equal to `b` at or after `i`, or 0
+    if i < 1
+        throw(BoundsError(a, i))
+    end
+    n = length(a)
+    if i > n
+        return i == n+1 ? 0 : throw(BoundsError(a, i))  # i == n+1 is accepted and simply finds nothing
+    end
+    p = pointer(a)
+    q = ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-1, b, n-i+1)  # scan the n-i+1 bytes starting at a[i]
+    q == C_NULL ? 0 : Int(q-p+1)  # convert the returned pointer back to a 1-based index
+end
+function search(a::ByteArray, b::Char, i::Integer)
+    if isascii(b)
+        search(a,UInt8(b),i)
+    else
+        search(a,string(b).data,i).start
+    end
+end
+search(a::ByteArray, b::Union{Int8,UInt8,Char}) = search(a,b,1)
+
+function rsearch(a::ByteArray, b::Union{Int8,UInt8}, i::Integer)
+    if i < 1
+        return i == 0 ? 0 : throw(BoundsError(a, i))
+    end
+    n = length(a)
+    if i > n
+        return i == n+1 ? 0 : throw(BoundsError(a, i))
+    end
+    p = pointer(a)
+    q = ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i)
+    q == C_NULL ? 0 : Int(q-p+1)
+end
+function rsearch(a::ByteArray, b::Char, i::Integer)
+    if isascii(b)
+        rsearch(a,UInt8(b),i)
+    else
+        rsearch(a,string(b).data,i).start
+    end
+end
+rsearch(a::ByteArray, b::Union{Int8,UInt8,Char}) = rsearch(a,b,length(a))
+
+
diff --git a/base/strings/types.jl b/base/strings/types.jl
new file mode 100644
index 0000000000000..1bc4f5b9f28c0
--- /dev/null
+++ b/base/strings/types.jl
@@ -0,0 +1,224 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# SubString, RevString, RepString, and RopeString types
+
+## substrings reference original strings ##
+
+immutable SubString{T<:AbstractString} <: AbstractString
+    string::T
+    offset::Int
+    endof::Int
+
+    function SubString(s::T, i::Int, j::Int)
+        if i > endof(s) || j<i
+            return new(s, i-1, 0)
+        else
+            if !isvalid(s,i)
+                throw(ArgumentError("invalid SubString index"))
+            end
+
+            while !isvalid(s,j) && j > i
+                j -= 1
+            end
+
+            o = i-1
+            new(s, o, max(0, j-o))
+        end
+    end
+end
+SubString{T<:AbstractString}(s::T, i::Int, j::Int) = SubString{T}(s, i, j)
+SubString(s::SubString, i::Int, j::Int) = SubString(s.string, s.offset+i, s.offset+j)
+SubString(s::AbstractString, i::Integer, j::Integer) = SubString(s, Int(i), Int(j))
+SubString(s::AbstractString, i::Integer) = SubString(s, i, endof(s))
+
+sizeof(s::SubString{ASCIIString}) = s.endof
+sizeof(s::SubString{UTF8String}) = s.endof == 0 ? 0 : nextind(s, s.endof) - 1
+
+# TODO: length(s::SubString) = ??
+# default implementation will work but it's slow
+# can this be delegated efficiently somehow?
+# that may require additional string interfaces
+length{T<:DirectIndexString}(s::SubString{T}) = endof(s)
+
+function length(s::SubString{UTF8String})
+    return s.endof==0 ? 0 : Int(ccall(:u8_charnum, Csize_t, (Ptr{UInt8}, Csize_t),
+                                      pointer(s), nextind(s, s.endof) - 1))
+end
+
+function next(s::SubString, i::Int)
+    if i < 1 || i > s.endof
+        throw(BoundsError(s, i))
+    end
+    c, i = next(s.string, i+s.offset)
+    c, i-s.offset
+end
+
+function getindex(s::SubString, i::Int)
+    if i < 1 || i > s.endof
+        throw(BoundsError(s, i))
+    end
+    getindex(s.string, i+s.offset)
+end
+
+endof(s::SubString) = s.endof
+
+function isvalid(s::SubString, i::Integer)
+    return (start(s) <= i <= endof(s)) && isvalid(s.string, s.offset+i)
+end
+
+isvalid{T<:DirectIndexString}(s::SubString{T}, i::Integer) = (start(s) <= i <= endof(s))
+
+ind2chr{T<:DirectIndexString}(s::SubString{T}, i::Integer) = begin checkbounds(s,i); i end
+chr2ind{T<:DirectIndexString}(s::SubString{T}, i::Integer) = begin checkbounds(s,i); i end
+
+nextind(s::SubString, i::Integer) = nextind(s.string, i+s.offset)-s.offset
+prevind(s::SubString, i::Integer) = prevind(s.string, i+s.offset)-s.offset
+
+convert{T<:AbstractString}(::Type{SubString{T}}, s::T) = SubString(s, 1, endof(s))
+
+bytestring{T <: ByteString}(p::SubString{T}) = bytestring(p.string.data[1+p.offset:p.offset+nextind(p, p.endof)-1])
+
+function getindex(s::AbstractString, r::UnitRange{Int})
+    if first(r) < 1 || endof(s) < last(r)
+        throw(BoundsError(s, r))
+    end
+    SubString(s, first(r), last(r))
+end
+
+isascii(s::SubString{ASCIIString}) = true
+
+function cmp{T<:ByteString,S<:ByteString}(a::SubString{T}, b::SubString{S})
+    na = sizeof(a)
+    nb = sizeof(b)
+    c = ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
+              pointer(a), pointer(b), min(na,nb))
+    c < 0 ? -1 : c > 0 ? +1 : cmp(na,nb)
+end
+
+## reversed strings without data movement ##
+
+immutable RevString{T<:AbstractString} <: AbstractString
+    string::T
+end
+
+endof(s::RevString) = endof(s.string)
+length(s::RevString) = length(s.string)
+sizeof(s::RevString) = sizeof(s.string)
+
+function next(s::RevString, i::Int)
+    n = endof(s); j = n-i+1
+    (s.string[j], n-prevind(s.string,j)+1)
+end
+
+reverse(s::AbstractString) = RevString(s)
+reverse(s::RevString) = s.string
+
+isascii(s::RevString{ASCIIString}) = true
+
+## reverse an index i so that reverse(s)[i] == s[reverseind(s,i)]
+
+reverseind(s::Union{DirectIndexString,SubString{DirectIndexString}}, i::Integer) = length(s) + 1 - i
+reverseind(s::RevString, i::Integer) = endof(s) - i + 1
+lastidx(s::AbstractString) = nextind(s, endof(s)) - 1
+lastidx(s::DirectIndexString) = length(s)
+reverseind(s::SubString, i::Integer) =
+    reverseind(s.string, lastidx(s.string)-s.offset-s.endof+i) - s.offset
+
+## efficient representation of repeated strings ##
+
+immutable RepString <: AbstractString
+    string::AbstractString
+    repeat::Integer
+end
+
+function endof(s::RepString)
+    e = endof(s.string)
+    (next(s.string,e)[2]-1) * (s.repeat-1) + e
+end
+length(s::RepString) = length(s.string)*s.repeat
+sizeof(s::RepString) = sizeof(s.string)*s.repeat
+
+function next(s::RepString, i::Int)
+    if i < 1
+        throw(BoundsError(s, i))
+    end
+    e = endof(s.string)
+    sz = next(s.string,e)[2]-1
+
+    r, j = divrem(i-1, sz)
+    j += 1
+
+    if r >= s.repeat || j > e
+        throw(BoundsError(s, i))
+    end
+
+    c, k = next(s.string, j)
+    c, k-j+i
+end
+
+function repeat(s::AbstractString, r::Integer)
+    r <  0 ? throw(ArgumentError("can't repeat a string $r times")) :
+    r == 0 ? "" :
+    r == 1 ? s  :
+    RepString(s,r)
+end
+
+convert(::Type{RepString}, s::AbstractString) = RepString(s,1)
+
+function repeat(s::ByteString, r::Integer)
+    r < 0 && throw(ArgumentError("can't repeat a string $r times"))
+    d = s.data; n = length(d)
+    out = Array(UInt8, n*r)
+    for i=1:r
+        copy!(out, 1+(i-1)*n, d, 1, n)
+    end
+    convert(typeof(s), out)
+end
+
+(^)(s::AbstractString, r::Integer) = repeat(s,r)
+
+## ropes for efficient concatenation, etc. ##
+
+immutable RopeString <: AbstractString
+    head::AbstractString
+    tail::AbstractString
+    depth::Int32
+    endof::Int
+
+    RopeString(h::RopeString, t::RopeString) =
+        strdepth(h.tail) + strdepth(t) < strdepth(h.head) ?
+            RopeString(h.head, RopeString(h.tail, t)) :
+            new(h, t, max(h.depth,t.depth)+1, endof(h)+endof(t))
+
+    RopeString(h::RopeString, t::AbstractString) =
+        strdepth(h.tail) < strdepth(h.head) ?
+            RopeString(h.head, RopeString(h.tail, t)) :
+            new(h, t, h.depth+1, endof(h)+endof(t))
+
+    RopeString(h::AbstractString, t::RopeString) =
+        strdepth(t.head) < strdepth(t.tail) ?
+            RopeString(RopeString(h, t.head), t.tail) :
+            new(h, t, t.depth+1, endof(h)+endof(t))
+
+    RopeString(h::AbstractString, t::AbstractString) =
+        new(h, t, 1, endof(h)+endof(t))
+end
+RopeString(s::AbstractString) = RopeString(s,"")
+
+strdepth(s::AbstractString) = 0
+strdepth(s::RopeString) = s.depth
+
+function next(s::RopeString, i::Int)
+    eh = endof(s.head)
+    if i <= eh
+        return next(s.head, i)
+    else
+        c, j = next(s.tail, i-eh)
+        return c, j+eh
+    end
+end
+
+endof(s::RopeString) = s.endof
+length(s::RopeString) = length(s.head) + length(s.tail)
+write(io::IO, s::RopeString) = write(io, s.head) + write(io, s.tail)  # total bytes written (head, then tail)
+sizeof(s::RopeString) = sizeof(s.head) + sizeof(s.tail)
diff --git a/base/strings/util.jl b/base/strings/util.jl
new file mode 100644
index 0000000000000..c26107c71663a
--- /dev/null
+++ b/base/strings/util.jl
@@ -0,0 +1,233 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# starts with and ends with predicates
+
+function startswith(a::AbstractString, b::AbstractString) # return true if `b` is a prefix of `a`, comparing character by character
+    i = start(a) # iteration state for `a`
+    j = start(b) # iteration state for `b`; kept separate because the two strings may index differently
+    while !done(a,i) && !done(b,j) # each string must be tested with its own state
+        c, i = next(a,i)
+        d, j = next(b,j)
+        if c != d return false end
+    end
+    done(b,j) # prefix only if all of `b` was consumed (false when `a` ran out first)
+end
+startswith(str::AbstractString, chars::Chars) = !isempty(str) && str[start(str)] in chars
+
+function endswith(a::AbstractString, b::AbstractString) # return true if `b` is a suffix of `a`; compares characters from the end backwards
+    i = endof(a) # index of the character of `a` being compared
+    j = endof(b) # index of the character of `b` being compared
+    a1 = start(a)
+    b1 = start(b)
+    while a1 <= i && b1 <= j # continue while neither index has moved before its string's start
+        c = a[i]
+        d = b[j]
+        if c != d return false end
+        i = prevind(a,i)
+        j = prevind(b,j)
+    end
+    j < b1 # suffix only if every character of `b` was matched
+end
+endswith(str::AbstractString, chars::Chars) = !isempty(str) && str[end] in chars
+
+startswith(a::ByteString, b::ByteString) = startswith(a.data, b.data) # byte strings: compare the underlying byte vectors
+startswith(a::Vector{UInt8}, b::Vector{UInt8}) = # byte-wise prefix test; memcmp (unlike strncmp) does not stop at an embedded NUL byte
+    (length(a) >= length(b) && ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, length(b)) == 0)
+
+# TODO: fast endswith
+
+chop(s::AbstractString) = s[1:end-1]
+
+function chomp(s::AbstractString) # return `s` without a single trailing '\n', or without a trailing '\r' '\n' pair
+    i = endof(s)
+    if (i < 1 || s[i] != '\n') return s end # empty, or no trailing newline: returned unchanged
+    j = prevind(s,i)
+    if (j < 1 || s[j] != '\r') return s[1:i-1] end # newline not preceded by '\r': drop only the newline
+    return s[1:j-1] # drop both the '\r' and the '\n'
+end
+chomp(s::ByteString) =
+    (endof(s) < 1 || s.data[end]   != 0x0a) ? s :
+    (endof(s) < 2 || s.data[end-1] != 0x0d) ? s[1:end-1] : s[1:end-2]
+
+# NOTE: use with caution -- breaks the immutable string convention!
+function chomp!(s::ByteString) # strip one trailing newline (optionally preceded by a carriage return) by modifying s.data; returns `s`
+    if !isempty(s) && s.data[end] == 0x0a # 0x0a is the byte for '\n'
+        n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2 # 0x0d is the byte for '\r'; n = number of bytes to drop
+        ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n) # presumably removes the last n elements of s.data -- confirm against the runtime
+    end
+    return s
+end
+chomp!(s::AbstractString) = chomp(s) # copying fallback for other string types
+
+const _default_delims = [' ','\t','\n','\v','\f','\r']
+
+function lstrip(s::AbstractString, chars::Chars=_default_delims) # return `s` with leading characters contained in `chars` removed
+    i = start(s)
+    while !done(s,i)
+        c, j = next(s,i)
+        if !(c in chars)
+            return s[i:end] # first character to keep starts at index i
+        end
+        i = j
+    end
+    "" # no character to keep was found (also reached for an empty `s`)
+end
+
+function rstrip(s::AbstractString, chars::Chars=_default_delims) # return `s` with trailing characters contained in `chars` removed
+    r = RevString(s) # iterate over `s` back to front
+    i = start(r)
+    while !done(r,i)
+        c, j = next(r,i)
+        if !(c in chars)
+            return s[1:end-i+1] # i is an index into the reversed view; end-i+1 maps it back onto `s`
+        end
+        i = j
+    end
+    "" # no character to keep was found (also reached for an empty `s`)
+end
+
+strip(s::AbstractString) = lstrip(rstrip(s))
+strip(s::AbstractString, chars::Chars) = lstrip(rstrip(s, chars), chars)
+
+## String padding functions ##
+
+function lpad(s::AbstractString, n::Integer, p::AbstractString=" ") # pad `s` on the left with repetitions of `p` until its strwidth reaches `n`
+    m = n - strwidth(s) # width still missing
+    if m <= 0; return s; end # already wide enough: `s` is returned as is
+    l = strwidth(p)
+    if l==1
+        return bytestring(p^m * s) # width-1 padding: exactly m repetitions
+    end
+    q = div(m,l) # whole repetitions of `p` that fit
+    r = m - q*l # width left over after the whole repetitions
+    i = r != 0 ? chr2ind(p, r) : -1 # end index of the partial piece of `p`; -1 makes p[1:i] an empty range
+    bytestring(p^q*p[1:i]*s)
+end
+
+function rpad(s::AbstractString, n::Integer, p::AbstractString=" ") # pad `s` on the right with repetitions of `p` until its strwidth reaches `n`
+    m = n - strwidth(s) # width still missing
+    if m <= 0; return s; end # already wide enough: `s` is returned as is
+    l = strwidth(p)
+    if l==1
+        return bytestring(s * p^m) # width-1 padding: exactly m repetitions
+    end
+    q = div(m,l) # whole repetitions of `p` that fit
+    r = m - q*l # width left over after the whole repetitions
+    i = r != 0 ? chr2ind(p, r) : -1 # end index of the partial piece of `p`; -1 makes p[1:i] an empty range
+    bytestring(s*p^q*p[1:i])
+end
+
+lpad(s, n::Integer, p=" ") = lpad(string(s),n,string(p))
+rpad(s, n::Integer, p=" ") = rpad(string(s),n,string(p))
+cpad(s, n::Integer, p=" ") = rpad(lpad(s,div(n+strwidth(s),2),p),n,p)
+
+# splitter can be a Char, Vector{Char}, AbstractString, Regex, ...
+# any splitter that provides search(s::AbstractString, splitter)
+split{T<:SubString}(str::T, splitter; limit::Integer=0, keep::Bool=true) = _split(str, splitter, limit, keep, T[])
+split{T<:AbstractString}(str::T, splitter; limit::Integer=0, keep::Bool=true) = _split(str, splitter, limit, keep, SubString{T}[])
+function _split{T<:AbstractString,U<:Array}(str::T, splitter, limit::Integer, keep_empty::Bool, strs::U) # worker behind `split`: pushes the pieces of `str` onto `strs` and returns it
+    i = start(str) # start index of the piece currently being collected
+    n = endof(str)
+    r = search(str,splitter,i)
+    j, k = first(r), nextind(str,last(r)) # j = start of the match, k = index following the match
+    while 0 < j <= n && length(strs) != limit-1 # a match exists and the piece limit is not reached (limit 0 never triggers)
+        if i < k
+            if keep_empty || i < j # i == j means the piece before the match is empty
+                push!(strs, SubString(str,i,prevind(str,j)))
+            end
+            i = k
+        end
+        if k <= j; k = nextind(str,j) end # empty match: step past j so the next search makes progress
+        r = search(str,splitter,k)
+        j, k = first(r), nextind(str,last(r))
+    end
+    if keep_empty || !done(str,i) # remainder after the last match
+        push!(strs, SubString(str,i))
+    end
+    return strs
+end
+
+# a bit oddball, but standard behavior in Perl, Ruby & Python:
+split(str::AbstractString) = split(str, _default_delims; limit=0, keep=false)
+
+rsplit{T<:SubString}(str::T, splitter; limit::Integer=0, keep::Bool=true) = _rsplit(str, splitter, limit, keep, T[])
+rsplit{T<:AbstractString}(str::T, splitter   ; limit::Integer=0, keep::Bool=true) = _rsplit(str, splitter, limit, keep, SubString{T}[])
+function _rsplit{T<:AbstractString,U<:Array}(str::T, splitter, limit::Integer, keep_empty::Bool, strs::U) # worker behind `rsplit`: searches from the end, prepends pieces to `strs` and returns it
+    i = start(str)
+    n = endof(str) # end index of the piece currently being collected
+    r = rsearch(str,splitter)
+    j = first(r)-1 # index just before the match
+    k = last(r) # last index of the match
+    while((0 <= j < n) && (length(strs) != limit-1)) # a match exists and the piece limit is not reached (limit 0 never triggers)
+        if i <= k
+            (keep_empty || (k < n)) && unshift!(strs, SubString(str,k+1,n)) # k == n means the piece after the match is empty
+            n = j
+        end
+        (k <= j) && (j = prevind(str,j)) # empty match: step back so the next search makes progress
+        r = rsearch(str,splitter,j)
+        j = first(r)-1
+        k = last(r)
+    end
+    (keep_empty || (n > 0)) && unshift!(strs, SubString(str,1,n)) # remainder before the first match found
+    return strs
+end
+#rsplit(str::AbstractString) = rsplit(str, _default_delims, 0, false)
+
+function replace(str::ByteString, pattern, repl::Function, limit::Integer) # return a copy of `str` where each match of `pattern` is replaced by string(repl(match)), at most `limit` times
+    n = 1 # ordinal of the match being processed; limit 0 never equals it, so 0 means no limit
+    e = endof(str)
+    i = a = start(str) # i = start of the text not yet written to `out`
+    r = search(str,pattern,i)
+    j, k = first(r), last(r) # j:k is the current match; j == 0 means no match
+    out = IOBuffer()
+    while j != 0
+        if i == a || i <= k
+            write(out, SubString(str,i,prevind(str,j))) # text between the previous match and this one
+            write(out, string(repl(SubString(str,j,k)))) # replacement computed from the matched text
+        end
+        if k<j # empty match
+            i = j
+            k = nextind(str, j)
+        else
+            i = k = nextind(str, k) # resume right after the match
+        end
+        if j > e
+            break
+        end
+        r = search(str,pattern,k)
+        j, k = first(r), last(r)
+        n == limit && break
+        n += 1
+    end
+    write(out, SubString(str,i)) # copy the tail after the last replacement
+    takebuf_string(out)
+end
+replace(s::AbstractString, pat, f::Function, n::Integer) = replace(bytestring(s), pat, f, n)
+replace(s::AbstractString, pat, r, n::Integer) = replace(s, pat, x->r, n)
+replace(s::AbstractString, pat, r) = replace(s, pat, r, 0)
+
+# hex <-> bytes conversion
+
+function hex2bytes(s::ASCIIString) # decode pairs of hexadecimal digits (either case) into a Vector{UInt8}; throws ArgumentError on odd length or a non-hex character
+    len = length(s)
+    iseven(len) || throw(ArgumentError("string length must be even: length($(repr(s))) == $len"))
+    arr = zeros(UInt8, div(len,2)) # one output byte per two input characters
+    i = j = 0 # i = characters consumed so far, j = bytes written so far
+    while i < len
+        n = 0
+        c = s[i+=1] # first digit of the pair: high nibble
+        n = '0' <= c <= '9' ? c - '0' :
+            'a' <= c <= 'f' ? c - 'a' + 10 :
+            'A' <= c <= 'F' ? c - 'A' + 10 :
+                throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
+        c = s[i+=1] # second digit of the pair: low nibble
+        n = '0' <= c <= '9' ? n << 4 + c - '0' :
+            'a' <= c <= 'f' ? n << 4 + c - 'a' + 10 :
+            'A' <= c <= 'F' ? n << 4 + c - 'A' + 10 :
+                throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
+        arr[j+=1] = n
+    end
+    return arr
+end
+
+bytes2hex{T<:UInt8}(arr::Vector{T}) = join([hex(i,2) for i in arr])
diff --git a/base/sysimg.jl b/base/sysimg.jl
index db3bc2f1ccff0..71fea54c4a0de 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -88,18 +88,13 @@ include("iterator.jl")
 include("osutils.jl")
 
 # strings & printing
-include("utferror.jl")
-include("utftypes.jl")
-include("utfcheck.jl")
 include("char.jl")
 include("ascii.jl")
-include("utf8.jl")
-include("utf16.jl")
-include("utf32.jl")
 include("iobuffer.jl")
 include("string.jl")
-include("utf8proc.jl")
-importall .UTF8proc
+include("unicode.jl")
+include("parse.jl")
+include("shell.jl")
 include("regex.jl")
 include("base64.jl")
 importall .Base64
diff --git a/base/unicode.jl b/base/unicode.jl
new file mode 100644
index 0000000000000..e0ed8b5d1b0a8
--- /dev/null
+++ b/base/unicode.jl
@@ -0,0 +1,10 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+include("unicode/UnicodeError.jl")
+include("unicode/types.jl")
+include("unicode/checkstring.jl")
+include("unicode/utf8.jl")
+include("unicode/utf16.jl")
+include("unicode/utf32.jl")
+include("unicode/utf8proc.jl")
+importall .UTF8proc
diff --git a/base/utferror.jl b/base/unicode/UnicodeError.jl
similarity index 100%
rename from base/utferror.jl
rename to base/unicode/UnicodeError.jl
diff --git a/base/utfcheck.jl b/base/unicode/checkstring.jl
similarity index 100%
rename from base/utfcheck.jl
rename to base/unicode/checkstring.jl
diff --git a/base/utftypes.jl b/base/unicode/types.jl
similarity index 100%
rename from base/utftypes.jl
rename to base/unicode/types.jl
diff --git a/base/utf16.jl b/base/unicode/utf16.jl
similarity index 100%
rename from base/utf16.jl
rename to base/unicode/utf16.jl
diff --git a/base/utf32.jl b/base/unicode/utf32.jl
similarity index 67%
rename from base/utf32.jl
rename to base/unicode/utf32.jl
index 0d481bfda353c..612a3bbe4d061 100644
--- a/base/utf32.jl
+++ b/base/unicode/utf32.jl
@@ -101,3 +101,37 @@ function map(f, s::UTF32String)
     end
     UTF32String(out)
 end
+
+# Definitions for C compatible strings, that don't allow embedded
+# '\0', and which are terminated by a '\0'
+
+containsnul(s::AbstractString) = '\0' in s
+containsnul(s::ByteString) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
+containsnul(s::Union{UTF16String,UTF32String}) = findfirst(s.data, 0) != length(s.data)
+
+if sizeof(Cwchar_t) == 2
+    const WString = UTF16String
+    const wstring = utf16
+elseif sizeof(Cwchar_t) == 4
+    const WString = UTF32String
+    const wstring = utf32
+end
+wstring(s::Cwstring) = wstring(box(Ptr{Cwchar_t}, unbox(Cwstring,s)))
+
+# Cwstring is defined in c.jl, but conversion needs to be defined here
+# to have WString
+function unsafe_convert(::Type{Cwstring}, s::WString) # wrap the wide-character pointer of `s` in a Cwstring, rejecting strings that contain NUL
+    if containsnul(s) # a NUL inside the string cannot be represented in a NUL-terminated C string
+        throw(ArgumentError("embedded NUL chars are not allowed in C strings: $(repr(s))"))
+    end
+    return Cwstring(unsafe_convert(Ptr{Cwchar_t}, s))
+end
+
+# pointer conversions of ASCII/UTF8/UTF16/UTF32 strings (for UTF16/UTF32 the index and offset are scaled by the element size):
+pointer(x::Union{ByteString,UTF16String,UTF32String}) = pointer(x.data)
+pointer{T<:ByteString}(x::SubString{T}) = pointer(x.string.data) + x.offset
+pointer(x::ByteString, i::Integer) = pointer(x.data)+(i-1)
+pointer{T<:ByteString}(x::SubString{T}, i::Integer) = pointer(x.string.data) + x.offset + (i-1)
+pointer(x::Union{UTF16String,UTF32String}, i::Integer) = pointer(x)+(i-1)*sizeof(eltype(x.data))
+pointer{T<:Union{UTF16String,UTF32String}}(x::SubString{T}) = pointer(x.string.data) + x.offset*sizeof(eltype(x.string.data)) # element size comes from the parent string's buffer, as in the sibling SubString methods
+pointer{T<:Union{UTF16String,UTF32String}}(x::SubString{T}, i::Integer) = pointer(x.string.data) + (x.offset + (i-1))*sizeof(eltype(x.string.data)) # same buffer lookup, advanced by i-1 further elements
diff --git a/base/utf8.jl b/base/unicode/utf8.jl
similarity index 100%
rename from base/utf8.jl
rename to base/unicode/utf8.jl
diff --git a/base/utf8proc.jl b/base/unicode/utf8proc.jl
similarity index 100%
rename from base/utf8proc.jl
rename to base/unicode/utf8proc.jl
diff --git a/test/choosetests.jl b/test/choosetests.jl
index aac6a058be6a1..8dee768042cfa 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -15,12 +15,13 @@ Upon return, `tests` is a vector of fully-expanded test names, and
 """ ->
 function choosetests(choices = [])
     testnames = [
-        "linalg", "core", "keywordargs", "numbers", "strings",
+        "linalg", "core", "keywordargs", "numbers", "printf",
+        "char", "string", "triplequote", "unicode",
         "dates", "dict", "hashing", "remote", "iobuffer", "staged",
         "arrayops", "tuple", "subarray", "reduce", "reducedim", "random",
         "abstractarray", "intfuncs", "simdloop", "blas", "sparse",
         "bitarray", "copy", "math", "fastmath", "functional",
-        "operators", "path", "ccall", "unicode",
+        "operators", "path", "ccall", "parse",
         "bigint", "sorting", "statistics", "spawn", "backtrace",
         "priorityqueue", "file", "mmap", "version", "resolve",
         "pollfd", "mpfr", "broadcast", "complex", "socket",
@@ -29,7 +30,7 @@ function choosetests(choices = [])
         "euler", "show", "lineedit", "replcompletions", "repl",
         "replutil", "sets", "test", "goto", "llvmcall", "grisu",
         "nullable", "meta", "profile", "libgit2", "docs", "markdown",
-        "base64", "parser", "serialize", "functors", "char", "misc",
+        "base64", "serialize", "functors", "misc",
         "enums", "cmdlineargs", "i18n", "workspace", "libdl", "int"
     ]
 
diff --git a/test/hashing.jl b/test/hashing.jl
index 46658d230b176..0d3f51e504d51 100644
--- a/test/hashing.jl
+++ b/test/hashing.jl
@@ -79,7 +79,6 @@ for a in vals, b in vals
     @test isequal(a,b) == (hash(a)==hash(b))
 end
 
-@test hash(RopeString("1","2")) == hash("12")
 @test hash(SubString("--hello--",3,7)) == hash("hello")
 @test hash(:(X.x)) == hash(:(X.x))
 @test hash(:(X.x)) != hash(:(X.y))
diff --git a/test/parser.jl b/test/parse.jl
similarity index 58%
rename from test/parser.jl
rename to test/parse.jl
index 5db51441fca18..c7d8294d98e93 100644
--- a/test/parser.jl
+++ b/test/parse.jl
@@ -153,3 +153,122 @@ macro f(args...) end; @f ""
 """) == Expr(:toplevel,
             Expr(:macro, Expr(:call, :f, Expr(:..., :args)), Expr(:block,)),
             Expr(:macrocall, symbol("@f"), ""))
+
+# integer parsing
+@test is(parse(Int32,"0",36),Int32(0))
+@test is(parse(Int32,"1",36),Int32(1))
+@test is(parse(Int32,"9",36),Int32(9))
+@test is(parse(Int32,"A",36),Int32(10))
+@test is(parse(Int32,"a",36),Int32(10))
+@test is(parse(Int32,"B",36),Int32(11))
+@test is(parse(Int32,"b",36),Int32(11))
+@test is(parse(Int32,"F",36),Int32(15))
+@test is(parse(Int32,"f",36),Int32(15))
+@test is(parse(Int32,"Z",36),Int32(35))
+@test is(parse(Int32,"z",36),Int32(35))
+
+@test parse(Int,"0") == 0
+@test parse(Int,"-0") == 0
+@test parse(Int,"1") == 1
+@test parse(Int,"-1") == -1
+@test parse(Int,"9") == 9
+@test parse(Int,"-9") == -9
+@test parse(Int,"10") == 10
+@test parse(Int,"-10") == -10
+@test parse(Int64,"3830974272") == 3830974272
+@test parse(Int64,"-3830974272") == -3830974272
+@test parse(Int,'3') == 3
+@test parse(Int,'3', 8) == 3
+
+parsebin(s) = parse(Int,s,2)
+parseoct(s) = parse(Int,s,8)
+parsehex(s) = parse(Int,s,16)
+
+@test parsebin("0") == 0
+@test parsebin("-0") == 0
+@test parsebin("1") == 1
+@test parsebin("-1") == -1
+@test parsebin("10") == 2
+@test parsebin("-10") == -2
+@test parsebin("11") == 3
+@test parsebin("-11") == -3
+@test parsebin("1111000011110000111100001111") == 252645135
+@test parsebin("-1111000011110000111100001111") == -252645135
+
+@test parseoct("0") == 0
+@test parseoct("-0") == 0
+@test parseoct("1") == 1
+@test parseoct("-1") == -1
+@test parseoct("7") == 7
+@test parseoct("-7") == -7
+@test parseoct("10") == 8
+@test parseoct("-10") == -8
+@test parseoct("11") == 9
+@test parseoct("-11") == -9
+@test parseoct("72") == 58
+@test parseoct("-72") == -58
+@test parseoct("3172207320") == 434704080
+@test parseoct("-3172207320") == -434704080
+
+@test parsehex("0") == 0
+@test parsehex("-0") == 0
+@test parsehex("1") == 1
+@test parsehex("-1") == -1
+@test parsehex("9") == 9
+@test parsehex("-9") == -9
+@test parsehex("a") == 10
+@test parsehex("-a") == -10
+@test parsehex("f") == 15
+@test parsehex("-f") == -15
+@test parsehex("10") == 16
+@test parsehex("-10") == -16
+@test parsehex("0BADF00D") == 195948557
+@test parsehex("-0BADF00D") == -195948557
+@test parse(Int64,"BADCAB1E",16) == 3135023902
+@test parse(Int64,"-BADCAB1E",16) == -3135023902
+@test parse(Int64,"CafeBabe",16) == 3405691582
+@test parse(Int64,"-CafeBabe",16) == -3405691582
+@test parse(Int64,"DeadBeef",16) == 3735928559
+@test parse(Int64,"-DeadBeef",16) == -3735928559
+
+@test parse(Int,"2\n") == 2
+@test parse(Int,"   2 \n ") == 2
+@test parse(Int," 2 ") == 2
+@test parse(Int,"2 ") == 2
+@test parse(Int," 2") == 2
+@test parse(Int,"+2\n") == 2
+@test parse(Int,"-2") == -2
+@test_throws ArgumentError parse(Int,"   2 \n 0")
+@test_throws ArgumentError parse(Int,"2x")
+@test_throws ArgumentError parse(Int,"-")
+
+# multibyte spaces
+@test parse(Int, "3\u2003\u202F") == 3
+@test_throws ArgumentError parse(Int, "3\u2003\u202F,")
+
+@test parse(Int,'a') == 10
+@test_throws ArgumentError parse(Int,typemax(Char))
+
+@test parse(Int,"1234") == 1234
+@test parse(Int,"0x1234") == 0x1234
+@test parse(Int,"0o1234") == 0o1234
+@test parse(Int,"0b1011") == 0b1011
+@test parse(Int,"-1234") == -1234
+@test parse(Int,"-0x1234") == -Int(0x1234)
+@test parse(Int,"-0o1234") == -Int(0o1234)
+@test parse(Int,"-0b1011") == -Int(0b1011)
+
+## FIXME: #4905, do these tests for Int128/UInt128!
+for T in (Int8, Int16, Int32, Int64)
+    @test parse(T,string(typemin(T))) == typemin(T)
+    @test parse(T,string(typemax(T))) == typemax(T)
+    @test_throws OverflowError parse(T,string(big(typemin(T))-1))
+    @test_throws OverflowError parse(T,string(big(typemax(T))+1))
+end
+
+for T in (UInt8,UInt16,UInt32,UInt64)
+    @test parse(T,string(typemin(T))) == typemin(T)
+    @test parse(T,string(typemax(T))) == typemax(T)
+    @test_throws ArgumentError parse(T,string(big(typemin(T))-1))
+    @test_throws OverflowError parse(T,string(big(typemax(T))+1))
+end
diff --git a/test/printf.jl b/test/printf.jl
new file mode 100644
index 0000000000000..cb4c3694b2263
--- /dev/null
+++ b/test/printf.jl
@@ -0,0 +1,51 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# printf
+# int (decimal, unsigned, alternate-form hex/octal, lowercase and uppercase hex, sign, width and alignment flags)
+@test (@sprintf "%d" typemax(Int64)) == "9223372036854775807"
+@test (@sprintf "%i" 42) == "42"
+@test (@sprintf "%u" 42) == "42"
+@test (@sprintf "Test: %i" 42) == "Test: 42"
+@test (@sprintf "%#x" 42) == "0x2a"
+@test (@sprintf "%#o" 42) == "052"
+@test (@sprintf "%x" 42) == "2a"
+@test (@sprintf "%X" 42) == "2A"
+@test (@sprintf "% i" 42) == " 42"
+@test (@sprintf "%+i" 42) == "+42"
+@test (@sprintf "%4i" 42) == "  42"
+@test (@sprintf "%-4i" 42) == "42  "
+# float
+@test (@sprintf "%7.2f" 1.2345) == "   1.23"
+@test (@sprintf "%-7.2f" 1.2345) == "1.23   "
+@test (@sprintf "%07.2f" 1.2345) == "0001.23"
+@test (@sprintf "%.0f" 1.2345) == "1"
+@test (@sprintf "%#.0f" 1.2345) == "1."
+# Inf / NaN handling
+@test (@sprintf "%f" Inf) == "Inf"
+@test (@sprintf "%f" NaN) == "NaN"
+# scientific notation
+@test (@sprintf "%.4e" 1.2345) == "1.2345e+00"
+@test (@sprintf "%.0e" 3e142) == "3e+142"
+@test (@sprintf "%#.0e" 3e142) == "3.e+142"
+# hex float
+@test (@sprintf "%a" 1.5) == "0x1.8p+0"
+@test (@sprintf "%#.0a" 1.5) == "0x2.p+0"
+@test (@sprintf "%+30a" 1/3) == "         +0x1.5555555555555p-2"
+# chars
+@test (@sprintf "%c" 65) == "A"
+@test (@sprintf "%c" 'A') == "A"
+@test (@sprintf "%c" 248) == "ø"
+@test (@sprintf "%c" 'ø') == "ø"
+# strings
+@test (@sprintf "%s" "test") == "test"
+@test (@sprintf "%s" "tést") == "tést"
+# reasonably complex
+@test (@sprintf "Test: %s%c%C%c%#-.0f." "t" 65 66 67 -42) == "Test: tABC-42.."
+#test simple splatting
+@test (@sprintf "%d%d" [1 2]...) == "12"
+# combo
+@test (@sprintf "%f %d %d %f" 1.0 [3 4]... 5) == "1.000000 3 4 5.000000"
+# multi
+@test (@sprintf "%s %f %9.5f %d %d %d %d%d%d%d" [1:6;]... [7,8,9,10]...) == "1 2.000000   3.00000 4 5 6 78910"
+# comprehension
+@test (@sprintf "%s %s %s %d %d %d %f %f %f" Any[10^x+y for x=1:3,y=1:3 ]...) == "11 101 1001 12 102 1002 13.000000 103.000000 1003.000000"
diff --git a/test/string.jl b/test/string.jl
new file mode 100644
index 0000000000000..df9581e7655f8
--- /dev/null
+++ b/test/string.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+include("strings/basic.jl")
+include("strings/types.jl")
+include("strings/search.jl")
+include("strings/util.jl")
+include("strings/io.jl")
diff --git a/test/strings.jl b/test/strings.jl
deleted file mode 100644
index cc304f48a097a..0000000000000
--- a/test/strings.jl
+++ /dev/null
@@ -1,2085 +0,0 @@
-# This file is a part of Julia. License is MIT: http://julialang.org/license
-
-# string escaping & unescaping
-cx = Any[
-    0x00000000      '\0'        "\\0"
-    0x00000001      '\x01'      "\\x01"
-    0x00000006      '\x06'      "\\x06"
-    0x00000007      '\a'        "\\a"
-    0x00000008      '\b'        "\\b"
-    0x00000009      '\t'        "\\t"
-    0x0000000a      '\n'        "\\n"
-    0x0000000b      '\v'        "\\v"
-    0x0000000c      '\f'        "\\f"
-    0x0000000d      '\r'        "\\r"
-    0x0000000e      '\x0e'      "\\x0e"
-    0x0000001a      '\x1a'      "\\x1a"
-    0x0000001b      '\e'        "\\e"
-    0x0000001c      '\x1c'      "\\x1c"
-    0x0000001f      '\x1f'      "\\x1f"
-    0x00000020      ' '         " "
-    0x0000002f      '/'         "/"
-    0x00000030      '0'         "0"
-    0x00000039      '9'         "9"
-    0x0000003a      ':'         ":"
-    0x00000040      '@'         "@"
-    0x00000041      'A'         "A"
-    0x0000005a      'Z'         "Z"
-    0x0000005b      '['         "["
-    0x00000060      '`'         "`"
-    0x00000061      'a'         "a"
-    0x0000007a      'z'         "z"
-    0x0000007b      '{'         "{"
-    0x0000007e      '~'         "~"
-    0x0000007f      '\x7f'      "\\x7f"
-    0x000000bf      '\ubf'      "\\ubf"
-    0x000000ff      '\uff'      "\\uff"
-    0x00000100      '\u100'     "\\u100"
-    0x000001ff      '\u1ff'     "\\u1ff"
-    0x00000fff      '\ufff'     "\\ufff"
-    0x00001000      '\u1000'    "\\u1000"
-    0x00001fff      '\u1fff'    "\\u1fff"
-    0x0000ffff      '\uffff'    "\\uffff"
-    0x00010000      '\U10000'   "\\U10000"
-    0x0001ffff      '\U1ffff'   "\\U1ffff"
-    0x0002ffff      '\U2ffff'   "\\U2ffff"
-    0x00030000      '\U30000'   "\\U30000"
-    0x000dffff      '\Udffff'   "\\Udffff"
-    0x000e0000      '\Ue0000'   "\\Ue0000"
-    0x000effff      '\Ueffff'   "\\Ueffff"
-    0x000f0000      '\Uf0000'   "\\Uf0000"
-    0x000fffff      '\Ufffff'   "\\Ufffff"
-    0x00100000      '\U100000'  "\\U100000"
-    0x0010ffff      '\U10ffff'  "\\U10ffff"
-]
-
-for i = 1:size(cx,1)
-    @test cx[i,1] == convert(UInt32, cx[i,2])
-    @test string(cx[i,2]) == unescape_string(cx[i,3])
-    if isascii(cx[i,2]) || !isprint(cx[i,2])
-        @test cx[i,3] == escape_string(string(cx[i,2]))
-    end
-    for j = 1:size(cx,1)
-        str = string(cx[i,2], cx[j,2])
-        @test str == unescape_string(escape_string(str))
-    end
-end
-
-for i = 0:0x7f, p = ["","\0","x","xxx","\x7f","\uFF","\uFFF",
-                     "\uFFFF","\U10000","\U10FFF","\U10FFFF"]
-    c = Char(i)
-    cp = string(c,p)
-    op = string(Char(div(i,8)), oct(i%8), p)
-    hp = string(Char(div(i,16)), hex(i%16), p)
-    @test string(unescape_string(string("\\",oct(i,1),p))) == cp
-    @test string(unescape_string(string("\\",oct(i,2),p))) == cp
-    @test string(unescape_string(string("\\",oct(i,3),p))) == cp
-    @test string(unescape_string(string("\\",oct(i,4),p))) == op
-    @test string(unescape_string(string("\\x",hex(i,1),p))) == cp
-    @test string(unescape_string(string("\\x",hex(i,2),p))) == cp
-    @test string(unescape_string(string("\\x",hex(i,3),p))) == hp
-end
-
-@test "\z" == unescape_string("\z") == "z"
-@test "\X" == unescape_string("\X") == "X"
-@test "\AbC" == unescape_string("\AbC") == "AbC"
-
-@test "\0" == unescape_string("\\0")
-@test "\1" == unescape_string("\\1")
-@test "\7" == unescape_string("\\7")
-@test "\0x" == unescape_string("\\0x")
-@test "\1x" == unescape_string("\\1x")
-@test "\7x" == unescape_string("\\7x")
-@test "\00" == unescape_string("\\00")
-@test "\01" == unescape_string("\\01")
-@test "\07" == unescape_string("\\07")
-@test "\70" == unescape_string("\\70")
-@test "\71" == unescape_string("\\71")
-@test "\77" == unescape_string("\\77")
-@test "\00x" == unescape_string("\\00x")
-@test "\01x" == unescape_string("\\01x")
-@test "\07x" == unescape_string("\\07x")
-@test "\70x" == unescape_string("\\70x")
-@test "\71x" == unescape_string("\\71x")
-@test "\77x" == unescape_string("\\77x")
-@test "\000" == unescape_string("\\000")
-@test "\001" == unescape_string("\\001")
-@test "\007" == unescape_string("\\007")
-@test "\070" == unescape_string("\\070")
-@test "\071" == unescape_string("\\071")
-@test "\077" == unescape_string("\\077")
-@test "\170" == unescape_string("\\170")
-@test "\171" == unescape_string("\\171")
-@test "\177" == unescape_string("\\177")
-@test "\0001" == unescape_string("\\0001")
-@test "\0011" == unescape_string("\\0011")
-@test "\0071" == unescape_string("\\0071")
-@test "\0701" == unescape_string("\\0701")
-@test "\0711" == unescape_string("\\0711")
-@test "\0771" == unescape_string("\\0771")
-@test "\1701" == unescape_string("\\1701")
-@test "\1711" == unescape_string("\\1711")
-@test "\1771" == unescape_string("\\1771")
-
-@test "\x0" == unescape_string("\\x0")
-@test "\x1" == unescape_string("\\x1")
-@test "\xf" == unescape_string("\\xf")
-@test "\xF" == unescape_string("\\xF")
-@test "\x0x" == unescape_string("\\x0x")
-@test "\x1x" == unescape_string("\\x1x")
-@test "\xfx" == unescape_string("\\xfx")
-@test "\xFx" == unescape_string("\\xFx")
-@test "\x00" == unescape_string("\\x00")
-@test "\x01" == unescape_string("\\x01")
-@test "\x0f" == unescape_string("\\x0f")
-@test "\x0F" == unescape_string("\\x0F")
-
-# integer parsing
-@test is(parse(Int32,"0",36),Int32(0))
-@test is(parse(Int32,"1",36),Int32(1))
-@test is(parse(Int32,"9",36),Int32(9))
-@test is(parse(Int32,"A",36),Int32(10))
-@test is(parse(Int32,"a",36),Int32(10))
-@test is(parse(Int32,"B",36),Int32(11))
-@test is(parse(Int32,"b",36),Int32(11))
-@test is(parse(Int32,"F",36),Int32(15))
-@test is(parse(Int32,"f",36),Int32(15))
-@test is(parse(Int32,"Z",36),Int32(35))
-@test is(parse(Int32,"z",36),Int32(35))
-
-@test parse(Int,"0") == 0
-@test parse(Int,"-0") == 0
-@test parse(Int,"1") == 1
-@test parse(Int,"-1") == -1
-@test parse(Int,"9") == 9
-@test parse(Int,"-9") == -9
-@test parse(Int,"10") == 10
-@test parse(Int,"-10") == -10
-@test parse(Int64,"3830974272") == 3830974272
-@test parse(Int64,"-3830974272") == -3830974272
-@test parse(Int,'3') == 3
-@test parse(Int,'3', 8) == 3
-
-parsebin(s) = parse(Int,s,2)
-parseoct(s) = parse(Int,s,8)
-parsehex(s) = parse(Int,s,16)
-
-@test parsebin("0") == 0
-@test parsebin("-0") == 0
-@test parsebin("1") == 1
-@test parsebin("-1") == -1
-@test parsebin("10") == 2
-@test parsebin("-10") == -2
-@test parsebin("11") == 3
-@test parsebin("-11") == -3
-@test parsebin("1111000011110000111100001111") == 252645135
-@test parsebin("-1111000011110000111100001111") == -252645135
-
-@test parseoct("0") == 0
-@test parseoct("-0") == 0
-@test parseoct("1") == 1
-@test parseoct("-1") == -1
-@test parseoct("7") == 7
-@test parseoct("-7") == -7
-@test parseoct("10") == 8
-@test parseoct("-10") == -8
-@test parseoct("11") == 9
-@test parseoct("-11") == -9
-@test parseoct("72") == 58
-@test parseoct("-72") == -58
-@test parseoct("3172207320") == 434704080
-@test parseoct("-3172207320") == -434704080
-
-@test parsehex("0") == 0
-@test parsehex("-0") == 0
-@test parsehex("1") == 1
-@test parsehex("-1") == -1
-@test parsehex("9") == 9
-@test parsehex("-9") == -9
-@test parsehex("a") == 10
-@test parsehex("-a") == -10
-@test parsehex("f") == 15
-@test parsehex("-f") == -15
-@test parsehex("10") == 16
-@test parsehex("-10") == -16
-@test parsehex("0BADF00D") == 195948557
-@test parsehex("-0BADF00D") == -195948557
-@test parse(Int64,"BADCAB1E",16) == 3135023902
-@test parse(Int64,"-BADCAB1E",16) == -3135023902
-@test parse(Int64,"CafeBabe",16) == 3405691582
-@test parse(Int64,"-CafeBabe",16) == -3405691582
-@test parse(Int64,"DeadBeef",16) == 3735928559
-@test parse(Int64,"-DeadBeef",16) == -3735928559
-
-@test parse(Int,"2\n") == 2
-@test parse(Int,"   2 \n ") == 2
-@test parse(Int," 2 ") == 2
-@test parse(Int,"2 ") == 2
-@test parse(Int," 2") == 2
-@test parse(Int,"+2\n") == 2
-@test parse(Int,"-2") == -2
-@test_throws ArgumentError parse(Int,"   2 \n 0")
-@test_throws ArgumentError parse(Int,"2x")
-@test_throws ArgumentError parse(Int,"-")
-
-# multibyte spaces
-@test parse(Int, "3\u2003\u202F") == 3
-@test_throws ArgumentError parse(Int, "3\u2003\u202F,")
-
-@test parse(Int,'a') == 10
-@test_throws ArgumentError parse(Int,typemax(Char))
-
-@test parse(Int,"1234") == 1234
-@test parse(Int,"0x1234") == 0x1234
-@test parse(Int,"0o1234") == 0o1234
-@test parse(Int,"0b1011") == 0b1011
-@test parse(Int,"-1234") == -1234
-@test parse(Int,"-0x1234") == -Int(0x1234)
-@test parse(Int,"-0o1234") == -Int(0o1234)
-@test parse(Int,"-0b1011") == -Int(0b1011)
-
-## FIXME: #4905, do these tests for Int128/UInt128!
-for T in (Int8, Int16, Int32, Int64)
-    @test parse(T,string(typemin(T))) == typemin(T)
-    @test parse(T,string(typemax(T))) == typemax(T)
-    @test_throws OverflowError parse(T,string(big(typemin(T))-1))
-    @test_throws OverflowError parse(T,string(big(typemax(T))+1))
-end
-
-for T in (UInt8,UInt16,UInt32,UInt64)
-    @test parse(T,string(typemin(T))) == typemin(T)
-    @test parse(T,string(typemax(T))) == typemax(T)
-    @test_throws ArgumentError parse(T,string(big(typemin(T))-1))
-    @test_throws OverflowError parse(T,string(big(typemax(T))+1))
-end
-
-@test lpad("foo", 3) == "foo"
-@test rpad("foo", 3) == "foo"
-@test lpad("foo", 5) == "  foo"
-@test rpad("foo", 5) == "foo  "
-@test lpad("foo", 5, "  ") == "  foo"
-@test rpad("foo", 5, "  ") == "foo  "
-@test lpad("foo", 6, "  ") == "   foo"
-@test rpad("foo", 6, "  ") == "foo   "
-
-# string manipulation
-@test strip("\t  hi   \n") == "hi"
-@test strip("foobarfoo", ['f', 'o']) == "bar"
-
-# some test strings
-astr = "Hello, world.\n"
-u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
-
-## generic string uses only endof and next ##
-
-immutable GenericString <: AbstractString
-    string::AbstractString
-end
-
-Base.endof(s::GenericString) = endof(s.string)
-Base.next(s::GenericString, i::Int) = next(s.string, i)
-
-# ascii search
-for str in [astr, GenericString(astr)]
-    @test_throws BoundsError search(str, 'z', 0)
-    @test_throws BoundsError search(str, '∀', 0)
-    @test search(str, 'x') == 0
-    @test search(str, '\0') == 0
-    @test search(str, '\u80') == 0
-    @test search(str, '∀') == 0
-    @test search(str, 'H') == 1
-    @test search(str, 'l') == 3
-    @test search(str, 'l', 4) == 4
-    @test search(str, 'l', 5) == 11
-    @test search(str, 'l', 12) == 0
-    @test search(str, ',') == 6
-    @test search(str, ',', 7) == 0
-    @test search(str, '\n') == 14
-    @test search(str, '\n', 15) == 0
-    @test_throws BoundsError search(str, 'ε', nextind(str,endof(str))+1)
-    @test_throws BoundsError search(str, 'a', nextind(str,endof(str))+1)
-end
-
-# ascii rsearch
-for str in [astr]
-    @test rsearch(str, 'x') == 0
-    @test rsearch(str, '\0') == 0
-    @test rsearch(str, '\u80') == 0
-    @test rsearch(str, '∀') == 0
-    @test rsearch(str, 'H') == 1
-    @test rsearch(str, 'H', 0) == 0
-    @test rsearch(str, 'l') == 11
-    @test rsearch(str, 'l', 5) == 4
-    @test rsearch(str, 'l', 4) == 4
-    @test rsearch(str, 'l', 3) == 3
-    @test rsearch(str, 'l', 2) == 0
-    @test rsearch(str, ',') == 6
-    @test rsearch(str, ',', 5) == 0
-    @test rsearch(str, '\n') == 14
-end
-
-# utf-8 search
-for str in (u8str, GenericString(u8str))
-    @test_throws BoundsError search(str, 'z', 0)
-    @test_throws BoundsError search(str, '∀', 0)
-    @test search(str, 'z') == 0
-    @test search(str, '\0') == 0
-    @test search(str, '\u80') == 0
-    @test search(str, '∄') == 0
-    @test search(str, '∀') == 1
-    @test_throws UnicodeError search(str, '∀', 2)
-    @test search(str, '∀', 4) == 0
-    @test search(str, '∃') == 13
-    @test_throws UnicodeError search(str, '∃', 15)
-    @test search(str, '∃', 16) == 0
-    @test search(str, 'x') == 26
-    @test search(str, 'x', 27) == 43
-    @test search(str, 'x', 44) == 0
-    @test search(str, 'δ') == 17
-    @test_throws UnicodeError search(str, 'δ', 18)
-    @test search(str, 'δ', nextind(str,17)) == 33
-    @test search(str, 'δ', nextind(str,33)) == 0
-    @test search(str, 'ε') == 5
-    @test search(str, 'ε', nextind(str,5)) == 54
-    @test search(str, 'ε', nextind(str,54)) == 0
-    @test search(str, 'ε', nextind(str,endof(str))) == 0
-    @test search(str, 'a', nextind(str,endof(str))) == 0
-    @test_throws BoundsError search(str, 'ε', nextind(str,endof(str))+1)
-    @test_throws BoundsError search(str, 'a', nextind(str,endof(str))+1)
-end
-
-# utf-8 rsearch
-for str in [u8str]
-    @test rsearch(str, 'z') == 0
-    @test rsearch(str, '\0') == 0
-    @test rsearch(str, '\u80') == 0
-    @test rsearch(str, '∄') == 0
-    @test rsearch(str, '∀') == 1
-    @test rsearch(str, '∀', 0) == 0
-    @test rsearch(str, '∃') == 13
-    @test rsearch(str, '∃', 14) == 13
-    @test rsearch(str, '∃', 13) == 13
-    @test rsearch(str, '∃', 12) == 0
-    @test rsearch(str, 'x') == 43
-    @test rsearch(str, 'x', 42) == 26
-    @test rsearch(str, 'x', 25) == 0
-    @test rsearch(str, 'δ') == 33
-    @test rsearch(str, 'δ', 32) == 17
-    @test rsearch(str, 'δ', 16) == 0
-    @test rsearch(str, 'ε') == 54
-    @test rsearch(str, 'ε', 53) == 5
-    @test rsearch(str, 'ε', 4) == 0
-end
-
-# string search with a single-char string
-@test search(astr, "x") == 0:-1
-@test search(astr, "H") == 1:1
-@test search(astr, "H", 2) == 0:-1
-@test search(astr, "l") == 3:3
-@test search(astr, "l", 4) == 4:4
-@test search(astr, "l", 5) == 11:11
-@test search(astr, "l", 12) == 0:-1
-@test search(astr, "\n") == 14:14
-@test search(astr, "\n", 15) == 0:-1
-
-@test search(u8str, "z") == 0:-1
-@test search(u8str, "∄") == 0:-1
-@test search(u8str, "∀") == 1:1
-@test search(u8str, "∀", 4) == 0:-1
-@test search(u8str, "∃") == 13:13
-@test search(u8str, "∃", 16) == 0:-1
-@test search(u8str, "x") == 26:26
-@test search(u8str, "x", 27) == 43:43
-@test search(u8str, "x", 44) == 0:-1
-@test search(u8str, "ε") == 5:5
-@test search(u8str, "ε", 7) == 54:54
-@test search(u8str, "ε", 56) == 0:-1
-
-# string rsearch with a single-char string
-@test rsearch(astr, "x") == 0:-1
-@test rsearch(astr, "H") == 1:1
-@test rsearch(astr, "H", 2) == 1:1
-@test rsearch(astr, "H", 0) == 0:-1
-@test rsearch(astr, "l") == 11:11
-@test rsearch(astr, "l", 10) == 4:4
-@test rsearch(astr, "l", 4) == 4:4
-@test rsearch(astr, "l", 3) == 3:3
-@test rsearch(astr, "l", 2) == 0:-1
-@test rsearch(astr, "\n") == 14:14
-@test rsearch(astr, "\n", 13) == 0:-1
-
-@test rsearch(u8str, "z") == 0:-1
-@test rsearch(u8str, "∄") == 0:-1
-@test rsearch(u8str, "∀") == 1:1
-@test rsearch(u8str, "∀", 0) == 0:-1
-#TODO: setting the limit in the middle of a wide char
-#      makes search fail but rsearch succeed.
-#      Should rsearch fail as well?
-#@test rsearch(u8str, "∀", 2) == 0:-1 # gives 1:3
-@test rsearch(u8str, "∃") == 13:13
-@test rsearch(u8str, "∃", 12) == 0:-1
-@test rsearch(u8str, "x") == 43:43
-@test rsearch(u8str, "x", 42) == 26:26
-@test rsearch(u8str, "x", 25) == 0:-1
-@test rsearch(u8str, "ε") == 54:54
-@test rsearch(u8str, "ε", 53) == 5:5
-@test rsearch(u8str, "ε", 4) == 0:-1
-
-# string search with a single-char regex
-@test search(astr, r"x") == 0:-1
-@test search(astr, r"H") == 1:1
-@test search(astr, r"H", 2) == 0:-1
-@test search(astr, r"l") == 3:3
-@test search(astr, r"l", 4) == 4:4
-@test search(astr, r"l", 5) == 11:11
-@test search(astr, r"l", 12) == 0:-1
-@test search(astr, r"\n") == 14:14
-@test search(astr, r"\n", 15) == 0:-1
-@test search(u8str, r"z") == 0:-1
-@test search(u8str, r"∄") == 0:-1
-@test search(u8str, r"∀") == 1:1
-@test search(u8str, r"∀", 4) == 0:-1
-@test search(u8str, r"∀") == search(u8str, r"\u2200")
-@test search(u8str, r"∀", 4) == search(u8str, r"\u2200", 4)
-@test search(u8str, r"∃") == 13:13
-@test search(u8str, r"∃", 16) == 0:-1
-@test search(u8str, r"x") == 26:26
-@test search(u8str, r"x", 27) == 43:43
-@test search(u8str, r"x", 44) == 0:-1
-@test search(u8str, r"ε") == 5:5
-@test search(u8str, r"ε", 7) == 54:54
-@test search(u8str, r"ε", 56) == 0:-1
-for i = 1:endof(astr)
-    @test search(astr, r"."s, i) == i:i
-end
-for i = 1:endof(u8str)
-    if isvalid(u8str,i)
-        @test search(u8str, r"."s, i) == i:i
-    end
-end
-
-# string search with a zero-char string
-for i = 1:endof(astr)
-    @test search(astr, "", i) == i:i-1
-end
-for i = 1:endof(u8str)
-    @test search(u8str, "", i) == i:i-1
-end
-@test search("", "") == 1:0
-
-# string rsearch with a zero-char string
-for i = 1:endof(astr)
-    @test rsearch(astr, "", i) == i:i-1
-end
-for i = 1:endof(u8str)
-    @test rsearch(u8str, "", i) == i:i-1
-end
-@test rsearch("", "") == 1:0
-
-# string search with a zero-char regex
-for i = 1:endof(astr)
-    @test search(astr, r"", i) == i:i-1
-end
-for i = 1:endof(u8str)
-    # TODO: should regex search fast-forward invalid indices?
-    if isvalid(u8str,i)
-        @test search(u8str, r""s, i) == i:i-1
-    end
-end
-
-# string search with a two-char string literal
-@test search("foo,bar,baz", "xx") == 0:-1
-@test search("foo,bar,baz", "fo") == 1:2
-@test search("foo,bar,baz", "fo", 3) == 0:-1
-@test search("foo,bar,baz", "oo") == 2:3
-@test search("foo,bar,baz", "oo", 4) == 0:-1
-@test search("foo,bar,baz", "o,") == 3:4
-@test search("foo,bar,baz", "o,", 5) == 0:-1
-@test search("foo,bar,baz", ",b") == 4:5
-@test search("foo,bar,baz", ",b", 6) == 8:9
-@test search("foo,bar,baz", ",b", 10) == 0:-1
-@test search("foo,bar,baz", "az") == 10:11
-@test search("foo,bar,baz", "az", 12) == 0:-1
-
-# issue #9365
-# string search with a two-char UTF-8 (2 byte) string literal
-@test search("ééé", "éé") == 1:3
-@test search("ééé", "éé", 1) == 1:3
-# string search with a two-char UTF-8 (3 byte) string literal
-@test search("€€€", "€€") == 1:4
-@test search("€€€", "€€", 1) == 1:4
-# string search with a two-char UTF-8 (4 byte) string literal
-@test search("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 1:5
-@test search("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
-
-# string search with a two-char UTF-8 (2 byte) string literal
-@test search("éé", "éé") == 1:3
-@test search("éé", "éé", 1) == 1:3
-# string search with a two-char UTF-8 (3 byte) string literal
-@test search("€€", "€€") == 1:4
-@test search("€€", "€€", 1) == 1:4
-# string search with a two-char UTF-8 (4 byte) string literal
-@test search("\U1f596\U1f596", "\U1f596\U1f596") == 1:5
-@test search("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
-
-# string rsearch with a two-char UTF-8 (2 byte) string literal
-@test rsearch("ééé", "éé") == 3:5
-@test rsearch("ééé", "éé", endof("ééé")) == 3:5
-# string rsearch with a two-char UTF-8 (3 byte) string literal
-@test rsearch("€€€", "€€") == 4:7
-@test rsearch("€€€", "€€", endof("€€€")) == 4:7
-# string rsearch with a two-char UTF-8 (4 byte) string literal
-@test rsearch("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 5:9
-@test rsearch("\U1f596\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 5:9
-
-# string rsearch with a two-char UTF-8 (2 byte) string literal
-@test rsearch("éé", "éé") == 1:3        # should really be 1:4!
-@test rsearch("éé", "éé", endof("ééé")) == 1:3
-# string search with a two-char UTF-8 (3 byte) string literal
-@test rsearch("€€", "€€") == 1:4        # should really be 1:6!
-@test rsearch("€€", "€€", endof("€€€")) == 1:4
-# string search with a two-char UTF-8 (4 byte) string literal
-@test rsearch("\U1f596\U1f596", "\U1f596\U1f596") == 1:5        # should really be 1:8!
-@test rsearch("\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 1:5
-
-# string rsearch with a two-char string literal
-@test rsearch("foo,bar,baz", "xx") == 0:-1
-@test rsearch("foo,bar,baz", "fo") == 1:2
-@test rsearch("foo,bar,baz", "fo", 1) == 0:-1
-@test rsearch("foo,bar,baz", "oo") == 2:3
-@test rsearch("foo,bar,baz", "oo", 2) == 0:-1
-@test rsearch("foo,bar,baz", "o,") == 3:4
-@test rsearch("foo,bar,baz", "o,", 1) == 0:-1
-@test rsearch("foo,bar,baz", ",b") == 8:9
-@test rsearch("foo,bar,baz", ",b", 6) == 4:5
-@test rsearch("foo,bar,baz", ",b", 3) == 0:-1
-@test rsearch("foo,bar,baz", "az") == 10:11
-@test rsearch("foo,bar,baz", "az", 10) == 0:-1
-
-# array rsearch
-@test rsearch(UInt8[1,2,3],UInt8[2,3],3) == 2:3
-@test rsearch(UInt8[1,2,3],UInt8[2,3],1) == 0:-1
-
-# string search with a two-char regex
-@test search("foo,bar,baz", r"xx") == 0:-1
-@test search("foo,bar,baz", r"fo") == 1:2
-@test search("foo,bar,baz", r"fo", 3) == 0:-1
-@test search("foo,bar,baz", r"oo") == 2:3
-@test search("foo,bar,baz", r"oo", 4) == 0:-1
-@test search("foo,bar,baz", r"o,") == 3:4
-@test search("foo,bar,baz", r"o,", 5) == 0:-1
-@test search("foo,bar,baz", r",b") == 4:5
-@test search("foo,bar,baz", r",b", 6) == 8:9
-@test search("foo,bar,baz", r",b", 10) == 0:-1
-@test search("foo,bar,baz", r"az") == 10:11
-@test search("foo,bar,baz", r"az", 12) == 0:-1
-
-@test searchindex("foo", 'o') == 2
-@test searchindex("foo", 'o', 3) == 3
-
-# string searchindex with a two-char UTF-8 (2 byte) string literal
-@test searchindex("ééé", "éé") == 1
-@test searchindex("ééé", "éé", 1) == 1
-# string searchindex with a two-char UTF-8 (3 byte) string literal
-@test searchindex("€€€", "€€") == 1
-@test searchindex("€€€", "€€", 1) == 1
-# string searchindex with a two-char UTF-8 (4 byte) string literal
-@test searchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 1
-@test searchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 1) == 1
-
-# string searchindex with a two-char UTF-8 (2 byte) string literal
-@test searchindex("éé", "éé") == 1
-@test searchindex("éé", "éé", 1) == 1
-# string searchindex with a two-char UTF-8 (3 byte) string literal
-@test searchindex("€€", "€€") == 1
-@test searchindex("€€", "€€", 1) == 1
-# string searchindex with a two-char UTF-8 (4 byte) string literal
-@test searchindex("\U1f596\U1f596", "\U1f596\U1f596") == 1
-@test searchindex("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1
-
-# string rsearchindex with a two-char UTF-8 (2 byte) string literal
-@test rsearchindex("ééé", "éé") == 3
-@test rsearchindex("ééé", "éé", endof("ééé")) == 3
-# string rsearchindex with a two-char UTF-8 (3 byte) string literal
-@test rsearchindex("€€€", "€€") == 4
-@test rsearchindex("€€€", "€€", endof("€€€")) == 4
-# string rsearchindex with a two-char UTF-8 (4 byte) string literal
-@test rsearchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 5
-@test rsearchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 5
-
-# string rsearchindex with a two-char UTF-8 (2 byte) string literal
-@test rsearchindex("éé", "éé") == 1
-@test rsearchindex("éé", "éé", endof("ééé")) == 1
-# string searchindex with a two-char UTF-8 (3 byte) string literal
-@test rsearchindex("€€", "€€") == 1
-@test rsearchindex("€€", "€€", endof("€€€")) == 1
-# string searchindex with a two-char UTF-8 (4 byte) string literal
-@test rsearchindex("\U1f596\U1f596", "\U1f596\U1f596") == 1
-@test rsearchindex("\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 1
-
-# split
-@test isequal(split("foo,bar,baz", 'x'), ["foo,bar,baz"])
-@test isequal(split("foo,bar,baz", ','), ["foo","bar","baz"])
-@test isequal(split("foo,bar,baz", ","), ["foo","bar","baz"])
-@test isequal(split("foo,bar,baz", r","), ["foo","bar","baz"])
-@test isequal(split("foo,bar,baz", ','; limit=0), ["foo","bar","baz"])
-@test isequal(split("foo,bar,baz", ','; limit=1), ["foo,bar,baz"])
-@test isequal(split("foo,bar,baz", ','; limit=2), ["foo","bar,baz"])
-@test isequal(split("foo,bar,baz", ','; limit=3), ["foo","bar","baz"])
-@test isequal(split("foo,bar", "o,b"), ["fo","ar"])
-
-@test isequal(split("", ','), [""])
-@test isequal(split(",", ','), ["",""])
-@test isequal(split(",,", ','), ["","",""])
-@test isequal(split("", ','  ; keep=false), [])
-@test isequal(split(",", ',' ; keep=false), [])
-@test isequal(split(",,", ','; keep=false), [])
-
-@test isequal(split("a b c"), ["a","b","c"])
-@test isequal(split("a  b \t c\n"), ["a","b","c"])
-
-@test isequal(rsplit("foo,bar,baz", 'x'), ["foo,bar,baz"])
-@test isequal(rsplit("foo,bar,baz", ','), ["foo","bar","baz"])
-@test isequal(rsplit("foo,bar,baz", ","), ["foo","bar","baz"])
-@test isequal(rsplit("foo,bar,baz", ','; limit=0), ["foo","bar","baz"])
-@test isequal(rsplit("foo,bar,baz", ','; limit=1), ["foo,bar,baz"])
-@test isequal(rsplit("foo,bar,baz", ','; limit=2), ["foo,bar","baz"])
-@test isequal(rsplit("foo,bar,baz", ','; limit=3), ["foo","bar","baz"])
-@test isequal(rsplit("foo,bar", "o,b"), ["fo","ar"])
-
-@test isequal(rsplit("", ','), [""])
-@test isequal(rsplit(",", ','), ["",""])
-@test isequal(rsplit(",,", ','), ["","",""])
-@test isequal(rsplit(",,", ','; limit=2), [",",""])
-@test isequal(rsplit("", ','  ; keep=false), [])
-@test isequal(rsplit(",", ',' ; keep=false), [])
-@test isequal(rsplit(",,", ','; keep=false), [])
-
-#@test isequal(rsplit("a b c"), ["a","b","c"])
-#@test isequal(rsplit("a  b \t c\n"), ["a","b","c"])
-
-let str = "a.:.ba..:..cba.:.:.dcba.:."
-@test isequal(split(str, ".:."), ["a","ba.",".cba",":.dcba",""])
-@test isequal(split(str, ".:."; keep=false), ["a","ba.",".cba",":.dcba"])
-@test isequal(split(str, ".:."), ["a","ba.",".cba",":.dcba",""])
-@test isequal(split(str, r"\.(:\.)+"), ["a","ba.",".cba","dcba",""])
-@test isequal(split(str, r"\.(:\.)+"; keep=false), ["a","ba.",".cba","dcba"])
-@test isequal(split(str, r"\.+:\.+"), ["a","ba","cba",":.dcba",""])
-@test isequal(split(str, r"\.+:\.+"; keep=false), ["a","ba","cba",":.dcba"])
-
-@test isequal(rsplit(str, ".:."), ["a","ba.",".cba.:","dcba",""])
-@test isequal(rsplit(str, ".:."; keep=false), ["a","ba.",".cba.:","dcba"])
-@test isequal(rsplit(str, ".:."; limit=2), ["a.:.ba..:..cba.:.:.dcba", ""])
-@test isequal(rsplit(str, ".:."; limit=3), ["a.:.ba..:..cba.:", "dcba", ""])
-@test isequal(rsplit(str, ".:."; limit=4), ["a.:.ba.", ".cba.:", "dcba", ""])
-@test isequal(rsplit(str, ".:."; limit=5), ["a", "ba.", ".cba.:", "dcba", ""])
-@test isequal(rsplit(str, ".:."; limit=6), ["a", "ba.", ".cba.:", "dcba", ""])
-end
-
-# zero-width splits
-@test isequal(rsplit("", ""), [""])
-
-@test isequal(split("", ""), [""])
-@test isequal(split("", r""), [""])
-@test isequal(split("abc", ""), ["a","b","c"])
-@test isequal(split("abc", r""), ["a","b","c"])
-@test isequal(split("abcd", r"b?"), ["a","c","d"])
-@test isequal(split("abcd", r"b*"), ["a","c","d"])
-@test isequal(split("abcd", r"b+"), ["a","cd"])
-@test isequal(split("abcd", r"b?c?"), ["a","d"])
-@test isequal(split("abcd", r"[bc]?"), ["a","","d"])
-@test isequal(split("abcd", r"a*"), ["","b","c","d"])
-@test isequal(split("abcd", r"a+"), ["","bcd"])
-@test isequal(split("abcd", r"d*"), ["a","b","c",""])
-@test isequal(split("abcd", r"d+"), ["abc",""])
-@test isequal(split("abcd", r"[ad]?"), ["","b","c",""])
-
-# replace
-@test replace("foobar", 'o', '0') == "f00bar"
-@test replace("foobar", 'o', '0', 1) == "f0obar"
-@test replace("foobar", 'o', "") == "fbar"
-@test replace("foobar", 'o', "", 1) == "fobar"
-@test replace("foobar", 'f', 'F') == "Foobar"
-@test replace("foobar", 'r', 'R') == "foobaR"
-
-@test replace("foofoofoo", "foo", "bar") == "barbarbar"
-@test replace("foobarfoo", "foo", "baz") == "bazbarbaz"
-@test replace("barfoofoo", "foo", "baz") == "barbazbaz"
-
-@test replace("", "", "") == ""
-@test replace("", "", "x") == "x"
-@test replace("", "x", "y") == ""
-
-@test replace("abcd", "", "^") == "^a^b^c^d^"
-@test replace("abcd", "b", "^") == "a^cd"
-@test replace("abcd", r"b?", "^") == "^a^c^d^"
-@test replace("abcd", r"b+", "^") == "a^cd"
-@test replace("abcd", r"b?c?", "^") == "^a^d^"
-@test replace("abcd", r"[bc]?", "^") == "^a^^d^"
-
-@test replace("foobarfoo", r"(fo|ba)", "xx") == "xxoxxrxxo"
-@test replace("foobarfoo", r"(foo|ba)", "bar") == "barbarrbar"
-
-@test replace("foobar", 'o', 'ø') == "føøbar"
-@test replace("foobar", 'o', 'ø', 1) == "føobar"
-@test replace("føøbar", 'ø', 'o') == "foobar"
-@test replace("føøbar", 'ø', 'o', 1) == "foøbar"
-@test replace("føøbar", 'ø', 'ö') == "fööbar"
-@test replace("føøbar", 'ø', 'ö', 1) == "föøbar"
-@test replace("føøbar", 'ø', "") == "fbar"
-@test replace("føøbar", 'ø', "", 1) == "føbar"
-@test replace("føøbar", 'f', 'F') == "Føøbar"
-@test replace("ḟøøbar", 'ḟ', 'F') == "Føøbar"
-@test replace("føøbar", 'f', 'Ḟ') == "Ḟøøbar"
-@test replace("ḟøøbar", 'ḟ', 'Ḟ') == "Ḟøøbar"
-@test replace("føøbar", 'r', 'R') == "føøbaR"
-@test replace("føøbaṙ", 'ṙ', 'R') == "føøbaR"
-@test replace("føøbar", 'r', 'Ṙ') == "føøbaṘ"
-@test replace("føøbaṙ", 'ṙ', 'Ṙ') == "føøbaṘ"
-
-@test replace("ḟøøḟøøḟøø", "ḟøø", "bar") == "barbarbar"
-@test replace("ḟøøbarḟøø", "ḟøø", "baz") == "bazbarbaz"
-@test replace("barḟøøḟøø", "ḟøø", "baz") == "barbazbaz"
-
-@test replace("foofoofoo", "foo", "ƀäṙ") == "ƀäṙƀäṙƀäṙ"
-@test replace("fooƀäṙfoo", "foo", "baz") == "bazƀäṙbaz"
-@test replace("ƀäṙfoofoo", "foo", "baz") == "ƀäṙbazbaz"
-
-@test replace("foofoofoo", "foo", "bar") == "barbarbar"
-@test replace("foobarfoo", "foo", "ƀäż") == "ƀäżbarƀäż"
-@test replace("barfoofoo", "foo", "ƀäż") == "barƀäżƀäż"
-
-@test replace("ḟøøḟøøḟøø", "ḟøø", "ƀäṙ") == "ƀäṙƀäṙƀäṙ"
-@test replace("ḟøøƀäṙḟøø", "ḟøø", "baz") == "bazƀäṙbaz"
-@test replace("ƀäṙḟøøḟøø", "ḟøø", "baz") == "ƀäṙbazbaz"
-
-@test replace("ḟøøḟøøḟøø", "ḟøø", "bar") == "barbarbar"
-@test replace("ḟøøbarḟøø", "ḟøø", "ƀäż") == "ƀäżbarƀäż"
-@test replace("barḟøøḟøø", "ḟøø", "ƀäż") == "barƀäżƀäż"
-
-@test replace("ḟøøḟøøḟøø", "ḟøø", "ƀäṙ") == "ƀäṙƀäṙƀäṙ"
-@test replace("ḟøøƀäṙḟøø", "ḟøø", "ƀäż") == "ƀäżƀäṙƀäż"
-@test replace("ƀäṙḟøøḟøø", "ḟøø", "ƀäż") == "ƀäṙƀäżƀäż"
-
-@test replace("", "", "ẍ") == "ẍ"
-@test replace("", "ẍ", "ÿ") == ""
-
-@test replace("äƀçđ", "", "π") == "πäπƀπçπđπ"
-@test replace("äƀçđ", "ƀ", "π") == "äπçđ"
-@test replace("äƀçđ", r"ƀ?", "π") == "πäπçπđπ"
-@test replace("äƀçđ", r"ƀ+", "π") == "äπçđ"
-@test replace("äƀçđ", r"ƀ?ç?", "π") == "πäπđπ"
-@test replace("äƀçđ", r"[ƀç]?", "π") == "πäππđπ"
-
-@test replace("foobarfoo", r"(fo|ba)", "ẍẍ") == "ẍẍoẍẍrẍẍo"
-
-@test replace("ḟøøbarḟøø", r"(ḟø|ba)", "xx") == "xxøxxrxxø"
-@test replace("ḟøøbarḟøø", r"(ḟøø|ba)", "bar") == "barbarrbar"
-
-@test replace("fooƀäṙfoo", r"(fo|ƀä)", "xx") == "xxoxxṙxxo"
-@test replace("fooƀäṙfoo", r"(foo|ƀä)", "ƀäṙ") == "ƀäṙƀäṙṙƀäṙ"
-
-@test replace("ḟøøƀäṙḟøø", r"(ḟø|ƀä)", "xx") == "xxøxxṙxxø"
-@test replace("ḟøøƀäṙḟøø", r"(ḟøø|ƀä)", "ƀäṙ") == "ƀäṙƀäṙṙƀäṙ"
-
-@test replace("foo", "oo", uppercase) == "fOO"
-
-# chomp/chop
-@test chomp("foo\n") == "foo"
-@test chop("foob") == "foo"
-
-# lower and upper
-@test uppercase("aBc") == "ABC"
-@test uppercase('A') == 'A'
-@test uppercase('a') == 'A'
-@test lowercase("AbC") == "abc"
-@test lowercase('A') == 'a'
-@test lowercase('a') == 'a'
-@test uppercase('α') == '\u0391'
-@test lowercase('Δ') == 'δ'
-@test lowercase('\U118bf') == '\U118df'
-@test uppercase('\U1044d') == '\U10425'
-@test ucfirst("Abc") == "Abc"
-@test ucfirst("abc") == "Abc"
-@test lcfirst("ABC") == "aBC"
-@test lcfirst("aBC") == "aBC"
-
-# {starts,ends}with
-@test startswith("abcd", 'a')
-@test startswith("abcd", "a")
-@test startswith("abcd", "ab")
-@test !startswith("ab", "abcd")
-@test !startswith("abcd", "bc")
-@test endswith("abcd", 'd')
-@test endswith("abcd", "d")
-@test endswith("abcd", "cd")
-@test !endswith("abcd", "dc")
-@test !endswith("cd", "abcd")
-
-@test filter(x -> x ∈ ['f', 'o'], "foobar") == "foo"
-
-# RepStrings and SubStrings
-u8str2 = u8str^2
-len_u8str = length(u8str)
-slen_u8str = length(u8str)
-len_u8str2 = length(u8str2)
-slen_u8str2 = length(u8str2)
-
-@test len_u8str2 == 2 * len_u8str
-@test slen_u8str2 == 2 * slen_u8str
-
-u8str2plain = utf8(u8str2)
-
-for i1 = 1:length(u8str2)
-    if !isvalid(u8str2, i1); continue; end
-    for i2 = i1:length(u8str2)
-        if !isvalid(u8str2, i2); continue; end
-        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
-        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
-        @test u8str2[i1:i2] == u8str2plain[i1:i2]
-    end
-end
-
-str="tempus fugit"              #length(str)==12
-ss=SubString(str,1,length(str)) #match source string
-@test length(ss)==length(str)
-
-ss=SubString(str,1,0)    #empty SubString
-@test length(ss)==0
-
-ss=SubString(str,14,20)  #start indexed beyond source string length
-@test length(ss)==0
-
-ss=SubString(str,10,16)  #end indexed beyond source string length
-@test length(ss)==3
-
-str2=""
-ss=SubString(str2,1,4)  #empty source string
-@test length(ss)==0
-
-ss=SubString(str2,1,1)  #empty source string, identical start and end index
-@test length(ss)==0
-
-@test SubString("foobar",big(1),big(3)) == "foo"
-
-str = "aa\u2200\u2222bb"
-u = SubString(str, 3, 6)
-@test length(u)==2
-b = IOBuffer()
-write(b, u)
-@test takebuf_string(b) == "\u2200\u2222"
-
-str = "føøbar"
-u = SubString(str, 4, 3)
-@test length(u)==0
-b = IOBuffer()
-write(b, u)
-@test takebuf_string(b) == ""
-
-str = "føøbar"
-u = SubString(str, 10, 10)
-@test length(u)==0
-b = IOBuffer()
-write(b, u)
-@test takebuf_string(b) == ""
-
-@test replace("\u2202", '*', '\0') == "\u2202"
-
-# search and SubString (issue #5679)
-str = "Hello, world!"
-u = SubString(str, 1, 5)
-@test rsearch(u, "World") == 0:-1
-@test rsearch(u, 'z') == 0
-@test rsearch(u, "ll") == 3:4
-
-# quotes + interpolation (issue #455)
-@test "$("string")" == "string"
-arr = ["a","b","c"]
-@test "[$(join(arr, " - "))]" == "[a - b - c]"
-
-# string iteration, and issue #1454
-str = "é"
-str_a = vcat(str...)
-@test length(str_a)==1
-@test str_a[1] == str[1]
-
-str = "s\u2200"
-@test str[1:end] == str
-
-# triple-quote delimited strings
-@test """abc""" == "abc"
-@test """ab"c""" == "ab\"c"
-@test """ab""c""" == "ab\"\"c"
-@test """ab"\"c""" == "ab\"\"c"
-@test """abc\"""" == "abc\""
-n = 3
-@test """$n\n""" == "$n\n"
-@test """$(n)""" == "3"
-@test """$(2n)""" == "6"
-@test """$(n+4)""" == "7"
-@test """$("string")""" == "string"
-a = [3,1,2]
-@test """$(a[2])""" == "1"
-@test """$(a[3]+7)""" == "9"
-@test """$(floor(Int,4.5))""" == "4"
-nl = "
-"
-@test """
-     a
-     b
-
-     c
-     """ == "a$(nl)b$(nl)$(nl)c$(nl)"
-@test """
-      """ == ""
-@test """x
-     a
-    """ == "x$(nl) a$(nl)"
-@test """
-     $n
-   """ == "  $n$(nl)"
-@test """
-      a
-     b
-       c""" == " a$(nl)b$(nl)  c"
-# tabs + spaces
-@test """
-	 a
-	 b
-	""" == " a$(nl) b$(nl)"
-@test """
-      a
-       """ == "a$(nl) "
-s = "   p"
-@test """
-      $s""" == "$s"
-@test """
-       $s
-      """ == " $s$(nl)"
-@test """\t""" == "\t"
-@test """
-      \t""" == ""
-@test """
-      foo
-      \tbar""" == "foo$(nl)\tbar"
-@test """
-      foo
-      \tbar
-      """ == "foo$(nl)\tbar$(nl)"
-@test """
-      foo
-      bar\t""" == "foo$(nl)bar\t"
-@test """
-      $("\n      ")
-      """ == "\n      $(nl)"
-
-# bytes2hex and hex2bytes
-hex_str = "d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592"
-bin_val = hex2bytes(hex_str)
-
-@test div(length(hex_str), 2) == length(bin_val)
-@test hex_str == bytes2hex(bin_val)
-
-bin_val = hex2bytes("07bf")
-@test bin_val[1] == 7
-@test bin_val[2] == 191
-@test typeof(bin_val) == Array{UInt8, 1}
-@test length(bin_val) == 2
-
-# all valid hex chars
-@test "0123456789abcdefabcdef" == bytes2hex(hex2bytes("0123456789abcdefABCDEF"))
-
-# odd size
-@test_throws ArgumentError hex2bytes("0123456789abcdefABCDEF0")
-
-#non-hex characters
-@test_throws ArgumentError hex2bytes("0123456789abcdefABCDEFGH")
-
-# sizeof
-@test sizeof("abc") == 3
-@test sizeof("\u2222") == 3
-@test sizeof(SubString("abc\u2222def",4,4)) == 3
-@test sizeof(RopeString("abc","def")) == 6
-
-# issue #3597
-@test string(utf32(['T', 'e', 's', 't'])[1:1], "X") == "TX"
-
-# issue #3710
-@test prevind(SubString("{var}",2,4),4) == 3
-
-# printf
-# int
-@test (@sprintf "%d" typemax(Int64)) == "9223372036854775807"
-@test (@sprintf "%i" 42) == "42"
-@test (@sprintf "%u" 42) == "42"
-@test (@sprintf "Test: %i" 42) == "Test: 42"
-@test (@sprintf "%#x" 42) == "0x2a"
-@test (@sprintf "%#o" 42) == "052"
-@test (@sprintf "%X" 42) == "2A"
-@test (@sprintf "%X" 42) == "2A"
-@test (@sprintf "% i" 42) == " 42"
-@test (@sprintf "%+i" 42) == "+42"
-@test (@sprintf "%4i" 42) == "  42"
-@test (@sprintf "%-4i" 42) == "42  "
-# float
-@test (@sprintf "%7.2f" 1.2345) == "   1.23"
-@test (@sprintf "%-7.2f" 1.2345) == "1.23   "
-@test (@sprintf "%07.2f" 1.2345) == "0001.23"
-@test (@sprintf "%.0f" 1.2345) == "1"
-@test (@sprintf "%#.0f" 1.2345) == "1."
-# Inf / NaN handling
-@test (@sprintf "%f" Inf) == "Inf"
-@test (@sprintf "%f" NaN) == "NaN"
-# scientific notation
-@test (@sprintf "%.4e" 1.2345) == "1.2345e+00"
-@test (@sprintf "%.0e" 3e142) == "3e+142"
-@test (@sprintf "%#.0e" 3e142) == "3.e+142"
-# hex float
-@test (@sprintf "%a" 1.5) == "0x1.8p+0"
-@test (@sprintf "%#.0a" 1.5) == "0x2.p+0"
-@test (@sprintf "%+30a" 1/3) == "         +0x1.5555555555555p-2"
-# chars
-@test (@sprintf "%c" 65) == "A"
-@test (@sprintf "%c" 'A') == "A"
-@test (@sprintf "%c" 248) == "ø"
-@test (@sprintf "%c" 'ø') == "ø"
-# strings
-@test (@sprintf "%s" "test") == "test"
-@test (@sprintf "%s" "tést") == "tést"
-# reasonably complex
-@test (@sprintf "Test: %s%c%C%c%#-.0f." "t" 65 66 67 -42) == "Test: tABC-42.."
-#test simple splatting
-@test (@sprintf "%d%d" [1 2]...) == "12"
-# combo
-@test (@sprintf "%f %d %d %f" 1.0 [3 4]... 5) == "1.000000 3 4 5.000000"
-# multi
-@test (@sprintf "%s %f %9.5f %d %d %d %d%d%d%d" [1:6;]... [7,8,9,10]...) == "1 2.000000   3.00000 4 5 6 78910"
-# comprehension
-@test (@sprintf "%s %s %s %d %d %d %f %f %f" Any[10^x+y for x=1:3,y=1:3 ]...) == "11 101 1001 12 102 1002 13.000000 103.000000 1003.000000"
-
-# issue #4183
-@test split(SubString(ascii("x"), 2, 0), "y") == AbstractString[""]
-@test split(SubString(utf8("x"), 2, 0), "y") == AbstractString[""]
-
-# issue #4586
-@test rsplit(RevString("ailuj"),'l') == ["ju","ia"]
-@test parse(Float64,RevString("64")) === 46.0
-
-# issue #6772
-@test float(SubString("10",1,1)) === 1.0
-@test float(SubString("1 0",1,1)) === 1.0
-@test parse(Float32,SubString("10",1,1)) === 1.0f0
-
-for T = (UInt8,Int8,UInt16,Int16,UInt32,Int32,UInt64,Int64,UInt128,Int128,BigInt),
-    b = 2:62, _ = 1:10
-    n = T != BigInt ? rand(T) : BigInt(rand(Int128))
-    @test parse(T,base(b,n),b) == n
-end
-
-# normalize_string (Unicode normalization etc.):
-@test normalize_string("\u006e\u0303", :NFC) == "\u00f1"
-@test "\u006e\u0303" == normalize_string("\u00f1", :NFD)
-@test normalize_string("\ufb00", :NFC) != "ff"
-@test normalize_string("\ufb00", :NFKC) == "ff"
-@test normalize_string("\u006e\u0303\ufb00", :NFKC) == "\u00f1"*"ff"
-@test normalize_string("\u00f1\ufb00", :NFKD) == "\u006e\u0303"*"ff"
-@test normalize_string("\u006e\u0303", compose=true) == "\u00f1"
-@test "\u006e\u0303" == normalize_string("\u00f1", decompose=true)
-@test normalize_string("\u006e\u0303\u00b5",compat=true) == "\u00f1\u03bc"
-@test normalize_string("Σσς",casefold=true) == "σσσ"
-@test normalize_string("∕⁄", lump=true) == "//"
-@test normalize_string("\ua\n\r\r\ua", newline2lf=true) == "\ua\ua\ua\ua"
-@test normalize_string("\ua\n\r\r\ua", newline2ls=true) == "\u2028\u2028\u2028\u2028"
-@test normalize_string("\ua\n\r\r\ua", newline2ps=true) == "\u2029\u2029\u2029\u2029"
-@test normalize_string("\u00f1", stripmark=true) == "n"
-@test isempty(normalize_string("\u00ad", stripignore=true))
-@test normalize_string("\t\r", stripcc=true) == "  "
-@test normalize_string("\t\r", stripcc=true, newline2ls=true) == " \u2028"
-
-#Tests from Unicode SA#15, "Unicode normalization forms"
-#http://www.unicode.org/reports/tr15/
-
-#1. Canonical equivalence
-let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFC)==normalize_string(string(b...), :NFC)
-    ==(a,b) = Base.(:(==))(a,b)
-    @test ['C', '̧'] == ['Ç']
-    @test ['q', '̇', '̣'] == ['q', '̣', '̇']
-    @test ['가'] == ['ᄀ', 'ᅡ']
-    @test ['Ω'] == ['Ω']
-end
-
-#2. Compatibility Equivalence
-let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFKC)==normalize_string(string(b...), :NFKC)
-    ==(a,b) = Base.(:(==))(a,b)
-    @test ['ℌ'] == ['ℍ'] == ['H']
-    @test ['ﻨ'] == ['ﻧ'] == ['ﻦ'] == ['ﻥ']
-    @test ['①'] == ['1']
-    @test ['ｶ'] == ['カ']
-    @test ['︷'] == ['{']
-    @test ['⁹'] == ['₉']
-    @test ['㌀'] == ['ア', 'パ', 'ー', 'ト']
-    @test ['¼'] == ['1', '⁄', '4']
-    @test ['ǆ'] == ['d', 'ž']
-end
-
-#3. Singletons
-@test normalize_string("\U212b", :NFD) == "A\U030a"
-@test normalize_string("\U212b", :NFC) == "\U00c5"
-@test normalize_string("\U2126", :NFC) == normalize_string("\U2126", :NFD) == "\U03a9"
-
-#4. Canonical Composites
-@test normalize_string("\U00c5", :NFC) == "\U00c5"
-@test normalize_string("\U00c5", :NFD) == "A\U030a"
-@test normalize_string("\U00f4", :NFC) == "\U00f4"
-@test normalize_string("\U00f4", :NFD) == "o\U0302"
-
-#5. Multiple Combining Marks
-@test normalize_string("\U1e69", :NFD) == "s\U0323\U0307"
-@test normalize_string("\U1e69", :NFC) == "\U1e69"
-@test normalize_string("\U1e0b\U0323", :NFD) == "d\U0323\U0307"
-@test normalize_string("\U1e0b\U0323", :NFC) == "\U1e0d\U0307"
-@test normalize_string("q\U0307\U0323", :NFC) == "q\U0323\U0307"
-@test normalize_string("q\U0307\U0323", :NFD) == "q\U0323\U0307"
-
-#6. Compatibility Composites
-@test normalize_string("\Ufb01", :NFD) == normalize_string("\Ufb01", :NFC) == "\Ufb01"
-@test normalize_string("\Ufb01", :NFKD) == normalize_string("\Ufb01", :NFKC) == "fi"
-@test normalize_string("2\U2075", :NFD) == normalize_string("2\U2075", :NFC) == "2\U2075"
-@test normalize_string("2\U2075", :NFKD) == normalize_string("2\U2075", :NFKC) == "25"
-@test normalize_string("\U1e9b\U0323", :NFD) == "\U017f\U0323\U0307"
-@test normalize_string("\U1e9b\U0323", :NFC) == "\U1e9b\U0323"
-@test normalize_string("\U1e9b\U0323", :NFKD) == "s\U0323\U0307"
-@test normalize_string("\U1e9b\U0323", :NFKC) == "\U1e69"
-
-# issue #5870
-@test !ismatch(Regex("aa"), SubString("",1,0))
-@test ismatch(Regex(""), SubString("",1,0))
-
-# issue #6027
-let
-    # make symbol with invalid char
-    sym = symbol(Char(0xdcdb))
-    @test string(sym) == string(Char(0xdcdb))
-    @test expand(sym) === sym
-    res = string(parse(string(Char(0xdcdb)," = 1"),1,raise=false)[1])
-    @test res == """\$(Expr(:error, "invalid character \\\"\\udcdb\\\"\"))"""
-end
-
-@test symbol("asdf") === :asdf
-@test symbol(:abc,"def",'g',"hi",0) === :abcdefghi0
-@test :a < :b
-@test startswith(string(gensym("asdf")),"##asdf#")
-@test gensym("asdf") != gensym("asdf")
-@test gensym() != gensym()
-@test startswith(string(gensym()),"##")
-@test_throws ArgumentError symbol("ab\0")
-@test_throws ArgumentError gensym("ab\0")
-
-# issue #6949
-let f =IOBuffer(),
-    x = split("1 2 3")
-    @test write(f, x) == 3
-    @test takebuf_string(f) == "123"
-    @test invoke(write, Tuple{IO, AbstractArray}, f, x) == 3
-    @test takebuf_string(f) == "123"
-end
-
-# issue #7248
-@test_throws BoundsError ind2chr("hello", -1)
-@test_throws BoundsError chr2ind("hello", -1)
-@test_throws BoundsError ind2chr("hellø", -1)
-@test_throws BoundsError chr2ind("hellø", -1)
-@test_throws BoundsError ind2chr("hello", 10)
-@test_throws BoundsError chr2ind("hello", 10)
-@test_throws BoundsError ind2chr("hellø", 10)
-@test_throws BoundsError chr2ind("hellø", 10)
-@test_throws BoundsError checkbounds("hello", 0)
-@test_throws BoundsError checkbounds("hello", 6)
-@test_throws BoundsError checkbounds("hello", 0:3)
-@test_throws BoundsError checkbounds("hello", 4:6)
-@test_throws BoundsError checkbounds("hello", [0:3;])
-@test_throws BoundsError checkbounds("hello", [4:6;])
-@test checkbounds("hello", 2)
-@test checkbounds("hello", 1:5)
-@test checkbounds("hello", [1:5;])
-
-
-# isvalid(), chr2ind() and ind2chr() for SubString{DirectIndexString}
-let s="lorem ipsum",
-    sdict=Dict(SubString(s,1,11)=>s,
-               SubString(s,1,6)=>"lorem ",
-               SubString(s,1,0)=>"",
-               SubString(s,2,4)=>"ore",
-               SubString(s,2,16)=>"orem ipsum",
-               SubString(s,12,14)=>""
-               )
-    for (ss,s) in sdict
-        for i in -1:12
-            @test isvalid(ss,i)==isvalid(s,i)
-        end
-    end
-    for (ss,s) in sdict
-        for i in 1:length(ss)
-            @test ind2chr(ss,i)==ind2chr(s,i)
-        end
-    end
-    for (ss,s) in sdict
-        for i in 1:length(ss)
-            @test chr2ind(ss,i)==chr2ind(s,i)
-        end
-    end
-end #let
-
-#for isvalid(SubString{UTF8String})
-let s = utf8("Σx + βz - 2")
-  for i in -1:length(s)+2
-      ss=SubString(s,1,i)
-      @test isvalid(ss,i)==isvalid(s,i)
-  end
-end
-
-ss=SubString("hello",1,5)
-@test_throws BoundsError ind2chr(ss, -1)
-@test_throws BoundsError chr2ind(ss, -1)
-@test_throws BoundsError chr2ind(ss, 10)
-@test_throws BoundsError ind2chr(ss, 10)
-
-# length(SubString{UTF8String}) performance specialization
-let s = "|η(α)-ϕ(κ)| < ε"
-    @test length(SubString(s,1,0))==length(s[1:0])
-    @test length(SubString(s,4,4))==length(s[4:4])
-    @test length(SubString(s,1,7))==length(s[1:7])
-    @test length(SubString(s,4,11))==length(s[4:11])
-end
-
-# issue #7764
-let
-    srep = RepString("Σβ",2)
-    s="Σβ"
-    ss=SubString(s,1,endof(s))
-
-    @test ss^2 == "ΣβΣβ"
-    @test RepString(ss,2) == "ΣβΣβ"
-
-    @test endof(srep) == 7
-
-    @test next(srep, 3) == ('β',5)
-    @test next(srep, 7) == ('β',9)
-
-    @test srep[7] == 'β'
-    @test_throws BoundsError srep[8]
-end
-
-#issue #5939  uft8proc/libmojibake character predicates
-let
-    alower=['a', 'd', 'j', 'y', 'z']
-    ulower=['α', 'β', 'γ', 'δ', 'ф', 'я']
-    for c in vcat(alower,ulower)
-        @test islower(c) == true
-        @test isupper(c) == false
-        @test isdigit(c) == false
-        @test isnumber(c) == false
-    end
-
-    aupper=['A', 'D', 'J', 'Y', 'Z']
-    uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'ǅ', 'Ж', 'Д']
-
-    for c in vcat(aupper,uupper)
-        @test islower(c) == false
-        @test isupper(c) == true
-        @test isdigit(c) == false
-        @test isnumber(c) == false
-    end
-
-    nocase=['א','ﺵ']
-    alphas=vcat(alower,ulower,aupper,uupper,nocase)
-
-    for c in alphas
-         @test isalpha(c) == true
-         @test isnumber(c) == false
-    end
-
-
-    anumber=['0', '1', '5', '9']
-    unumber=['٣', '٥', '٨', '¹', 'ⅳ' ]
-
-    for c in anumber
-         @test isdigit(c) == true
-         @test isnumber(c) == true
-    end
-    for c in unumber
-         @test isdigit(c) == false
-         @test isnumber(c) == true
-    end
-
-    alnums=vcat(alphas,anumber,unumber)
-    for c in alnums
-         @test isalnum(c) == true
-         @test ispunct(c) == false
-    end
-
-    asymbol = ['(',')', '~', '$' ]
-    usymbol = ['∪', '∩', '⊂', '⊃', '√', '€', '¥', '↰', '△', '§']
-
-    apunct =['.',',',';',':','&']
-    upunct =['‡', '؟', '჻' ]
-
-    for c in vcat(apunct,upunct)
-         @test ispunct(c) == true
-         @test isalnum(c) == false
-    end
-
-    for c in vcat(alnums,asymbol,usymbol,apunct,upunct)
-        @test isprint(c) == true
-        @test isgraph(c) == true
-        @test isspace(c) == false
-        @test iscntrl(c) == false
-    end
-
-    NBSP = Char(0x0000A0)
-    ENSPACE = Char(0x002002)
-    EMSPACE = Char(0x002003)
-    THINSPACE = Char(0x002009)
-    ZWSPACE = Char(0x002060)
-
-    uspace = [ENSPACE, EMSPACE, THINSPACE]
-    aspace = [' ']
-    acntrl_space = ['\t', '\n', '\v', '\f', '\r']
-    for c in vcat(aspace,uspace)
-        @test isspace(c) == true
-        @test isprint(c) == true
-        @test isgraph(c) == false
-    end
-
-    for c in vcat(acntrl_space)
-        @test isspace(c) == true
-        @test isprint(c) == false
-        @test isgraph(c) == false
-    end
-
-    @test isspace(ZWSPACE) == false # zero-width space
-
-    acontrol = [ Char(0x001c), Char(0x001d), Char(0x001e), Char(0x001f)]
-    latincontrol = [ Char(0x0080), Char(0x0085) ]
-    ucontrol = [ Char(0x200E), Char(0x202E) ]
-
-    for c in vcat(acontrol, acntrl_space, latincontrol)
-        @test iscntrl(c) == true
-        @test isalnum(c) == false
-        @test isprint(c) == false
-        @test isgraph(c) == false
-    end
-
-    for c in ucontrol  #non-latin1 controls
-        if c!=Char(0x0085)
-            @test iscntrl(c) == false
-            @test isspace(c) == false
-            @test isalnum(c) == false
-            @test isprint(c) == false
-            @test isgraph(c) == false
-        end
-    end
-
-end
-
-@test isspace("  \t   \n   \r  ")==true
-@test isgraph("  \t   \n   \r  ")==false
-@test isprint("  \t   \n   \r  ")==false
-@test isalpha("  \t   \n   \r  ")==false
-@test isnumber("  \t   \n   \r  ")==false
-@test ispunct("  \t   \n   \r  ")==false
-
-@test isspace("ΣβΣβ")==false
-@test isalpha("ΣβΣβ")==true
-@test isgraph("ΣβΣβ")==true
-@test isprint("ΣβΣβ")==true
-@test isupper("ΣβΣβ")==false
-@test islower("ΣβΣβ")==false
-@test isnumber("ΣβΣβ")==false
-@test iscntrl("ΣβΣβ")==false
-@test ispunct("ΣβΣβ")==false
-
-@test isnumber("23435")==true
-@test isdigit("23435")==true
-@test isalnum("23435")==true
-@test isalpha("23435")==false
-@test iscntrl( string(Char(0x0080))) == true
-@test ispunct( "‡؟჻") ==true
-
-@test isxdigit('0') == true
-@test isxdigit("0") == true
-@test isxdigit("a") == true
-@test isxdigit("g") == false
-
-# Issue #11140
-@test isvalid(utf32("a")) == true
-@test isvalid(utf32("\x00")) == true
-@test isvalid(UTF32String, UInt32[0xd800,0]) == false
-
-# Issue #11241
-
-@test isvalid(ASCIIString, "is_valid_ascii") == true
-@test isvalid(ASCIIString, "Σ_not_valid_ascii") == false
-
-# test all edge conditions
-for (val, pass) in (
-        (0, true), (0xd7ff, true),
-        (0xd800, false), (0xdfff, false),
-        (0xe000, true), (0xffff, true),
-        (0x10000, true), (0x10ffff, true),
-        (0x110000, false)
-    )
-    @test isvalid(Char, val) == pass
-end
-for (val, pass) in (
-        (b"\x00", true),
-        (b"\x7f", true),
-        (b"\x80", false),
-        (b"\xbf", false),
-        (b"\xc0", false),
-        (b"\xff", false),
-        (b"\xc0\x80", false),
-        (b"\xc1\x80", false),
-        (b"\xc2\x80", true),
-        (b"\xc2\xc0", false),
-        (b"\xed\x9f\xbf", true),
-        (b"\xed\xa0\x80", false),
-        (b"\xed\xbf\xbf", false),
-        (b"\xee\x80\x80", true),
-        (b"\xef\xbf\xbf", true),
-        (b"\xf0\x90\x80\x80", true),
-        (b"\xf4\x8f\xbf\xbf", true),
-        (b"\xf4\x90\x80\x80", false),
-        (b"\xf5\x80\x80\x80", false),
-        (b"\ud800\udc00", false),
-        (b"\udbff\udfff", false),
-        (b"\ud800\u0100", false),
-        (b"\udc00\u0100", false),
-        (b"\udc00\ud800", false)
-        )
-    @test isvalid(UTF8String, val) == pass
-end
-for (val, pass) in (
-        (UInt16[0x0000], true),
-        (UInt16[0xd7ff,0], true),
-        (UInt16[0xd800,0], false),
-        (UInt16[0xdfff,0], false),
-        (UInt16[0xe000,0], true),
-        (UInt16[0xffff,0], true),
-        (UInt16[0xd800,0xdc00,0], true),
-        (UInt16[0xdbff,0xdfff,0], true),
-        (UInt16[0xd800,0x0100,0], false),
-        (UInt16[0xdc00,0x0100,0], false),
-        (UInt16[0xdc00,0xd800,0], false)
-        )
-    @test isvalid(UTF16String, val) == pass
-end
-for (val, pass) in (
-        (UInt32[0x0000], true),
-        (UInt32[0xd7ff,0], true),
-        (UInt32[0xd800,0], false),
-        (UInt32[0xdfff,0], false),
-        (UInt32[0xe000,0], true),
-        (UInt32[0xffff,0], true),
-        (UInt32[0x100000,0], true),
-        (UInt32[0x10ffff,0], true),
-        (UInt32[0x110000,0], false),
-        )
-    @test isvalid(UTF32String, val) == pass
-end
-
-# Issue #11203
-@test isvalid(ASCIIString,UInt8[]) == true
-@test isvalid(UTF8String, UInt8[]) == true
-@test isvalid(UTF16String,UInt16[]) == true
-@test isvalid(UTF32String,UInt32[]) == true
-
-# Check UTF-8 characters
-# Check ASCII range (true),
-# then single continuation bytes and lead bytes with no following continuation bytes (false)
-for (rng,flg) in ((0:0x7f, true), (0x80:0xff, false))
-    for byt in rng
-        @test isvalid(UTF8String, UInt8[byt]) == flg
-    end
-end
-# Check overlong lead bytes for 2-character sequences (false)
-for byt = 0xc0:0xc1
-    @test isvalid(UTF8String, UInt8[byt,0x80]) == false
-end
-# Check valid lead-in to two-byte sequences (true)
-for byt = 0xc2:0xdf
-    for (rng,flg) in ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false))
-        for cont in rng
-            @test isvalid(UTF8String, UInt8[byt, cont]) == flg
-        end
-    end
-end
-# Check three-byte sequences
-for r1 in (0xe0:0xec, 0xee:0xef)
-    for byt = r1
-        # Check for short sequence
-        @test isvalid(UTF8String, UInt8[byt]) == false
-        for (rng,flg) in ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false))
-            for cont in rng
-                @test isvalid(UTF8String, UInt8[byt, cont]) == false
-                @test isvalid(UTF8String, UInt8[byt, cont, 0x80]) == flg
-            end
-        end
-    end
-end
-# Check hangul characters (0xd000-0xd7ff) hangul
-# Check for short sequence, or start of surrogate pair
-for (rng,flg) in ((0x00:0x7f, false), (0x80:0x9f, true), (0xa0:0xff, false))
-    for cont in rng
-        @test isvalid(UTF8String, UInt8[0xed, cont]) == false
-        @test isvalid(UTF8String, UInt8[0xed, cont, 0x80]) == flg
-    end
-end
-# Check valid four-byte sequences
-for byt = 0xf0:0xf4
-    if (byt == 0xf0)
-        r0 = ((0x00:0x8f, false), (0x90:0xbf, true), (0xc0:0xff, false))
-    elseif byt == 0xf4
-        r0 = ((0x00:0x7f, false), (0x80:0x8f, true), (0x90:0xff, false))
-    else
-        r0 = ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false))
-    end
-    for (rng,flg) in r0
-        for cont in rng
-            @test isvalid(UTF8String, UInt8[byt, cont]) == false
-            @test isvalid(UTF8String, UInt8[byt, cont, 0x80]) == false
-            @test isvalid(UTF8String, UInt8[byt, cont, 0x80, 0x80]) == flg
-        end
-    end
-end
-# Check five-byte sequences, should be invalid
-for byt = 0xf8:0xfb
-    @test isvalid(UTF8String, UInt8[byt, 0x80, 0x80, 0x80, 0x80]) == false
-end
-# Check six-byte sequences, should be invalid
-for byt = 0xfc:0xfd
-    @test isvalid(UTF8String, UInt8[byt, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
-end
-# Check seven-byte sequences, should be invalid
-@test isvalid(UTF8String, UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
-
-# 11482
-
-# isvalid
-let s = "abcdef", u8 = "abcdef\uff", u16 = utf16(u8), u32 = utf32(u8),
-    bad32 = utf32(UInt32[65,0x110000]), badch = Char[0x110000][1]
-
-    @test !isvalid(bad32)
-    @test !isvalid(badch)
-    @test isvalid(s)
-    @test isvalid(u8)
-    @test isvalid(u16)
-    @test isvalid(u32)
-    @test isvalid(ASCIIString, s)
-    @test isvalid(UTF8String,  u8)
-    @test isvalid(UTF16String, u16)
-    @test isvalid(UTF32String, u32)
-end
-
-# This caused JuliaLang/JSON.jl#82
-@test first('\x00':'\x7f') === '\x00'
-@test last('\x00':'\x7f') === '\x7f'
-
-# Tests of join()
-@test join([]) == ""
-@test join(["a"],"?") == "a"
-@test join("HELLO",'-') == "H-E-L-L-O"
-@test join(1:5, ", ", " and ") == "1, 2, 3, 4 and 5"
-@test join(["apples", "bananas", "pineapples"], ", ", " and ") == "apples, bananas and pineapples"
-
-# issue #9178 `join` calls `done()` twice on the iterables
-type i9178
-    nnext::Int64
-    ndone::Int64
-end
-Base.start(jt::i9178) = (jt.nnext=0 ; jt.ndone=0 ; 0)
-Base.done(jt::i9178, n) = (jt.ndone += 1 ; n > 3)
-Base.next(jt::i9178, n) = (jt.nnext += 1 ; ("$(jt.nnext),$(jt.ndone)", n+1))
-@test join(i9178(0,0), ";") == "1,1;2,2;3,3;4,4"
-
-# make sure substrings handle last code unit even if not start of codepoint
-let s = "x\u0302"
-    @test s[1:3] == s
-end
-
-# reverseind
-for T in (ASCIIString, UTF8String, UTF16String, UTF32String)
-    for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
-        for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
-            for c in ('X', 'δ', '\U0001d6a5')
-                T != ASCIIString || (isascii(prefix) && isascii(suffix) && isascii(c)) || continue
-                s = convert(T, string(prefix, c, suffix))
-                ri = search(reverse(s), c)
-                @test reverse(s) == RevString(s)
-                @test c == s[reverseind(s, ri)] == reverse(s)[ri]
-                s = RevString(s)
-                ri = search(reverse(s), c)
-                @test c == s[reverseind(s, ri)] == reverse(s)[ri]
-                s = convert(T, string(prefix, prefix, c, suffix, suffix))
-                pre = convert(T, prefix)
-                sb = SubString(s, nextind(pre, endof(pre)), endof(convert(T, string(prefix, prefix, c, suffix))))
-                ri = search(reverse(sb), c)
-                @test c == sb[reverseind(sb, ri)] == reverse(sb)[ri]
-            end
-        end
-    end
-end
-
-# issue #9781
-# float(SubString) wasn't tolerant of trailing whitespace, which was different
-# to "normal" strings. This also checks we aren't being too tolerant and allowing
-# any arbitrary trailing characters.
-@test parse(Float64,"1\n") == 1.0
-@test [parse(Float64,x) for x in split("0,1\n",",")][2] == 1.0
-@test_throws ArgumentError parse(Float64,split("0,1 X\n",",")[2])
-@test parse(Float32,"1\n") == 1.0
-@test [parse(Float32,x) for x in split("0,1\n",",")][2] == 1.0
-@test_throws ArgumentError parse(Float32,split("0,1 X\n",",")[2])
-
-#more ascii tests
-@test convert(ASCIIString, UInt8[32,107,75], "*") == " kK"
-@test convert(ASCIIString, UInt8[132,107,75], "*") == "*kK"
-@test convert(ASCIIString, UInt8[], "*") == ""
-@test convert(ASCIIString, UInt8[255], "*") == "*"
-
-@test ucfirst("Hola")=="Hola"
-@test ucfirst("hola")=="Hola"
-@test ucfirst("")==""
-@test ucfirst("*")=="*"
-
-@test lcfirst("Hola")=="hola"
-@test lcfirst("hola")=="hola"
-@test lcfirst("")==""
-@test lcfirst("*")=="*"
-
-#more UTF8String tests
-@test convert(UTF8String, UInt8[32,107,75], "*") == " kK"
-@test convert(UTF8String, UInt8[132,107,75], "*") == "*kK"
-@test convert(UTF8String, UInt8[32,107,75], "αβ") == " kK"
-@test convert(UTF8String, UInt8[132,107,75], "αβ") == "αβkK"
-@test convert(UTF8String, UInt8[], "*") == ""
-@test convert(UTF8String, UInt8[255], "αβ") == "αβ"
-
-# test AbstractString functions at beginning of string.jl
-immutable tstStringType <: AbstractString
-    data::Array{UInt8,1}
-end
-tstr = tstStringType("12");
-@test_throws ErrorException endof(tstr)
-@test_throws ErrorException next(tstr, Bool(1))
-
-gstr = GenericString("12");
-@test typeof(string(gstr))==GenericString
-@test bytestring()==""
-
-@test convert(Array{UInt8}, gstr) ==[49;50]
-@test convert(Array{Char,1}, gstr) ==['1';'2']
-@test convert(Symbol, gstr)==symbol("12")
-
-@test getindex(gstr, Bool(1))=='1'
-@test getindex(gstr,Bool(1):Bool(1))=="1"
-@test getindex(gstr,AbstractVector([Bool(1):Bool(1);]))=="1"
-
-@test symbol(gstr)==symbol("12")
-
-@test_throws ErrorException sizeof(gstr)
-
-@test length(GenericString(""))==0
-
-@test getindex(gstr,AbstractVector([Bool(1):Bool(1);]))=="1"
-
-@test nextind(AbstractArray([Bool(1):Bool(1);]),1)==2
-
-@test ind2chr(gstr,2)==2
-
-# issue #10307
-@test typeof(map(Int16,String[])) == Vector{Int16}
-
-for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128]
-    for i in [typemax(T), typemin(T)]
-        s = "$i"
-        @test get(tryparse(T, s)) == i
-    end
-end
-
-for T in [Int8, Int16, Int32, Int64, Int128]
-    for i in [typemax(T), typemin(T)]
-        f = "$(i)0"
-        @test isnull(tryparse(T, f))
-    end
-end
-
-# issue #11142
-s = "abcdefghij"
-sp = pointer(s)
-@test ascii(sp) == s
-@test ascii(sp,5) == "abcde"
-@test typeof(ascii(sp)) == ASCIIString
-@test typeof(utf8(sp)) == UTF8String
-s = "abcde\uff\u2000\U1f596"
-sp = pointer(s)
-@test utf8(sp) == s
-@test utf8(sp,5) == "abcde"
-@test typeof(utf8(sp)) == UTF8String
-
-@test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890)
-@test isnull(tryparse(BigInt, "1234567890-"))
-
-@test get(tryparse(Float64, "64")) == 64.0
-@test isnull(tryparse(Float64, "64o"))
-@test get(tryparse(Float32, "32")) == 32.0f0
-@test isnull(tryparse(Float32, "32o"))
-
-# issue #10994: handle embedded NUL chars for string parsing
-for T in [BigInt, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128]
-    @test_throws ArgumentError parse(T, "1\0")
-end
-for T in [BigInt, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, Float64, Float32]
-    @test isnull(tryparse(T, "1\0"))
-end
-let s = normalize_string("tést",:NFKC)
-    @test bytestring(Base.unsafe_convert(Cstring, s)) == s
-    @test bytestring(convert(Cstring, symbol(s))) == s
-    @test wstring(Base.unsafe_convert(Cwstring, wstring(s))) == s
-end
-let s = "ba\0d"
-    @test_throws ArgumentError Base.unsafe_convert(Cstring, s)
-    @test_throws ArgumentError Base.unsafe_convert(Cwstring, wstring(s))
-end
-
-# issue # 11389: Vector{UInt32} was copied with UTF32String, unlike Vector{Char}
-a = UInt32[48,0]
-b = UTF32String(a)
-@test b=="0"
-a[1] = 65
-@test b=="A"
-c = Char['0','\0']
-d = UTF32String(c)
-@test d=="0"
-c[1] = 'A'
-@test d=="A"
-
-# Issue #11575
-# Test invalid sequences
-
-byt = 0x0 # Needs to be defined outside the try block!
-try
-    # Continuation byte not after lead
-    for byt in 0x80:0xbf
-        @test_throws UnicodeError Base.checkstring(UInt8[byt])
-    end
-
-    # Test lead bytes
-    for byt in 0xc0:0xff
-        # Single lead byte at end of string
-        @test_throws UnicodeError Base.checkstring(UInt8[byt])
-        # Lead followed by non-continuation character < 0x80
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0])
-        # Lead followed by non-continuation character > 0xbf
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0xc0])
-    end
-
-    # Test overlong 2-byte
-    for byt in 0x81:0xbf
-        @test_throws UnicodeError Base.checkstring(UInt8[0xc0,byt])
-    end
-    for byt in 0x80:0xbf
-        @test_throws UnicodeError Base.checkstring(UInt8[0xc1,byt])
-    end
-
-    # Test overlong 3-byte
-    for byt in 0x80:0x9f
-        @test_throws UnicodeError Base.checkstring(UInt8[0xe0,byt,0x80])
-    end
-
-    # Test overlong 4-byte
-    for byt in 0x80:0x8f
-        @test_throws UnicodeError Base.checkstring(UInt8[0xef,byt,0x80,0x80])
-    end
-
-    # Test 4-byte > 0x10ffff
-    for byt in 0x90:0xbf
-        @test_throws UnicodeError Base.checkstring(UInt8[0xf4,byt,0x80,0x80])
-    end
-    for byt in 0xf5:0xf7
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0x80])
-    end
-
-    # Test 5-byte
-    for byt in 0xf8:0xfb
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0x80,0x80])
-    end
-
-    # Test 6-byte
-    for byt in 0xfc:0xfd
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0x80,0x80,0x80])
-    end
-
-    # Test 7-byte
-    @test_throws UnicodeError Base.checkstring(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80])
-
-    # Three and above byte sequences
-    for byt in 0xe0:0xef
-        # Lead followed by only 1 continuation byte
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80])
-        # Lead ended by non-continuation character < 0x80
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0])
-        # Lead ended by non-continuation character > 0xbf
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0xc0])
-    end
-
-    # 3-byte encoded surrogate character(s)
-    # Single surrogate
-    @test_throws UnicodeError Base.checkstring(UInt8[0xed,0xa0,0x80])
-    # Not followed by surrogate
-    @test_throws UnicodeError Base.checkstring(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80])
-    # Trailing surrogate first
-    @test_throws UnicodeError Base.checkstring(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80])
-    # Followed by lead surrogate
-    @test_throws UnicodeError Base.checkstring(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80])
-
-    # Four byte sequences
-    for byt in 0xf0:0xf4
-        # Lead followed by only 2 continuation bytes
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80])
-        # Lead followed by non-continuation character < 0x80
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0])
-        # Lead followed by non-continuation character > 0xbf
-        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0xc0])
-    end
-catch exp;
-    println("Error testing checkstring: $byt, $exp")
-    throw(exp)
-end
-
-# Surrogates
-@test_throws UnicodeError Base.checkstring(UInt16[0xd800])
-@test_throws UnicodeError Base.checkstring(UInt16[0xdc00])
-@test_throws UnicodeError Base.checkstring(UInt16[0xdc00,0xd800])
-
-# Surrogates in UTF-32
-@test_throws UnicodeError Base.checkstring(UInt32[0xd800])
-@test_throws UnicodeError Base.checkstring(UInt32[0xdc00])
-@test_throws UnicodeError Base.checkstring(UInt32[0xdc00,0xd800])
-
-# Characters > 0x10ffff
-@test_throws UnicodeError Base.checkstring(UInt32[0x110000])
-
-# Test valid sequences
-for (seq, res) in (
-    (UInt8[0x0],                (1,0,0,0,0)),   # Nul byte, beginning of ASCII range
-    (UInt8[0x7f],               (1,0,0,0,0)),   # End of ASCII range
-    (UInt8[0xc0,0x80],          (1,1,0,0,0)),   # Long encoded Nul byte (Modified UTF-8, Java)
-    (UInt8[0xc2,0x80],          (1,2,0,0,1)),   # \u80, beginning of Latin1 range
-    (UInt8[0xc3,0xbf],          (1,2,0,0,1)),   # \uff, end of Latin1 range
-    (UInt8[0xc4,0x80],          (1,4,0,0,1)),   # \u100, beginning of non-Latin1 2-byte range
-    (UInt8[0xdf,0xbf],          (1,4,0,0,1)),   # \u7ff, end of non-Latin1 2-byte range
-    (UInt8[0xe0,0xa0,0x80],     (1,8,0,1,0)),   # \u800, beginning of 3-byte range
-    (UInt8[0xed,0x9f,0xbf],     (1,8,0,1,0)),   # \ud7ff, end of first part of 3-byte range
-    (UInt8[0xee,0x80,0x80],     (1,8,0,1,0)),   # \ue000, beginning of second part of 3-byte range
-    (UInt8[0xef,0xbf,0xbf],     (1,8,0,1,0)),   # \uffff, end of 3-byte range
-    (UInt8[0xf0,0x90,0x80,0x80],(1,16,1,0,0)),  # \U10000, beginning of 4-byte range
-    (UInt8[0xf4,0x8f,0xbf,0xbf],(1,16,1,0,0)),  # \U10ffff, end of 4-byte range
-    (UInt8[0xed,0xa0,0x80,0xed,0xb0,0x80], (1,0x30,1,0,0)), # Overlong \U10000, (CESU-8)
-    (UInt8[0xed,0xaf,0xbf,0xed,0xbf,0xbf], (1,0x30,1,0,0)), # Overlong \U10ffff, (CESU-8)
-    (UInt16[0x0000],            (1,0,0,0,0)),   # Nul byte, beginning of ASCII range
-    (UInt16[0x007f],            (1,0,0,0,0)),   # End of ASCII range
-    (UInt16[0x0080],            (1,2,0,0,1)),   # Beginning of Latin1 range
-    (UInt16[0x00ff],            (1,2,0,0,1)),   # End of Latin1 range
-    (UInt16[0x0100],            (1,4,0,0,1)),   # Beginning of non-Latin1 2-byte range
-    (UInt16[0x07ff],            (1,4,0,0,1)),   # End of non-Latin1 2-byte range
-    (UInt16[0x0800],            (1,8,0,1,0)),   # Beginning of 3-byte range
-    (UInt16[0xd7ff],            (1,8,0,1,0)),   # End of first part of 3-byte range
-    (UInt16[0xe000],            (1,8,0,1,0)),   # Beginning of second part of 3-byte range
-    (UInt16[0xffff],            (1,8,0,1,0)),   # End of 3-byte range
-    (UInt16[0xd800,0xdc00],     (1,16,1,0,0)),  # \U10000, beginning of 4-byte range
-    (UInt16[0xdbff,0xdfff],     (1,16,1,0,0)),  # \U10ffff, end of 4-byte range
-    (UInt32[0x0000],            (1,0,0,0,0)),   # Nul byte, beginning of ASCII range
-    (UInt32[0x007f],            (1,0,0,0,0)),   # End of ASCII range
-    (UInt32[0x0080],            (1,2,0,0,1)),   # Beginning of Latin1 range
-    (UInt32[0x00ff],            (1,2,0,0,1)),   # End of Latin1 range
-    (UInt32[0x0100],            (1,4,0,0,1)),   # Beginning of non-Latin1 2-byte range
-    (UInt32[0x07ff],            (1,4,0,0,1)),   # End of non-Latin1 2-byte range
-    (UInt32[0x0800],            (1,8,0,1,0)),   # Beginning of 3-byte range
-    (UInt32[0xd7ff],            (1,8,0,1,0)),   # End of first part of 3-byte range
-    (UInt32[0xe000],            (1,8,0,1,0)),   # Beginning of second part of 3-byte range
-    (UInt32[0xffff],            (1,8,0,1,0)),   # End of 3-byte range
-    (UInt32[0x10000],           (1,16,1,0,0)),  # \U10000, beginning of 4-byte range
-    (UInt32[0x10ffff],          (1,16,1,0,0)),  # \U10ffff, end of 4-byte range
-    (UInt32[0xd800,0xdc00],     (1,0x30,1,0,0)),# Overlong \U10000, (CESU-8)
-    (UInt32[0xdbff,0xdfff],     (1,0x30,1,0,0)))# Overlong \U10ffff, (CESU-8)
-    @test Base.checkstring(seq) == res
-end
-
-# Test bounds checking
-@test_throws BoundsError Base.checkstring(b"abcdef", -10)
-@test_throws BoundsError Base.checkstring(b"abcdef", 0)
-@test_throws BoundsError Base.checkstring(b"abcdef", 7)
-@test_throws BoundsError Base.checkstring(b"abcdef", 3, -10)
-@test_throws BoundsError Base.checkstring(b"abcdef", 3, 0)
-@test_throws BoundsError Base.checkstring(b"abcdef", 3, 7)
-@test_throws ArgumentError Base.checkstring(b"abcdef", 3, 1)
-
-# iteration
-@test [c for c in "ḟøøƀäṙ"] == ['ḟ', 'ø', 'ø', 'ƀ', 'ä', 'ṙ']
-@test [i for i in eachindex("ḟøøƀäṙ")] == [1, 4, 6, 8, 10, 12]
-@test [x for x in enumerate("ḟøøƀäṙ")] == [(1, 'ḟ'), (2, 'ø'), (3, 'ø'), (4, 'ƀ'), (5, 'ä'), (6, 'ṙ')]
-
-# issue # 11464: uppercase/lowercase of UTF16String becomes a UTF8String
-str = "abcdef\uff\uffff\u10ffffABCDEF"
-@test typeof(uppercase("abcdef")) == ASCIIString
-@test typeof(uppercase(utf8(str))) == UTF8String
-@test typeof(uppercase(utf16(str))) == UTF16String
-@test typeof(uppercase(utf32(str))) == UTF32String
-@test typeof(lowercase("ABCDEF")) == ASCIIString
-@test typeof(lowercase(utf8(str))) == UTF8String
-@test typeof(lowercase(utf16(str))) == UTF16String
-@test typeof(lowercase(utf32(str))) == UTF32String
-
-foomap(ch) = (ch > 65)
-foobar(ch) = Char(0xd800)
-foobaz(ch) = Char(0x200000)
-@test_throws UnicodeError map(foomap, utf16(str))
-@test_throws UnicodeError map(foobar, utf16(str))
-@test_throws UnicodeError map(foobaz, utf16(str))
-
-# issue #11551 (#11004,#10959)
-function tstcvt(strUTF8::UTF8String, strUTF16::UTF16String)
-    @test utf16(strUTF8) == strUTF16
-    @test utf8(strUTF16) == strUTF8
-end
-
-# Create some ASCII, UTF8 and UTF16
-strAscii = "abcdefgh"
-strA_UTF8 = ("abcdefgh\uff")[1:8]
-strL_UTF8 = "abcdef\uff\uff"
-str2_UTF8 = "abcd\uff\uff\u7ff\u7ff"
-str3_UTF8 = "abcd\uff\uff\u7fff\u7fff"
-str4_UTF8 = "abcd\uff\u7ff\u7fff\U7ffff"
-strS_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80")
-strC_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\U10000")
-strZ_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xc0\x80")
-strz_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\0")
-
-strA_UTF16 = utf16(strA_UTF8)
-strL_UTF16 = utf16(strL_UTF8)
-str2_UTF16 = utf16(str2_UTF8)
-str3_UTF16 = utf16(str3_UTF8)
-str4_UTF16 = utf16(str4_UTF8)
-strS_UTF16 = utf16(strS_UTF8)
-
-@test utf8(strAscii) == strAscii
-@test utf16(strAscii) == strAscii
-
-tstcvt(strA_UTF8,strA_UTF16)
-tstcvt(strL_UTF8,strL_UTF16)
-tstcvt(str2_UTF8,str2_UTF16)
-tstcvt(str3_UTF8,str3_UTF16)
-tstcvt(str4_UTF8,str4_UTF16)
-
-# Test converting surrogate pairs
-@test utf16(strS_UTF8) == strC_UTF8
-@test utf8(strS_UTF16) == strC_UTF8
-
-# Test converting overlong \0
-# @test utf8(strZ_UTF8)  == strz_UTF8   # currently broken! (in utf8.jl)
-@test utf16(strZ_UTF8) == strz_UTF8
-
-# Test invalid sequences
-
-byt = 0x0
-for T in (UTF16String,) # UTF32String
-    try
-    # Continuation byte not after lead
-    for byt in 0x80:0xbf
-        @test_throws UnicodeError convert(T,  UTF8String(UInt8[byt]))
-    end
-
-    # Test lead bytes
-    for byt in 0xc0:0xff
-        # Single lead byte at end of string
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt]))
-        # Lead followed by non-continuation character < 0x80
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0]))
-        # Lead followed by non-continuation character > 0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0xc0]))
-    end
-
-    # Test overlong 2-byte
-    for byt in 0x81:0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xc0,byt]))
-    end
-    for byt in 0x80:0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xc1,byt]))
-    end
-
-    # Test overlong 3-byte
-    for byt in 0x80:0x9f
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xe0,byt,0x80]))
-    end
-
-    # Test overlong 4-byte
-    for byt in 0x80:0x8f
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xef,byt,0x80,0x80]))
-    end
-
-    # Test 4-byte > 0x10ffff
-    for byt in 0x90:0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xf4,byt,0x80,0x80]))
-    end
-    for byt in 0xf5:0xf7
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80]))
-    end
-
-    # Test 5-byte
-    for byt in 0xf8:0xfb
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80]))
-    end
-
-    # Test 6-byte
-    for byt in 0xfc:0xfd
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80,0x80]))
-    end
-
-    # Test 7-byte
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))
-
-    # Three and above byte sequences
-    for byt in 0xe0:0xef
-        # Lead followed by only 1 continuation byte
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80]))
-        # Lead ended by non-continuation character < 0x80
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0]))
-        # Lead ended by non-continuation character > 0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0xc0]))
-    end
-
-    # 3-byte encoded surrogate character(s)
-    # Single surrogate
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xed,0xa0,0x80]))
-    # Not followed by surrogate
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))
-    # Trailing surrogate first
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))
-    # Followed by lead surrogate
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))
-
-    # Four byte sequences
-    for byt in 0xf0:0xf4
-        # Lead followed by only 2 continuation bytes
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80]))
-        # Lead followed by non-continuation character < 0x80
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0]))
-        # Lead followed by non-continuation character > 0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0xc0]))
-    end
-    catch exp ;
-        println("Error checking $T: $byt")
-        throw(exp)
-    end
-end
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
new file mode 100644
index 0000000000000..25b879c34e27f
--- /dev/null
+++ b/test/strings/basic.jl
@@ -0,0 +1,465 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# {starts,ends}with
+@test startswith("abcd", 'a')
+@test startswith("abcd", "a")
+@test startswith("abcd", "ab")
+@test !startswith("ab", "abcd")
+@test !startswith("abcd", "bc")
+@test endswith("abcd", 'd')
+@test endswith("abcd", "d")
+@test endswith("abcd", "cd")
+@test !endswith("abcd", "dc")
+@test !endswith("cd", "abcd")
+
+@test filter(x -> x ∈ ['f', 'o'], "foobar") == "foo"
+
+# string iteration, and issue #1454
+str = "é"
+str_a = vcat(str...)
+@test length(str_a)==1
+@test str_a[1] == str[1]
+
+str = "s\u2200"
+@test str[1:end] == str
+
+# sizeof
+@test sizeof("abc") == 3
+@test sizeof("\u2222") == 3
+
+# issue #3597
+@test string(utf32(['T', 'e', 's', 't'])[1:1], "X") == "TX"
+
+for T = (UInt8,Int8,UInt16,Int16,UInt32,Int32,UInt64,Int64,UInt128,Int128,BigInt),
+    b = 2:62, _ = 1:10
+    n = T != BigInt ? rand(T) : BigInt(rand(Int128))
+    @test parse(T,base(b,n),b) == n
+end
+
+# issue #6027
+let
+    # make symbol with invalid char
+    sym = symbol(Char(0xdcdb))
+    @test string(sym) == string(Char(0xdcdb))
+    @test expand(sym) === sym
+    res = string(parse(string(Char(0xdcdb)," = 1"),1,raise=false)[1])
+    @test res == """\$(Expr(:error, "invalid character \\\"\\udcdb\\\"\"))"""
+end
+
+@test symbol("asdf") === :asdf
+@test symbol(:abc,"def",'g',"hi",0) === :abcdefghi0
+@test :a < :b
+@test startswith(string(gensym("asdf")),"##asdf#")
+@test gensym("asdf") != gensym("asdf")
+@test gensym() != gensym()
+@test startswith(string(gensym()),"##")
+@test_throws ArgumentError symbol("ab\0")
+@test_throws ArgumentError gensym("ab\0")
+
+# issue #6949
+let f =IOBuffer(),
+    x = split("1 2 3")
+    @test write(f, x) == 3
+    @test takebuf_string(f) == "123"
+    @test invoke(write, Tuple{IO, AbstractArray}, f, x) == 3
+    @test takebuf_string(f) == "123"
+end
+
+# issue #7248
+@test_throws BoundsError ind2chr("hello", -1)
+@test_throws BoundsError chr2ind("hello", -1)
+@test_throws BoundsError ind2chr("hellø", -1)
+@test_throws BoundsError chr2ind("hellø", -1)
+@test_throws BoundsError ind2chr("hello", 10)
+@test_throws BoundsError chr2ind("hello", 10)
+@test_throws BoundsError ind2chr("hellø", 10)
+@test_throws BoundsError chr2ind("hellø", 10)
+@test_throws BoundsError checkbounds("hello", 0)
+@test_throws BoundsError checkbounds("hello", 6)
+@test_throws BoundsError checkbounds("hello", 0:3)
+@test_throws BoundsError checkbounds("hello", 4:6)
+@test_throws BoundsError checkbounds("hello", [0:3;])
+@test_throws BoundsError checkbounds("hello", [4:6;])
+@test checkbounds("hello", 2)
+@test checkbounds("hello", 1:5)
+@test checkbounds("hello", [1:5;])
+
+#=
+# issue #7764
+let
+    srep = repeat("Σβ",2)
+    s="Σβ"
+    ss=SubString(s,1,endof(s))
+
+    @test repeat(ss,2) == "ΣβΣβ"
+
+    @test endof(srep) == 7
+
+    @test next(srep, 3) == ('β',5)
+    @test next(srep, 7) == ('β',9)
+
+    @test srep[7] == 'β'
+    @test_throws BoundsError srep[8]
+end
+=#
+
+# This caused JuliaLang/JSON.jl#82
+@test first('\x00':'\x7f') === '\x00'
+@test last('\x00':'\x7f') === '\x7f'
+
+# make sure substrings handle last code unit even if not start of codepoint
+let s = "x\u0302"
+    @test s[1:3] == s
+end
+
+# issue #9781
+# float(SubString) wasn't tolerant of trailing whitespace, unlike "normal"
+# strings. This also checks that we aren't being too tolerant by allowing
+# arbitrary trailing characters.
+@test parse(Float64,"1\n") == 1.0
+@test [parse(Float64,x) for x in split("0,1\n",",")][2] == 1.0
+@test_throws ArgumentError parse(Float64,split("0,1 X\n",",")[2])
+@test parse(Float32,"1\n") == 1.0
+@test [parse(Float32,x) for x in split("0,1\n",",")][2] == 1.0
+@test_throws ArgumentError parse(Float32,split("0,1 X\n",",")[2])
+
+#more ascii tests
+@test convert(ASCIIString, UInt8[32,107,75], "*") == " kK"
+@test convert(ASCIIString, UInt8[132,107,75], "*") == "*kK"
+@test convert(ASCIIString, UInt8[], "*") == ""
+@test convert(ASCIIString, UInt8[255], "*") == "*"
+
+@test ucfirst("Hola")=="Hola"
+@test ucfirst("hola")=="Hola"
+@test ucfirst("")==""
+@test ucfirst("*")=="*"
+
+@test lcfirst("Hola")=="hola"
+@test lcfirst("hola")=="hola"
+@test lcfirst("")==""
+@test lcfirst("*")=="*"
+
+#more UTF8String tests
+@test convert(UTF8String, UInt8[32,107,75], "*") == " kK"
+@test convert(UTF8String, UInt8[132,107,75], "*") == "*kK"
+@test convert(UTF8String, UInt8[32,107,75], "αβ") == " kK"
+@test convert(UTF8String, UInt8[132,107,75], "αβ") == "αβkK"
+@test convert(UTF8String, UInt8[], "*") == ""
+@test convert(UTF8String, UInt8[255], "αβ") == "αβ"
+
+# test AbstractString functions at beginning of string.jl
+immutable tstStringType <: AbstractString  # bare subtype: no endof/next methods are defined for it here
+    data::Array{UInt8,1}
+end
+tstr = tstStringType("12");
+@test_throws ErrorException endof(tstr)  # presumably reaches the generic AbstractString fallback
+@test_throws ErrorException next(tstr, Bool(1))
+
+## generic string uses only endof and next ##
+
+immutable GenericString <: AbstractString
+    string::AbstractString  # wrapped string; both methods below forward to it
+end
+
+Base.endof(s::GenericString) = endof(s.string)  # endof of the wrapped string
+Base.next(s::GenericString, i::Int) = next(s.string, i)  # next of the wrapped string at index i
+
+gstr = GenericString("12");
+@test typeof(string(gstr))==GenericString
+@test bytestring()==""
+
+@test convert(Array{UInt8}, gstr) ==[49;50]
+@test convert(Array{Char,1}, gstr) ==['1';'2']
+@test convert(Symbol, gstr)==symbol("12")
+
+@test getindex(gstr, Bool(1))=='1'
+@test getindex(gstr,Bool(1):Bool(1))=="1"
+@test getindex(gstr,AbstractVector([Bool(1):Bool(1);]))=="1"
+
+@test symbol(gstr)==symbol("12")
+
+@test_throws ErrorException sizeof(gstr)
+
+@test length(GenericString(""))==0
+
+@test getindex(gstr,AbstractVector([Bool(1):Bool(1);]))=="1"
+
+@test nextind(AbstractArray([Bool(1):Bool(1);]),1)==2
+
+@test ind2chr(gstr,2)==2
+
+# issue #10307
+@test typeof(map(Int16,String[])) == Vector{Int16}
+
+for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128]
+    for i in [typemax(T), typemin(T)]
+        s = "$i"
+        @test get(tryparse(T, s)) == i
+    end
+end
+
+for T in [Int8, Int16, Int32, Int64, Int128]
+    for i in [typemax(T), typemin(T)]
+        f = "$(i)0"
+        @test isnull(tryparse(T, f))
+    end
+end
+
+# issue #11142
+s = "abcdefghij"
+sp = pointer(s)
+@test ascii(sp) == s
+@test ascii(sp,5) == "abcde"
+@test typeof(ascii(sp)) == ASCIIString
+@test typeof(utf8(sp)) == UTF8String
+s = "abcde\uff\u2000\U1f596"
+sp = pointer(s)
+@test utf8(sp) == s
+@test utf8(sp,5) == "abcde"
+@test typeof(utf8(sp)) == UTF8String
+
+@test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890)
+@test isnull(tryparse(BigInt, "1234567890-"))
+
+@test get(tryparse(Float64, "64")) == 64.0
+@test isnull(tryparse(Float64, "64o"))
+@test get(tryparse(Float32, "32")) == 32.0f0
+@test isnull(tryparse(Float32, "32o"))
+
+# issue #10994: handle embedded NUL chars for string parsing
+for T in [BigInt, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128]
+    @test_throws ArgumentError parse(T, "1\0")
+end
+for T in [BigInt, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, Float64, Float32]
+    @test isnull(tryparse(T, "1\0"))
+end
+let s = normalize_string("tést",:NFKC)
+    @test bytestring(Base.unsafe_convert(Cstring, s)) == s
+    @test bytestring(convert(Cstring, symbol(s))) == s
+    @test wstring(Base.unsafe_convert(Cwstring, wstring(s))) == s
+end
+let s = "ba\0d"
+    @test_throws ArgumentError Base.unsafe_convert(Cstring, s)
+    @test_throws ArgumentError Base.unsafe_convert(Cwstring, wstring(s))
+end
+
+# issue # 11389: Vector{UInt32} was copied with UTF32String, unlike Vector{Char}
+a = UInt32[48,0]
+b = UTF32String(a)
+@test b=="0"
+a[1] = 65
+@test b=="A"
+c = Char['0','\0']
+d = UTF32String(c)
+@test d=="0"
+c[1] = 'A'
+@test d=="A"
+
+# iteration
+@test [c for c in "ḟøøƀäṙ"] == ['ḟ', 'ø', 'ø', 'ƀ', 'ä', 'ṙ']
+@test [i for i in eachindex("ḟøøƀäṙ")] == [1, 4, 6, 8, 10, 12]
+@test [x for x in enumerate("ḟøøƀäṙ")] == [(1, 'ḟ'), (2, 'ø'), (3, 'ø'), (4, 'ƀ'), (5, 'ä'), (6, 'ṙ')]
+
+# Issue #11140
+@test isvalid(utf32("a")) == true
+@test isvalid(utf32("\x00")) == true
+@test isvalid(UTF32String, UInt32[0xd800,0]) == false
+
+# Issue #11241
+
+@test isvalid(ASCIIString, "is_valid_ascii") == true
+@test isvalid(ASCIIString, "Σ_not_valid_ascii") == false
+
+# test all edge conditions
+for (val, pass) in (
+        (0, true), (0xd7ff, true),
+        (0xd800, false), (0xdfff, false),
+        (0xe000, true), (0xffff, true),
+        (0x10000, true), (0x10ffff, true),
+        (0x110000, false)
+    )
+    @test isvalid(Char, val) == pass
+end
+for (val, pass) in (
+        (b"\x00", true),
+        (b"\x7f", true),
+        (b"\x80", false),
+        (b"\xbf", false),
+        (b"\xc0", false),
+        (b"\xff", false),
+        (b"\xc0\x80", false),
+        (b"\xc1\x80", false),
+        (b"\xc2\x80", true),
+        (b"\xc2\xc0", false),
+        (b"\xed\x9f\xbf", true),
+        (b"\xed\xa0\x80", false),
+        (b"\xed\xbf\xbf", false),
+        (b"\xee\x80\x80", true),
+        (b"\xef\xbf\xbf", true),
+        (b"\xf0\x90\x80\x80", true),
+        (b"\xf4\x8f\xbf\xbf", true),
+        (b"\xf4\x90\x80\x80", false),
+        (b"\xf5\x80\x80\x80", false),
+        (b"\ud800\udc00", false),
+        (b"\udbff\udfff", false),
+        (b"\ud800\u0100", false),
+        (b"\udc00\u0100", false),
+        (b"\udc00\ud800", false)
+        )
+    @test isvalid(UTF8String, val) == pass
+end
+for (val, pass) in (
+        (UInt16[0x0000], true),
+        (UInt16[0xd7ff,0], true),
+        (UInt16[0xd800,0], false),
+        (UInt16[0xdfff,0], false),
+        (UInt16[0xe000,0], true),
+        (UInt16[0xffff,0], true),
+        (UInt16[0xd800,0xdc00,0], true),
+        (UInt16[0xdbff,0xdfff,0], true),
+        (UInt16[0xd800,0x0100,0], false),
+        (UInt16[0xdc00,0x0100,0], false),
+        (UInt16[0xdc00,0xd800,0], false)
+        )
+    @test isvalid(UTF16String, val) == pass
+end
+for (val, pass) in (  # UTF-32 edge conditions, mirroring the Char list above
+        (UInt32[0x0000], true),
+        (UInt32[0xd7ff,0], true),      # last code point before the surrogate range
+        (UInt32[0xd800,0], false),     # first surrogate
+        (UInt32[0xdfff,0], false),     # last surrogate
+        (UInt32[0xe000,0], true),      # first code point after the surrogate range
+        (UInt32[0xffff,0], true),      # last code point of the BMP
+        (UInt32[0x10000,0], true),     # first code point above the BMP (was 0x100000, a typo)
+        (UInt32[0x10ffff,0], true),    # last valid code point
+        (UInt32[0x110000,0], false),   # first value past the valid range
+        )
+    @test isvalid(UTF32String, val) == pass
+end
+
+# Issue #11203
+@test isvalid(ASCIIString,UInt8[]) == true
+@test isvalid(UTF8String, UInt8[]) == true
+@test isvalid(UTF16String,UInt16[]) == true
+@test isvalid(UTF32String,UInt32[]) == true
+
+# Check UTF-8 characters
+# Check ASCII range (true),
+# then single continuation bytes and lead bytes with no following continuation bytes (false)
+for (rng,flg) in ((0:0x7f, true), (0x80:0xff, false))
+    for byt in rng
+        @test isvalid(UTF8String, UInt8[byt]) == flg
+    end
+end
+# Check overlong lead bytes for 2-byte sequences (false)
+for byt = 0xc0:0xc1
+    @test isvalid(UTF8String, UInt8[byt,0x80]) == false
+end
+# Check valid lead-in to two-byte sequences (true)
+for byt = 0xc2:0xdf
+    for (rng,flg) in ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false))
+        for cont in rng
+            @test isvalid(UTF8String, UInt8[byt, cont]) == flg
+        end
+    end
+end
+# Check three-byte sequences
+for r1 in (0xe0:0xec, 0xee:0xef)
+    for byt = r1
+        # Check for short sequence
+        @test isvalid(UTF8String, UInt8[byt]) == false
+        for (rng,flg) in ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false))
+            for cont in rng
+                @test isvalid(UTF8String, UInt8[byt, cont]) == false
+                @test isvalid(UTF8String, UInt8[byt, cont, 0x80]) == flg
+            end
+        end
+    end
+end
+# Check Hangul characters (0xd000-0xd7ff)
+# Check for short sequence, or start of surrogate pair
+for (rng,flg) in ((0x00:0x7f, false), (0x80:0x9f, true), (0xa0:0xff, false))
+    for cont in rng
+        @test isvalid(UTF8String, UInt8[0xed, cont]) == false
+        @test isvalid(UTF8String, UInt8[0xed, cont, 0x80]) == flg
+    end
+end
+# Check valid four-byte sequences
+for byt = 0xf0:0xf4
+    if (byt == 0xf0)
+        r0 = ((0x00:0x8f, false), (0x90:0xbf, true), (0xc0:0xff, false))
+    elseif byt == 0xf4
+        r0 = ((0x00:0x7f, false), (0x80:0x8f, true), (0x90:0xff, false))
+    else
+        r0 = ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false))
+    end
+    for (rng,flg) in r0
+        for cont in rng
+            @test isvalid(UTF8String, UInt8[byt, cont]) == false
+            @test isvalid(UTF8String, UInt8[byt, cont, 0x80]) == false
+            @test isvalid(UTF8String, UInt8[byt, cont, 0x80, 0x80]) == flg
+        end
+    end
+end
+# Check five-byte sequences, should be invalid
+for byt = 0xf8:0xfb
+    @test isvalid(UTF8String, UInt8[byt, 0x80, 0x80, 0x80, 0x80]) == false
+end
+# Check six-byte sequences, should be invalid
+for byt = 0xfc:0xfd
+    @test isvalid(UTF8String, UInt8[byt, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
+end
+# Check seven-byte sequences (lead byte 0xfe plus six continuation bytes), should be invalid
+@test isvalid(UTF8String, UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
+
+# 11482
+
+# isvalid
+let s = "abcdef", u8 = "abcdef\uff", u16 = utf16(u8), u32 = utf32(u8),
+    bad32 = utf32(UInt32[65,0x110000]), badch = Char[0x110000][1]
+
+    @test !isvalid(bad32)
+    @test !isvalid(badch)
+    @test isvalid(s)
+    @test isvalid(u8)
+    @test isvalid(u16)
+    @test isvalid(u32)
+    @test isvalid(ASCIIString, s)
+    @test isvalid(UTF8String,  u8)
+    @test isvalid(UTF16String, u16)
+    @test isvalid(UTF32String, u32)
+end
+
+# lower and upper
+@test uppercase("aBc") == "ABC"
+@test uppercase('A') == 'A'
+@test uppercase('a') == 'A'
+@test lowercase("AbC") == "abc"
+@test lowercase('A') == 'a'
+@test lowercase('a') == 'a'
+@test uppercase('α') == '\u0391'
+@test lowercase('Δ') == 'δ'
+@test lowercase('\U118bf') == '\U118df'
+@test uppercase('\U1044d') == '\U10425'
+@test ucfirst("Abc") == "Abc"
+@test ucfirst("abc") == "Abc"
+@test lcfirst("ABC") == "aBC"
+@test lcfirst("aBC") == "aBC"
+
+# issue # 11464: uppercase/lowercase of UTF16String becomes a UTF8String
+str = "abcdef\uff\uffff\U0010ffffABCDEF"  # \U with all 8 hex digits: \u stops after 4, and a short \U would absorb "AB"
+@test typeof(uppercase("abcdef")) == ASCIIString
+@test typeof(uppercase(utf8(str))) == UTF8String
+@test typeof(uppercase(utf16(str))) == UTF16String  # str now contains U+10FFFF, a code point above the BMP
+@test typeof(uppercase(utf32(str))) == UTF32String
+@test typeof(lowercase("ABCDEF")) == ASCIIString
+@test typeof(lowercase(utf8(str))) == UTF8String
+@test typeof(lowercase(utf16(str))) == UTF16String
+@test typeof(lowercase(utf32(str))) == UTF32String
+
+foomap(ch) = (ch > 65)  # result of a comparison, not a Char
+foobar(ch) = Char(0xd800)  # always a surrogate code point
+foobaz(ch) = Char(0x200000)  # always a value above 0x10ffff
+@test_throws UnicodeError map(foomap, utf16(str))
+@test_throws UnicodeError map(foobar, utf16(str))
+@test_throws UnicodeError map(foobaz, utf16(str))
diff --git a/test/strings/io.jl b/test/strings/io.jl
new file mode 100644
index 0000000000000..92717a337f815
--- /dev/null
+++ b/test/strings/io.jl
@@ -0,0 +1,225 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# string escaping & unescaping
+cx = Any[
+    0x00000000      '\0'        "\\0"
+    0x00000001      '\x01'      "\\x01"
+    0x00000006      '\x06'      "\\x06"
+    0x00000007      '\a'        "\\a"
+    0x00000008      '\b'        "\\b"
+    0x00000009      '\t'        "\\t"
+    0x0000000a      '\n'        "\\n"
+    0x0000000b      '\v'        "\\v"
+    0x0000000c      '\f'        "\\f"
+    0x0000000d      '\r'        "\\r"
+    0x0000000e      '\x0e'      "\\x0e"
+    0x0000001a      '\x1a'      "\\x1a"
+    0x0000001b      '\e'        "\\e"
+    0x0000001c      '\x1c'      "\\x1c"
+    0x0000001f      '\x1f'      "\\x1f"
+    0x00000020      ' '         " "
+    0x0000002f      '/'         "/"
+    0x00000030      '0'         "0"
+    0x00000039      '9'         "9"
+    0x0000003a      ':'         ":"
+    0x00000040      '@'         "@"
+    0x00000041      'A'         "A"
+    0x0000005a      'Z'         "Z"
+    0x0000005b      '['         "["
+    0x00000060      '`'         "`"
+    0x00000061      'a'         "a"
+    0x0000007a      'z'         "z"
+    0x0000007b      '{'         "{"
+    0x0000007e      '~'         "~"
+    0x0000007f      '\x7f'      "\\x7f"
+    0x000000bf      '\ubf'      "\\ubf"
+    0x000000ff      '\uff'      "\\uff"
+    0x00000100      '\u100'     "\\u100"
+    0x000001ff      '\u1ff'     "\\u1ff"
+    0x00000fff      '\ufff'     "\\ufff"
+    0x00001000      '\u1000'    "\\u1000"
+    0x00001fff      '\u1fff'    "\\u1fff"
+    0x0000ffff      '\uffff'    "\\uffff"
+    0x00010000      '\U10000'   "\\U10000"
+    0x0001ffff      '\U1ffff'   "\\U1ffff"
+    0x0002ffff      '\U2ffff'   "\\U2ffff"
+    0x00030000      '\U30000'   "\\U30000"
+    0x000dffff      '\Udffff'   "\\Udffff"
+    0x000e0000      '\Ue0000'   "\\Ue0000"
+    0x000effff      '\Ueffff'   "\\Ueffff"
+    0x000f0000      '\Uf0000'   "\\Uf0000"
+    0x000fffff      '\Ufffff'   "\\Ufffff"
+    0x00100000      '\U100000'  "\\U100000"
+    0x0010ffff      '\U10ffff'  "\\U10ffff"
+]
+
+for i = 1:size(cx,1)
+    @test cx[i,1] == convert(UInt32, cx[i,2])
+    @test string(cx[i,2]) == unescape_string(cx[i,3])
+    if isascii(cx[i,2]) || !isprint(cx[i,2])
+        @test cx[i,3] == escape_string(string(cx[i,2]))
+    end
+    for j = 1:size(cx,1)
+        str = string(cx[i,2], cx[j,2])
+        @test str == unescape_string(escape_string(str))
+    end
+end
+
+for i = 0:0x7f, p = ["","\0","x","xxx","\x7f","\uFF","\uFFF",
+                     "\uFFFF","\U10000","\U10FFF","\U10FFFF"]
+    c = Char(i)
+    cp = string(c,p)
+    op = string(Char(div(i,8)), oct(i%8), p)
+    hp = string(Char(div(i,16)), hex(i%16), p)
+    @test string(unescape_string(string("\\",oct(i,1),p))) == cp
+    @test string(unescape_string(string("\\",oct(i,2),p))) == cp
+    @test string(unescape_string(string("\\",oct(i,3),p))) == cp
+    @test string(unescape_string(string("\\",oct(i,4),p))) == op
+    @test string(unescape_string(string("\\x",hex(i,1),p))) == cp
+    @test string(unescape_string(string("\\x",hex(i,2),p))) == cp
+    @test string(unescape_string(string("\\x",hex(i,3),p))) == hp
+end
+
+@test "\z" == unescape_string("\z") == "z"
+@test "\X" == unescape_string("\X") == "X"
+@test "\AbC" == unescape_string("\AbC") == "AbC"
+
+@test "\0" == unescape_string("\\0")
+@test "\1" == unescape_string("\\1")
+@test "\7" == unescape_string("\\7")
+@test "\0x" == unescape_string("\\0x")
+@test "\1x" == unescape_string("\\1x")
+@test "\7x" == unescape_string("\\7x")
+@test "\00" == unescape_string("\\00")
+@test "\01" == unescape_string("\\01")
+@test "\07" == unescape_string("\\07")
+@test "\70" == unescape_string("\\70")
+@test "\71" == unescape_string("\\71")
+@test "\77" == unescape_string("\\77")
+@test "\00x" == unescape_string("\\00x")
+@test "\01x" == unescape_string("\\01x")
+@test "\07x" == unescape_string("\\07x")
+@test "\70x" == unescape_string("\\70x")
+@test "\71x" == unescape_string("\\71x")
+@test "\77x" == unescape_string("\\77x")
+@test "\000" == unescape_string("\\000")
+@test "\001" == unescape_string("\\001")
+@test "\007" == unescape_string("\\007")
+@test "\070" == unescape_string("\\070")
+@test "\071" == unescape_string("\\071")
+@test "\077" == unescape_string("\\077")
+@test "\170" == unescape_string("\\170")
+@test "\171" == unescape_string("\\171")
+@test "\177" == unescape_string("\\177")
+@test "\0001" == unescape_string("\\0001")
+@test "\0011" == unescape_string("\\0011")
+@test "\0071" == unescape_string("\\0071")
+@test "\0701" == unescape_string("\\0701")
+@test "\0711" == unescape_string("\\0711")
+@test "\0771" == unescape_string("\\0771")
+@test "\1701" == unescape_string("\\1701")
+@test "\1711" == unescape_string("\\1711")
+@test "\1771" == unescape_string("\\1771")
+
+@test "\x0" == unescape_string("\\x0")
+@test "\x1" == unescape_string("\\x1")
+@test "\xf" == unescape_string("\\xf")
+@test "\xF" == unescape_string("\\xF")
+@test "\x0x" == unescape_string("\\x0x")
+@test "\x1x" == unescape_string("\\x1x")
+@test "\xfx" == unescape_string("\\xfx")
+@test "\xFx" == unescape_string("\\xFx")
+@test "\x00" == unescape_string("\\x00")
+@test "\x01" == unescape_string("\\x01")
+@test "\x0f" == unescape_string("\\x0f")
+@test "\x0F" == unescape_string("\\x0F")
+
+if !success(`iconv --version`)
+    warn("iconv not found, skipping unicode tests!")
+    @windows_only warn("Use WinRPM.install(\"win_iconv\") to run these tests")
+else
+    # Create unicode test data directory
+    unicodedir = mktempdir()
+
+    # Use perl to generate the primary data
+    primary_encoding = "UTF-32BE"
+    primary_path = replace(joinpath(unicodedir, primary_encoding*".unicode"),"\\","\\\\\\\\")
+    run(`perl -e "
+        $$fname = \"$primary_path\";
+        open(UNICODEF, \">\", \"$$fname\")         or die \"can\'t open $$fname: $$!\";
+        binmode(UNICODEF);
+        print UNICODEF pack \"N*\", 0xfeff, 0..0xd7ff, 0xe000..0x10ffff;
+        close(UNICODEF);"` )
+
+    # Use iconv to generate the other data
+    for encoding in ["UTF-32LE", "UTF-16BE", "UTF-16LE", "UTF-8"]
+        output_path = joinpath(unicodedir, encoding*".unicode")
+        f = Base.FS.open(output_path,Base.JL_O_WRONLY|Base.JL_O_CREAT,Base.S_IRUSR | Base.S_IWUSR | Base.S_IRGRP | Base.S_IROTH)
+        run(pipe(`iconv -f $primary_encoding -t $encoding $primary_path`, f))
+        Base.FS.close(f)
+    end
+
+    f=open(joinpath(unicodedir,"UTF-32LE.unicode"))
+    str1 = utf32(read(f, UInt32, 1112065)[2:end])
+    close(f)
+
+    f=open(joinpath(unicodedir,"UTF-8.unicode"))
+    str2 = UTF8String(read(f, UInt8, 4382595)[4:end])
+    close(f)
+    @test str1 == str2
+
+    @test str1 == open(joinpath(unicodedir,"UTF-16LE.unicode")) do f
+        utf16(read(f, UInt16, 2160641)[2:end])
+    end
+
+    @test str1 == open(joinpath(unicodedir,"UTF-16LE.unicode")) do f
+        utf16(read(f, UInt8, 2160641*2))
+    end
+    @test str1 == open(joinpath(unicodedir,"UTF-16BE.unicode")) do f
+        utf16(read(f, UInt8, 2160641*2))
+    end
+
+    @test str1 == open(joinpath(unicodedir,"UTF-32LE.unicode")) do f
+        utf32(read(f, UInt8, 1112065*4))
+    end
+    @test str1 == open(joinpath(unicodedir,"UTF-32BE.unicode")) do f
+        utf32(read(f, UInt8, 1112065*4))
+    end
+
+    str1 = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
+    str2 = UTF32String(UInt32[
+                 8704, 32, 949, 32, 62, 32, 48, 44, 32, 8707, 32,
+                 948, 32, 62, 32, 48, 58, 32, 124, 120, 45, 121, 124,
+                 32, 60, 32, 948, 32, 8658, 32, 124, 102, 40, 120,
+                 41, 45, 102, 40, 121, 41, 124, 32, 60, 32, 949
+                 ,0])
+    @test str1 == str2
+
+    # Cleanup unicode data
+    for encoding in ["UTF-32BE", "UTF-32LE", "UTF-16BE", "UTF-16LE", "UTF-8"]
+        rm(joinpath(unicodedir,encoding*".unicode"))
+    end
+    rm(unicodedir)
+end
+
+# Tests of join()
+@test join([]) == ""
+@test join(["a"],"?") == "a"
+@test join("HELLO",'-') == "H-E-L-L-O"
+@test join(1:5, ", ", " and ") == "1, 2, 3, 4 and 5"
+@test join(["apples", "bananas", "pineapples"], ", ", " and ") == "apples, bananas and pineapples"
+
+# issue #9178 `join` calls `done()` twice on the iterables
+type i9178
+    nnext::Int64  # number of next() calls so far
+    ndone::Int64  # number of done() calls so far
+end
+Base.start(jt::i9178) = (jt.nnext=0 ; jt.ndone=0 ; 0)  # reset both counters; iteration state starts at 0
+Base.done(jt::i9178, n) = (jt.ndone += 1 ; n > 3)  # count the call; finished once the state exceeds 3
+Base.next(jt::i9178, n) = (jt.nnext += 1 ; ("$(jt.nnext),$(jt.ndone)", n+1))  # item is "nnext,ndone"
+@test join(i9178(0,0), ";") == "1,1;2,2;3,3;4,4"  # equal counts in every item: one done() per next()
+
+# quotes + interpolation (issue #455)
+@test "$("string")" == "string"
+arr = ["a","b","c"]
+@test "[$(join(arr, " - "))]" == "[a - b - c]"
diff --git a/test/strings/search.jl b/test/strings/search.jl
new file mode 100644
index 0000000000000..524322615b836
--- /dev/null
+++ b/test/strings/search.jl
@@ -0,0 +1,352 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# some test strings
+astr = "Hello, world.\n"
+u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
+
+## generic string uses only endof and next ##
+
+immutable GenericString <: AbstractString
+    string::AbstractString  # wrapped string; both methods below forward to it
+end
+
+Base.endof(s::GenericString) = endof(s.string)  # endof of the wrapped string
+Base.next(s::GenericString, i::Int) = next(s.string, i)  # next of the wrapped string at index i
+
+# ascii search
+for str in [astr, GenericString(astr)]
+    @test_throws BoundsError search(str, 'z', 0)
+    @test_throws BoundsError search(str, '∀', 0)
+    @test search(str, 'x') == 0
+    @test search(str, '\0') == 0
+    @test search(str, '\u80') == 0
+    @test search(str, '∀') == 0
+    @test search(str, 'H') == 1
+    @test search(str, 'l') == 3
+    @test search(str, 'l', 4) == 4
+    @test search(str, 'l', 5) == 11
+    @test search(str, 'l', 12) == 0
+    @test search(str, ',') == 6
+    @test search(str, ',', 7) == 0
+    @test search(str, '\n') == 14
+    @test search(str, '\n', 15) == 0
+    @test_throws BoundsError search(str, 'ε', nextind(str,endof(str))+1)
+    @test_throws BoundsError search(str, 'a', nextind(str,endof(str))+1)
+end
+
+# ascii rsearch
+for str in [astr]
+    @test rsearch(str, 'x') == 0
+    @test rsearch(str, '\0') == 0
+    @test rsearch(str, '\u80') == 0
+    @test rsearch(str, '∀') == 0
+    @test rsearch(str, 'H') == 1
+    @test rsearch(str, 'H', 0) == 0
+    @test rsearch(str, 'l') == 11
+    @test rsearch(str, 'l', 5) == 4
+    @test rsearch(str, 'l', 4) == 4
+    @test rsearch(str, 'l', 3) == 3
+    @test rsearch(str, 'l', 2) == 0
+    @test rsearch(str, ',') == 6
+    @test rsearch(str, ',', 5) == 0
+    @test rsearch(str, '\n') == 14
+end
+
+# utf-8 search
+for str in (u8str, GenericString(u8str))
+    @test_throws BoundsError search(str, 'z', 0)
+    @test_throws BoundsError search(str, '∀', 0)
+    @test search(str, 'z') == 0
+    @test search(str, '\0') == 0
+    @test search(str, '\u80') == 0
+    @test search(str, '∄') == 0
+    @test search(str, '∀') == 1
+    @test_throws UnicodeError search(str, '∀', 2)
+    @test search(str, '∀', 4) == 0
+    @test search(str, '∃') == 13
+    @test_throws UnicodeError search(str, '∃', 15)
+    @test search(str, '∃', 16) == 0
+    @test search(str, 'x') == 26
+    @test search(str, 'x', 27) == 43
+    @test search(str, 'x', 44) == 0
+    @test search(str, 'δ') == 17
+    @test_throws UnicodeError search(str, 'δ', 18)
+    @test search(str, 'δ', nextind(str,17)) == 33
+    @test search(str, 'δ', nextind(str,33)) == 0
+    @test search(str, 'ε') == 5
+    @test search(str, 'ε', nextind(str,5)) == 54
+    @test search(str, 'ε', nextind(str,54)) == 0
+    @test search(str, 'ε', nextind(str,endof(str))) == 0
+    @test search(str, 'a', nextind(str,endof(str))) == 0
+    @test_throws BoundsError search(str, 'ε', nextind(str,endof(str))+1)
+    @test_throws BoundsError search(str, 'a', nextind(str,endof(str))+1)
+end
+
+# utf-8 rsearch
+for str in [u8str]
+    @test rsearch(str, 'z') == 0
+    @test rsearch(str, '\0') == 0
+    @test rsearch(str, '\u80') == 0
+    @test rsearch(str, '∄') == 0
+    @test rsearch(str, '∀') == 1
+    @test rsearch(str, '∀', 0) == 0
+    @test rsearch(str, '∃') == 13
+    @test rsearch(str, '∃', 14) == 13
+    @test rsearch(str, '∃', 13) == 13
+    @test rsearch(str, '∃', 12) == 0
+    @test rsearch(str, 'x') == 43
+    @test rsearch(str, 'x', 42) == 26
+    @test rsearch(str, 'x', 25) == 0
+    @test rsearch(str, 'δ') == 33
+    @test rsearch(str, 'δ', 32) == 17
+    @test rsearch(str, 'δ', 16) == 0
+    @test rsearch(str, 'ε') == 54
+    @test rsearch(str, 'ε', 53) == 5
+    @test rsearch(str, 'ε', 4) == 0
+end
+
+# string search with a single-char string
+@test search(astr, "x") == 0:-1
+@test search(astr, "H") == 1:1
+@test search(astr, "H", 2) == 0:-1
+@test search(astr, "l") == 3:3
+@test search(astr, "l", 4) == 4:4
+@test search(astr, "l", 5) == 11:11
+@test search(astr, "l", 12) == 0:-1
+@test search(astr, "\n") == 14:14
+@test search(astr, "\n", 15) == 0:-1
+
+@test search(u8str, "z") == 0:-1
+@test search(u8str, "∄") == 0:-1
+@test search(u8str, "∀") == 1:1
+@test search(u8str, "∀", 4) == 0:-1
+@test search(u8str, "∃") == 13:13
+@test search(u8str, "∃", 16) == 0:-1
+@test search(u8str, "x") == 26:26
+@test search(u8str, "x", 27) == 43:43
+@test search(u8str, "x", 44) == 0:-1
+@test search(u8str, "ε") == 5:5
+@test search(u8str, "ε", 7) == 54:54
+@test search(u8str, "ε", 56) == 0:-1
+
+# string rsearch with a single-char string
+@test rsearch(astr, "x") == 0:-1
+@test rsearch(astr, "H") == 1:1
+@test rsearch(astr, "H", 2) == 1:1
+@test rsearch(astr, "H", 0) == 0:-1
+@test rsearch(astr, "l") == 11:11
+@test rsearch(astr, "l", 10) == 4:4
+@test rsearch(astr, "l", 4) == 4:4
+@test rsearch(astr, "l", 3) == 3:3
+@test rsearch(astr, "l", 2) == 0:-1
+@test rsearch(astr, "\n") == 14:14
+@test rsearch(astr, "\n", 13) == 0:-1
+
+@test rsearch(u8str, "z") == 0:-1
+@test rsearch(u8str, "∄") == 0:-1
+@test rsearch(u8str, "∀") == 1:1
+@test rsearch(u8str, "∀", 0) == 0:-1
+#TODO: setting the limit in the middle of a wide char
+#      makes search fail but rsearch succeed.
+#      Should rsearch fail as well?
+#@test rsearch(u8str, "∀", 2) == 0:-1 # gives 1:3
+@test rsearch(u8str, "∃") == 13:13
+@test rsearch(u8str, "∃", 12) == 0:-1
+@test rsearch(u8str, "x") == 43:43
+@test rsearch(u8str, "x", 42) == 26:26
+@test rsearch(u8str, "x", 25) == 0:-1
+@test rsearch(u8str, "ε") == 54:54
+@test rsearch(u8str, "ε", 53) == 5:5
+@test rsearch(u8str, "ε", 4) == 0:-1
+
+# string search with a single-char regex
+@test search(astr, r"x") == 0:-1
+@test search(astr, r"H") == 1:1
+@test search(astr, r"H", 2) == 0:-1
+@test search(astr, r"l") == 3:3
+@test search(astr, r"l", 4) == 4:4
+@test search(astr, r"l", 5) == 11:11
+@test search(astr, r"l", 12) == 0:-1
+@test search(astr, r"\n") == 14:14
+@test search(astr, r"\n", 15) == 0:-1
+@test search(u8str, r"z") == 0:-1
+@test search(u8str, r"∄") == 0:-1
+@test search(u8str, r"∀") == 1:1
+@test search(u8str, r"∀", 4) == 0:-1
+@test search(u8str, r"∀") == search(u8str, r"\u2200")
+@test search(u8str, r"∀", 4) == search(u8str, r"\u2200", 4)
+@test search(u8str, r"∃") == 13:13
+@test search(u8str, r"∃", 16) == 0:-1
+@test search(u8str, r"x") == 26:26
+@test search(u8str, r"x", 27) == 43:43
+@test search(u8str, r"x", 44) == 0:-1
+@test search(u8str, r"ε") == 5:5
+@test search(u8str, r"ε", 7) == 54:54
+@test search(u8str, r"ε", 56) == 0:-1
+for i = 1:endof(astr)
+    @test search(astr, r"."s, i) == i:i
+end
+for i = 1:endof(u8str)
+    if isvalid(u8str,i)
+        @test search(u8str, r"."s, i) == i:i
+    end
+end
+
+# string search with a zero-char string
+for i = 1:endof(astr)
+    @test search(astr, "", i) == i:i-1
+end
+for i = 1:endof(u8str)
+    @test search(u8str, "", i) == i:i-1
+end
+@test search("", "") == 1:0
+
+# string rsearch with a zero-char string
+for i = 1:endof(astr)
+    @test rsearch(astr, "", i) == i:i-1
+end
+for i = 1:endof(u8str)
+    @test rsearch(u8str, "", i) == i:i-1
+end
+@test rsearch("", "") == 1:0
+
+# string search with a zero-char regex
+for i = 1:endof(astr)
+    @test search(astr, r"", i) == i:i-1
+end
+for i = 1:endof(u8str)
+    # TODO: should regex search fast-forward invalid indices?
+    if isvalid(u8str,i)
+        @test search(u8str, r""s, i) == i:i-1
+    end
+end
+
+# string search with a two-char string literal
+@test search("foo,bar,baz", "xx") == 0:-1
+@test search("foo,bar,baz", "fo") == 1:2
+@test search("foo,bar,baz", "fo", 3) == 0:-1
+@test search("foo,bar,baz", "oo") == 2:3
+@test search("foo,bar,baz", "oo", 4) == 0:-1
+@test search("foo,bar,baz", "o,") == 3:4
+@test search("foo,bar,baz", "o,", 5) == 0:-1
+@test search("foo,bar,baz", ",b") == 4:5
+@test search("foo,bar,baz", ",b", 6) == 8:9
+@test search("foo,bar,baz", ",b", 10) == 0:-1
+@test search("foo,bar,baz", "az") == 10:11
+@test search("foo,bar,baz", "az", 12) == 0:-1
+
+# issue #9365
+# string search with a two-char UTF-8 (2 byte) string literal
+@test search("ééé", "éé") == 1:3
+@test search("ééé", "éé", 1) == 1:3
+# string search with a two-char UTF-8 (3 byte) string literal
+@test search("€€€", "€€") == 1:4
+@test search("€€€", "€€", 1) == 1:4
+# string search with a two-char UTF-8 (4 byte) string literal
+@test search("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 1:5
+@test search("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
+
+# string search with a two-char UTF-8 (2 byte) string literal
+@test search("éé", "éé") == 1:3
+@test search("éé", "éé", 1) == 1:3
+# string search with a two-char UTF-8 (3 byte) string literal
+@test search("€€", "€€") == 1:4
+@test search("€€", "€€", 1) == 1:4
+# string search with a two-char UTF-8 (4 byte) string literal
+@test search("\U1f596\U1f596", "\U1f596\U1f596") == 1:5
+@test search("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
+
+# string rsearch with a two-char UTF-8 (2 byte) string literal
+@test rsearch("ééé", "éé") == 3:5
+@test rsearch("ééé", "éé", endof("ééé")) == 3:5
+# string rsearch with a two-char UTF-8 (3 byte) string literal
+@test rsearch("€€€", "€€") == 4:7
+@test rsearch("€€€", "€€", endof("€€€")) == 4:7
+# string rsearch with a two-char UTF-8 (4 byte) string literal
+@test rsearch("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 5:9
+@test rsearch("\U1f596\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 5:9
+
+# string rsearch with a two-char UTF-8 (2 byte) string literal
+@test rsearch("éé", "éé") == 1:3        # should really be 1:4!
+@test rsearch("éé", "éé", endof("ééé")) == 1:3
+# string rsearch with a two-char UTF-8 (3 byte) string literal
+@test rsearch("€€", "€€") == 1:4        # should really be 1:6!
+@test rsearch("€€", "€€", endof("€€€")) == 1:4
+# string rsearch with a two-char UTF-8 (4 byte) string literal
+@test rsearch("\U1f596\U1f596", "\U1f596\U1f596") == 1:5        # should really be 1:8!
+@test rsearch("\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 1:5
+
+# string rsearch with a two-char string literal
+@test rsearch("foo,bar,baz", "xx") == 0:-1
+@test rsearch("foo,bar,baz", "fo") == 1:2
+@test rsearch("foo,bar,baz", "fo", 1) == 0:-1
+@test rsearch("foo,bar,baz", "oo") == 2:3
+@test rsearch("foo,bar,baz", "oo", 2) == 0:-1
+@test rsearch("foo,bar,baz", "o,") == 3:4
+@test rsearch("foo,bar,baz", "o,", 1) == 0:-1
+@test rsearch("foo,bar,baz", ",b") == 8:9
+@test rsearch("foo,bar,baz", ",b", 6) == 4:5
+@test rsearch("foo,bar,baz", ",b", 3) == 0:-1
+@test rsearch("foo,bar,baz", "az") == 10:11
+@test rsearch("foo,bar,baz", "az", 10) == 0:-1
+
+# array rsearch
+@test rsearch(UInt8[1,2,3],UInt8[2,3],3) == 2:3
+@test rsearch(UInt8[1,2,3],UInt8[2,3],1) == 0:-1
+
+# string search with a two-char regex
+@test search("foo,bar,baz", r"xx") == 0:-1
+@test search("foo,bar,baz", r"fo") == 1:2
+@test search("foo,bar,baz", r"fo", 3) == 0:-1
+@test search("foo,bar,baz", r"oo") == 2:3
+@test search("foo,bar,baz", r"oo", 4) == 0:-1
+@test search("foo,bar,baz", r"o,") == 3:4
+@test search("foo,bar,baz", r"o,", 5) == 0:-1
+@test search("foo,bar,baz", r",b") == 4:5
+@test search("foo,bar,baz", r",b", 6) == 8:9
+@test search("foo,bar,baz", r",b", 10) == 0:-1
+@test search("foo,bar,baz", r"az") == 10:11
+@test search("foo,bar,baz", r"az", 12) == 0:-1
+
+@test searchindex("foo", 'o') == 2
+@test searchindex("foo", 'o', 3) == 3
+
+# string searchindex with a two-char UTF-8 (2 byte) string literal
+@test searchindex("ééé", "éé") == 1
+@test searchindex("ééé", "éé", 1) == 1
+# string searchindex with a two-char UTF-8 (3 byte) string literal
+@test searchindex("€€€", "€€") == 1
+@test searchindex("€€€", "€€", 1) == 1
+# string searchindex with a two-char UTF-8 (4 byte) string literal
+@test searchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 1
+@test searchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 1) == 1
+
+# string searchindex with a two-char UTF-8 (2 byte) string literal
+@test searchindex("éé", "éé") == 1
+@test searchindex("éé", "éé", 1) == 1
+# string searchindex with a two-char UTF-8 (3 byte) string literal
+@test searchindex("€€", "€€") == 1
+@test searchindex("€€", "€€", 1) == 1
+# string searchindex with a two-char UTF-8 (4 byte) string literal
+@test searchindex("\U1f596\U1f596", "\U1f596\U1f596") == 1
+@test searchindex("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1
+
+# string rsearchindex with a two-char UTF-8 (2 byte) string literal
+@test rsearchindex("ééé", "éé") == 3
+@test rsearchindex("ééé", "éé", endof("ééé")) == 3
+# string rsearchindex with a two-char UTF-8 (3 byte) string literal
+@test rsearchindex("€€€", "€€") == 4
+@test rsearchindex("€€€", "€€", endof("€€€")) == 4
+# string rsearchindex with a two-char UTF-8 (4 byte) string literal
+@test rsearchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 5
+@test rsearchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 5
+
+# string rsearchindex with a two-char UTF-8 (2 byte) string literal
+@test rsearchindex("éé", "éé") == 1
+@test rsearchindex("éé", "éé", endof("ééé")) == 1
+# string rsearchindex with a two-char UTF-8 (3 byte) string literal
+@test rsearchindex("€€", "€€") == 1
+@test rsearchindex("€€", "€€", endof("€€€")) == 1
+# string rsearchindex with a two-char UTF-8 (4 byte) string literal
+@test rsearchindex("\U1f596\U1f596", "\U1f596\U1f596") == 1
+@test rsearchindex("\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 1
diff --git a/test/strings/types.jl b/test/strings/types.jl
new file mode 100644
index 0000000000000..66c49b1e88ba0
--- /dev/null
+++ b/test/strings/types.jl
@@ -0,0 +1,196 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+## SubString, RevString, and RepString tests ##
+
+## SubString tests ##
+u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
+u8str2 = u8str^2
+len_u8str = length(u8str)
+slen_u8str = length(u8str)
+len_u8str2 = length(u8str2)
+slen_u8str2 = length(u8str2)
+
+@test len_u8str2 == 2 * len_u8str
+@test slen_u8str2 == 2 * slen_u8str
+
+u8str2plain = utf8(u8str2)
+
+for i1 = 1:endof(u8str2)  # i1/i2 are byte indices (filtered by isvalid), so run to endof, not length
+    if !isvalid(u8str2, i1); continue; end  # skip indices that fall inside a multi-byte character
+    for i2 = i1:endof(u8str2)
+        if !isvalid(u8str2, i2); continue; end
+        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])  # same number of characters
+        @test sizeof(u8str2[i1:i2]) == sizeof(u8str2plain[i1:i2])  # same number of bytes
+        @test u8str2[i1:i2] == u8str2plain[i1:i2]                  # same contents
+    end
+end
+
+str="tempus fugit"              #length(str)==12
+ss=SubString(str,1,length(str)) #match source string
+@test length(ss)==length(str)
+
+ss=SubString(str,1,0)    #empty SubString
+@test length(ss)==0
+
+ss=SubString(str,14,20)  #start indexed beyond source string length
+@test length(ss)==0
+
+ss=SubString(str,10,16)  #end indexed beyond source string length
+@test length(ss)==3
+
+str2=""
+ss=SubString(str2,1,4)  #empty source string
+@test length(ss)==0
+
+ss=SubString(str2,1,1)  #empty source string, identical start and end index
+@test length(ss)==0
+
+@test SubString("foobar",big(1),big(3)) == "foo"
+
+str = "aa\u2200\u2222bb"
+u = SubString(str, 3, 6)
+@test length(u)==2
+b = IOBuffer()
+write(b, u)
+@test takebuf_string(b) == "\u2200\u2222"
+
+str = "føøbar"
+u = SubString(str, 4, 3)
+@test length(u)==0
+b = IOBuffer()
+write(b, u)
+@test takebuf_string(b) == ""
+
+str = "føøbar"
+u = SubString(str, 10, 10)
+@test length(u)==0
+b = IOBuffer()
+write(b, u)
+@test takebuf_string(b) == ""
+
+# search and SubString (issue #5679)
+str = "Hello, world!"
+u = SubString(str, 1, 5)
+@test rsearch(u, "World") == 0:-1
+@test rsearch(u, 'z') == 0
+@test rsearch(u, "ll") == 3:4
+
+# sizeof
+@test sizeof(SubString("abc\u2222def",4,4)) == 3
+
+# issue #3710
+@test prevind(SubString("{var}",2,4),4) == 3
+
+# issue #4183
+@test split(SubString(ascii("x"), 2, 0), "y") == AbstractString[""]
+@test split(SubString(utf8("x"), 2, 0), "y") == AbstractString[""]
+
+# issue #6772
+@test float(SubString("10",1,1)) === 1.0
+@test float(SubString("1 0",1,1)) === 1.0
+@test parse(Float32,SubString("10",1,1)) === 1.0f0
+
+# issue #5870
+@test !ismatch(Regex("aa"), SubString("",1,0))
+@test ismatch(Regex(""), SubString("",1,0))
+
+# isvalid(), chr2ind() and ind2chr() for SubString{DirectIndexString}
+let s="lorem ipsum",
+    sdict=Dict(SubString(s,1,11)=>s,
+               SubString(s,1,6)=>"lorem ",
+               SubString(s,1,0)=>"",
+               SubString(s,2,4)=>"ore",
+               SubString(s,2,16)=>"orem ipsum",
+               SubString(s,12,14)=>""
+               )
+    for (ss,s) in sdict
+        for i in -1:12
+            @test isvalid(ss,i)==isvalid(s,i)
+        end
+    end
+    for (ss,s) in sdict
+        for i in 1:length(ss)
+            @test ind2chr(ss,i)==ind2chr(s,i)
+        end
+    end
+    for (ss,s) in sdict
+        for i in 1:length(ss)
+            @test chr2ind(ss,i)==chr2ind(s,i)
+        end
+    end
+end #let
+
+#for isvalid(SubString{UTF8String})
+let s = utf8("Σx + βz - 2")
+  for i in -1:length(s)+2
+      ss=SubString(s,1,i)
+      @test isvalid(ss,i)==isvalid(s,i)
+  end
+end
+
+ss=SubString("hello",1,5)
+@test_throws BoundsError ind2chr(ss, -1)
+@test_throws BoundsError chr2ind(ss, -1)
+@test_throws BoundsError chr2ind(ss, 10)
+@test_throws BoundsError ind2chr(ss, 10)
+
+# length(SubString{UTF8String}) performance specialization
+let s = "|η(α)-ϕ(κ)| < ε"
+    @test length(SubString(s,1,0))==length(s[1:0])
+    @test length(SubString(s,4,4))==length(s[4:4])
+    @test length(SubString(s,1,7))==length(s[1:7])
+    @test length(SubString(s,4,11))==length(s[4:11])
+end
+
+## Reverse strings ##
+
+# issue #4586
+@test rsplit(RevString("ailuj"),'l') == ["ju","ia"]
+@test parse(Float64,RevString("64")) === 46.0
+
+# reverseind
+for T in (ASCIIString, UTF8String, UTF16String, UTF32String)
+    for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
+        for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
+            for c in ('X', 'δ', '\U0001d6a5')
+                T != ASCIIString || (isascii(prefix) && isascii(suffix) && isascii(c)) || continue
+                s = convert(T, string(prefix, c, suffix))
+                ri = search(reverse(s), c)
+                @test reverse(s) == RevString(s)
+                @test c == s[reverseind(s, ri)] == reverse(s)[ri]
+                s = RevString(s)
+                ri = search(reverse(s), c)
+                @test c == s[reverseind(s, ri)] == reverse(s)[ri]
+                s = convert(T, string(prefix, prefix, c, suffix, suffix))
+                pre = convert(T, prefix)
+                sb = SubString(s, nextind(pre, endof(pre)), endof(convert(T, string(prefix, prefix, c, suffix))))
+                ri = search(reverse(sb), c)
+                @test c == sb[reverseind(sb, ri)] == reverse(sb)[ri]
+            end
+        end
+    end
+end
+
+## Repeat strings ##
+
+# issue #7764
+let
+    srep = RepString("Σβ",2)
+    s="Σβ"
+    ss=SubString(s,1,endof(s))
+
+    @test ss^2 == "ΣβΣβ"
+    @test RepString(ss,2) == "ΣβΣβ"
+
+    @test endof(srep) == 7
+
+    @test next(srep, 3) == ('β',5)
+    @test next(srep, 7) == ('β',9)
+
+    @test srep[7] == 'β'
+    @test_throws BoundsError srep[8]
+end
+
+## Rope strings ##
+
+@test sizeof(RopeString("abc","def")) == 6
diff --git a/test/strings/util.jl b/test/strings/util.jl
new file mode 100644
index 0000000000000..65ab6c11c5526
--- /dev/null
+++ b/test/strings/util.jl
@@ -0,0 +1,210 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# padding (lpad and rpad)
+@test lpad("foo", 3) == "foo"
+@test rpad("foo", 3) == "foo"
+@test lpad("foo", 5) == "  foo"
+@test rpad("foo", 5) == "foo  "
+@test lpad("foo", 5, "  ") == "  foo"
+@test rpad("foo", 5, "  ") == "foo  "
+@test lpad("foo", 6, "  ") == "   foo"
+@test rpad("foo", 6, "  ") == "foo   "
+
+# string manipulation
+@test strip("\t  hi   \n") == "hi"
+@test strip("foobarfoo", ['f', 'o']) == "bar"
+
+# split
+@test isequal(split("foo,bar,baz", 'x'), ["foo,bar,baz"])
+@test isequal(split("foo,bar,baz", ','), ["foo","bar","baz"])
+@test isequal(split("foo,bar,baz", ","), ["foo","bar","baz"])
+@test isequal(split("foo,bar,baz", r","), ["foo","bar","baz"])
+@test isequal(split("foo,bar,baz", ','; limit=0), ["foo","bar","baz"])
+@test isequal(split("foo,bar,baz", ','; limit=1), ["foo,bar,baz"])
+@test isequal(split("foo,bar,baz", ','; limit=2), ["foo","bar,baz"])
+@test isequal(split("foo,bar,baz", ','; limit=3), ["foo","bar","baz"])
+@test isequal(split("foo,bar", "o,b"), ["fo","ar"])
+
+@test isequal(split("", ','), [""])
+@test isequal(split(",", ','), ["",""])
+@test isequal(split(",,", ','), ["","",""])
+@test isequal(split("", ','  ; keep=false), [])
+@test isequal(split(",", ',' ; keep=false), [])
+@test isequal(split(",,", ','; keep=false), [])
+
+@test isequal(split("a b c"), ["a","b","c"])
+@test isequal(split("a  b \t c\n"), ["a","b","c"])
+
+@test isequal(rsplit("foo,bar,baz", 'x'), ["foo,bar,baz"])
+@test isequal(rsplit("foo,bar,baz", ','), ["foo","bar","baz"])
+@test isequal(rsplit("foo,bar,baz", ","), ["foo","bar","baz"])
+@test isequal(rsplit("foo,bar,baz", ','; limit=0), ["foo","bar","baz"])
+@test isequal(rsplit("foo,bar,baz", ','; limit=1), ["foo,bar,baz"])
+@test isequal(rsplit("foo,bar,baz", ','; limit=2), ["foo,bar","baz"])
+@test isequal(rsplit("foo,bar,baz", ','; limit=3), ["foo","bar","baz"])
+@test isequal(rsplit("foo,bar", "o,b"), ["fo","ar"])
+
+@test isequal(rsplit("", ','), [""])
+@test isequal(rsplit(",", ','), ["",""])
+@test isequal(rsplit(",,", ','), ["","",""])
+@test isequal(rsplit(",,", ','; limit=2), [",",""])
+@test isequal(rsplit("", ','  ; keep=false), [])
+@test isequal(rsplit(",", ',' ; keep=false), [])
+@test isequal(rsplit(",,", ','; keep=false), [])
+
+#@test isequal(rsplit("a b c"), ["a","b","c"])
+#@test isequal(rsplit("a  b \t c\n"), ["a","b","c"])
+
+let str = "a.:.ba..:..cba.:.:.dcba.:."
+@test isequal(split(str, ".:."), ["a","ba.",".cba",":.dcba",""])
+@test isequal(split(str, ".:."; keep=false), ["a","ba.",".cba",":.dcba"])
+@test isequal(split(str, ".:."), ["a","ba.",".cba",":.dcba",""])
+@test isequal(split(str, r"\.(:\.)+"), ["a","ba.",".cba","dcba",""])
+@test isequal(split(str, r"\.(:\.)+"; keep=false), ["a","ba.",".cba","dcba"])
+@test isequal(split(str, r"\.+:\.+"), ["a","ba","cba",":.dcba",""])
+@test isequal(split(str, r"\.+:\.+"; keep=false), ["a","ba","cba",":.dcba"])
+
+@test isequal(rsplit(str, ".:."), ["a","ba.",".cba.:","dcba",""])
+@test isequal(rsplit(str, ".:."; keep=false), ["a","ba.",".cba.:","dcba"])
+@test isequal(rsplit(str, ".:."; limit=2), ["a.:.ba..:..cba.:.:.dcba", ""])
+@test isequal(rsplit(str, ".:."; limit=3), ["a.:.ba..:..cba.:", "dcba", ""])
+@test isequal(rsplit(str, ".:."; limit=4), ["a.:.ba.", ".cba.:", "dcba", ""])
+@test isequal(rsplit(str, ".:."; limit=5), ["a", "ba.", ".cba.:", "dcba", ""])
+@test isequal(rsplit(str, ".:."; limit=6), ["a", "ba.", ".cba.:", "dcba", ""])
+end
+
+# zero-width splits
+@test isequal(rsplit("", ""), [""])
+
+@test isequal(split("", ""), [""])
+@test isequal(split("", r""), [""])
+@test isequal(split("abc", ""), ["a","b","c"])
+@test isequal(split("abc", r""), ["a","b","c"])
+@test isequal(split("abcd", r"b?"), ["a","c","d"])
+@test isequal(split("abcd", r"b*"), ["a","c","d"])
+@test isequal(split("abcd", r"b+"), ["a","cd"])
+@test isequal(split("abcd", r"b?c?"), ["a","d"])
+@test isequal(split("abcd", r"[bc]?"), ["a","","d"])
+@test isequal(split("abcd", r"a*"), ["","b","c","d"])
+@test isequal(split("abcd", r"a+"), ["","bcd"])
+@test isequal(split("abcd", r"d*"), ["a","b","c",""])
+@test isequal(split("abcd", r"d+"), ["abc",""])
+@test isequal(split("abcd", r"[ad]?"), ["","b","c",""])
+
+# replace
+@test replace("\u2202", '*', '\0') == "\u2202"
+
+@test replace("foobar", 'o', '0') == "f00bar"
+@test replace("foobar", 'o', '0', 1) == "f0obar"
+@test replace("foobar", 'o', "") == "fbar"
+@test replace("foobar", 'o', "", 1) == "fobar"
+@test replace("foobar", 'f', 'F') == "Foobar"
+@test replace("foobar", 'r', 'R') == "foobaR"
+
+@test replace("foofoofoo", "foo", "bar") == "barbarbar"
+@test replace("foobarfoo", "foo", "baz") == "bazbarbaz"
+@test replace("barfoofoo", "foo", "baz") == "barbazbaz"
+
+@test replace("", "", "") == ""
+@test replace("", "", "x") == "x"
+@test replace("", "x", "y") == ""
+
+@test replace("abcd", "", "^") == "^a^b^c^d^"
+@test replace("abcd", "b", "^") == "a^cd"
+@test replace("abcd", r"b?", "^") == "^a^c^d^"
+@test replace("abcd", r"b+", "^") == "a^cd"
+@test replace("abcd", r"b?c?", "^") == "^a^d^"
+@test replace("abcd", r"[bc]?", "^") == "^a^^d^"
+
+@test replace("foobarfoo", r"(fo|ba)", "xx") == "xxoxxrxxo"
+@test replace("foobarfoo", r"(foo|ba)", "bar") == "barbarrbar"
+
+@test replace("foobar", 'o', 'ø') == "føøbar"
+@test replace("foobar", 'o', 'ø', 1) == "føobar"
+@test replace("føøbar", 'ø', 'o') == "foobar"
+@test replace("føøbar", 'ø', 'o', 1) == "foøbar"
+@test replace("føøbar", 'ø', 'ö') == "fööbar"
+@test replace("føøbar", 'ø', 'ö', 1) == "föøbar"
+@test replace("føøbar", 'ø', "") == "fbar"
+@test replace("føøbar", 'ø', "", 1) == "føbar"
+@test replace("føøbar", 'f', 'F') == "Føøbar"
+@test replace("ḟøøbar", 'ḟ', 'F') == "Føøbar"
+@test replace("føøbar", 'f', 'Ḟ') == "Ḟøøbar"
+@test replace("ḟøøbar", 'ḟ', 'Ḟ') == "Ḟøøbar"
+@test replace("føøbar", 'r', 'R') == "føøbaR"
+@test replace("føøbaṙ", 'ṙ', 'R') == "føøbaR"
+@test replace("føøbar", 'r', 'Ṙ') == "føøbaṘ"
+@test replace("føøbaṙ", 'ṙ', 'Ṙ') == "føøbaṘ"
+
+@test replace("ḟøøḟøøḟøø", "ḟøø", "bar") == "barbarbar"
+@test replace("ḟøøbarḟøø", "ḟøø", "baz") == "bazbarbaz"
+@test replace("barḟøøḟøø", "ḟøø", "baz") == "barbazbaz"
+
+@test replace("foofoofoo", "foo", "ƀäṙ") == "ƀäṙƀäṙƀäṙ"
+@test replace("fooƀäṙfoo", "foo", "baz") == "bazƀäṙbaz"
+@test replace("ƀäṙfoofoo", "foo", "baz") == "ƀäṙbazbaz"
+
+@test replace("foofoofoo", "foo", "bar") == "barbarbar"
+@test replace("foobarfoo", "foo", "ƀäż") == "ƀäżbarƀäż"
+@test replace("barfoofoo", "foo", "ƀäż") == "barƀäżƀäż"
+
+@test replace("ḟøøḟøøḟøø", "ḟøø", "ƀäṙ") == "ƀäṙƀäṙƀäṙ"
+@test replace("ḟøøƀäṙḟøø", "ḟøø", "baz") == "bazƀäṙbaz"
+@test replace("ƀäṙḟøøḟøø", "ḟøø", "baz") == "ƀäṙbazbaz"
+
+@test replace("ḟøøḟøøḟøø", "ḟøø", "bar") == "barbarbar"
+@test replace("ḟøøbarḟøø", "ḟøø", "ƀäż") == "ƀäżbarƀäż"
+@test replace("barḟøøḟøø", "ḟøø", "ƀäż") == "barƀäżƀäż"
+
+@test replace("ḟøøḟøøḟøø", "ḟøø", "ƀäṙ") == "ƀäṙƀäṙƀäṙ"
+@test replace("ḟøøƀäṙḟøø", "ḟøø", "ƀäż") == "ƀäżƀäṙƀäż"
+@test replace("ƀäṙḟøøḟøø", "ḟøø", "ƀäż") == "ƀäṙƀäżƀäż"
+
+@test replace("", "", "ẍ") == "ẍ"
+@test replace("", "ẍ", "ÿ") == ""
+
+@test replace("äƀçđ", "", "π") == "πäπƀπçπđπ"
+@test replace("äƀçđ", "ƀ", "π") == "äπçđ"
+@test replace("äƀçđ", r"ƀ?", "π") == "πäπçπđπ"
+@test replace("äƀçđ", r"ƀ+", "π") == "äπçđ"
+@test replace("äƀçđ", r"ƀ?ç?", "π") == "πäπđπ"
+@test replace("äƀçđ", r"[ƀç]?", "π") == "πäππđπ"
+
+@test replace("foobarfoo", r"(fo|ba)", "ẍẍ") == "ẍẍoẍẍrẍẍo"
+
+@test replace("ḟøøbarḟøø", r"(ḟø|ba)", "xx") == "xxøxxrxxø"
+@test replace("ḟøøbarḟøø", r"(ḟøø|ba)", "bar") == "barbarrbar"
+
+@test replace("fooƀäṙfoo", r"(fo|ƀä)", "xx") == "xxoxxṙxxo"
+@test replace("fooƀäṙfoo", r"(foo|ƀä)", "ƀäṙ") == "ƀäṙƀäṙṙƀäṙ"
+
+@test replace("ḟøøƀäṙḟøø", r"(ḟø|ƀä)", "xx") == "xxøxxṙxxø"
+@test replace("ḟøøƀäṙḟøø", r"(ḟøø|ƀä)", "ƀäṙ") == "ƀäṙƀäṙṙƀäṙ"
+
+@test replace("foo", "oo", uppercase) == "fOO"
+
+# chomp/chop
+@test chomp("foo\n") == "foo"
+@test chop("foob") == "foo"
+
+# bytes2hex and hex2bytes
+hex_str = "d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592"
+bin_val = hex2bytes(hex_str)
+
+@test div(length(hex_str), 2) == length(bin_val)
+@test hex_str == bytes2hex(bin_val)
+
+bin_val = hex2bytes("07bf")
+@test bin_val[1] == 7
+@test bin_val[2] == 191
+@test typeof(bin_val) == Array{UInt8, 1}
+@test length(bin_val) == 2
+
+# all valid hex chars
+@test "0123456789abcdefabcdef" == bytes2hex(hex2bytes("0123456789abcdefABCDEF"))
+
+# odd size
+@test_throws ArgumentError hex2bytes("0123456789abcdefABCDEF0")
+
+#non-hex characters
+@test_throws ArgumentError hex2bytes("0123456789abcdefABCDEFGH")
diff --git a/test/triplequote.jl b/test/triplequote.jl
new file mode 100644
index 0000000000000..074afec18a22c
--- /dev/null
+++ b/test/triplequote.jl
@@ -0,0 +1,68 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# triple-quote delimited strings
+@test """abc""" == "abc"
+@test """ab"c""" == "ab\"c"
+@test """ab""c""" == "ab\"\"c"
+@test """ab"\"c""" == "ab\"\"c"
+@test """abc\"""" == "abc\""
+n = 3
+@test """$n\n""" == "$n\n"
+@test """$(n)""" == "3"
+@test """$(2n)""" == "6"
+@test """$(n+4)""" == "7"
+@test """$("string")""" == "string"
+a = [3,1,2]
+@test """$(a[2])""" == "1"
+@test """$(a[3]+7)""" == "9"
+@test """$(floor(Int,4.5))""" == "4"
+nl = "
+"
+@test """
+     a
+     b
+
+     c
+     """ == "a$(nl)b$(nl)$(nl)c$(nl)"
+@test """
+      """ == ""
+@test """x
+     a
+    """ == "x$(nl) a$(nl)"
+@test """
+     $n
+   """ == "  $n$(nl)"
+@test """
+      a
+     b
+       c""" == " a$(nl)b$(nl)  c"
+# tabs + spaces
+@test """
+	 a
+	 b
+	""" == " a$(nl) b$(nl)"
+@test """
+      a
+       """ == "a$(nl) "
+s = "   p"
+@test """
+      $s""" == "$s"
+@test """
+       $s
+      """ == " $s$(nl)"
+@test """\t""" == "\t"
+@test """
+      \t""" == ""
+@test """
+      foo
+      \tbar""" == "foo$(nl)\tbar"
+@test """
+      foo
+      \tbar
+      """ == "foo$(nl)\tbar$(nl)"
+@test """
+      foo
+      bar\t""" == "foo$(nl)bar\t"
+@test """
+      $("\n      ")
+      """ == "\n      $(nl)"
diff --git a/test/unicode.jl b/test/unicode.jl
index 6af8e8e63a527..1e3c384306cd0 100644
--- a/test/unicode.jl
+++ b/test/unicode.jl
@@ -1,140 +1,6 @@
 # This file is a part of Julia. License is MIT: http://julialang.org/license
 
-# UTF16
-u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"
-u16 = utf16(u8)
-@test sizeof(u16) == 18
-@test length(u16.data) == 10 && u16.data[end] == 0
-@test length(u16) == 5
-@test utf8(u16) == u8
-@test collect(u8) == collect(u16)
-@test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 18), 1, reinterpret(UInt8, u16.data), 1, 18))
-@test u8 == utf16(pointer(u16)) == utf16(convert(Ptr{Int16}, pointer(u16)))
-@test_throws UnicodeError utf16(utf32(Char(0x120000)))
-@test_throws UnicodeError utf16(UInt8[1,2,3])
-
-# UTF32
-u32 = utf32(u8)
-@test sizeof(u32) == 20
-@test length(u32.data) == 6 && u32.data[end] == Char(0)
-@test length(u32) == 5
-@test utf8(u32) == u8
-@test collect(u8) == collect(u32)
-@test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 20), 1, reinterpret(UInt8, u32.data), 1, 20))
-@test u8 == utf32(pointer(u32)) == utf32(convert(Ptr{Int32}, pointer(u32)))
-@test_throws UnicodeError utf32(UInt8[1,2,3])
-
-# Wstring
-w = wstring(u8)
-@test length(w) == 5 && utf8(w) == u8 && collect(u8) == collect(w)
-@test u8 == WString(w.data)
-
-if !success(`iconv --version`)
-    warn("iconv not found, skipping unicode tests!")
-    @windows_only warn("Use WinRPM.install(\"win_iconv\") to run these tests")
-else
-    # Create unicode test data directory
-    unicodedir = mktempdir()
-
-    # Use perl to generate the primary data
-    primary_encoding = "UTF-32BE"
-    primary_path = replace(joinpath(unicodedir, primary_encoding*".unicode"),"\\","\\\\\\\\")
-    run(`perl -e "
-        $$fname = \"$primary_path\";
-        open(UNICODEF, \">\", \"$$fname\")         or die \"can\'t open $$fname: $$!\";
-        binmode(UNICODEF);
-        print UNICODEF pack \"N*\", 0xfeff, 0..0xd7ff, 0xe000..0x10ffff;
-        close(UNICODEF);"` )
-
-    # Use iconv to generate the other data
-    for encoding in ["UTF-32LE", "UTF-16BE", "UTF-16LE", "UTF-8"]
-        output_path = joinpath(unicodedir, encoding*".unicode")
-        f = Base.FS.open(output_path,Base.JL_O_WRONLY|Base.JL_O_CREAT,Base.S_IRUSR | Base.S_IWUSR | Base.S_IRGRP | Base.S_IROTH)
-        run(pipe(`iconv -f $primary_encoding -t $encoding $primary_path`, f))
-        Base.FS.close(f)
-    end
-
-    f=open(joinpath(unicodedir,"UTF-32LE.unicode"))
-    str1 = utf32(read(f, UInt32, 1112065)[2:end])
-    close(f)
-
-    f=open(joinpath(unicodedir,"UTF-8.unicode"))
-    str2 = UTF8String(read(f, UInt8, 4382595)[4:end])
-    close(f)
-    @test str1 == str2
-
-    @test str1 == open(joinpath(unicodedir,"UTF-16LE.unicode")) do f
-        utf16(read(f, UInt16, 2160641)[2:end])
-    end
-
-    @test str1 == open(joinpath(unicodedir,"UTF-16LE.unicode")) do f
-        utf16(read(f, UInt8, 2160641*2))
-    end
-    @test str1 == open(joinpath(unicodedir,"UTF-16BE.unicode")) do f
-        utf16(read(f, UInt8, 2160641*2))
-    end
-
-    @test str1 == open(joinpath(unicodedir,"UTF-32LE.unicode")) do f
-        utf32(read(f, UInt8, 1112065*4))
-    end
-    @test str1 == open(joinpath(unicodedir,"UTF-32BE.unicode")) do f
-        utf32(read(f, UInt8, 1112065*4))
-    end
-
-    str1 = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
-    str2 = UTF32String(UInt32[
-                 8704, 32, 949, 32, 62, 32, 48, 44, 32, 8707, 32,
-                 948, 32, 62, 32, 48, 58, 32, 124, 120, 45, 121, 124,
-                 32, 60, 32, 948, 32, 8658, 32, 124, 102, 40, 120,
-                 41, 45, 102, 40, 121, 41, 124, 32, 60, 32, 949
-                 ,0])
-    @test str1 == str2
-
-    # Cleanup unicode data
-    for encoding in ["UTF-32BE", "UTF-32LE", "UTF-16BE", "UTF-16LE", "UTF-8"]
-        rm(joinpath(unicodedir,encoding*".unicode"))
-    end
-    rm(unicodedir)
-end
-
-# check utf8proc handling of CN category constants
-let c_ll = 'β', c_cn = '\u038B'
-    @test Base.UTF8proc.category_code(c_ll) == Base.UTF8proc.UTF8PROC_CATEGORY_LL
-    # check codepoint with category code CN
-    @test Base.UTF8proc.category_code(c_cn) == Base.UTF8proc.UTF8PROC_CATEGORY_CN
-end
-
-# graphemes
-let grphtest = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h",
-                                              "β","l","a","h",
-                                              "b\u0302","l","á","h"]),
-                ("", UTF8String[]),
-                ("x\u0302", ["x\u0302"]),
-                ("\U1d4c1\u0302", ["\U1d4c1\u0302"]),
-                ("\U1d4c1\u0302\U1d4c1\u0300", ["\U1d4c1\u0302",
-                                                "\U1d4c1\u0300"]),
-                ("x",["x"]),
-                ("abc",["a","b","c"]))
-    for T in (utf8,utf16,utf32)
-        for nf in (:NFC, :NFD)
-            for (s, g) in grphtest
-                s_ = T(normalize_string(s, nf))
-                g_ = map(s -> normalize_string(s, nf), g)
-                grph = collect(graphemes(s_))
-                @test grph == g_
-                @test length(graphemes(s_)) == length(grph)
-            end
-            S = [T(normalize_string(s)) for (s,g) in grphtest]
-            G = map(graphemes, S)
-            @test map(graphemes, sort!(S)) == sort!(G)
-        end
-    end
-end
-
-# up-to-date character widths (#3721, #6939)
-@test charwidth('\U1f355') == strwidth("\U1f355") == strwidth(utf16("\U1f355")) == strwidth("\U1f355\u0302") == strwidth(utf16("\U1f355\u0302")) == 2
-
-# handling of embedded NUL chars (#10958)
-@test length("\0w") == length("\0α") == 2
-@test strwidth("\0w") == strwidth("\0α") == 1
-@test normalize_string("\0W", casefold=true) == "\0w"
+include("unicode/checkstring.jl")
+include("unicode/utf16.jl")
+include("unicode/utf32.jl")
+include("unicode/utf8proc.jl")
\ No newline at end of file
diff --git a/test/unicode/checkstring.jl b/test/unicode/checkstring.jl
new file mode 100644
index 0000000000000..9777e1e0ceb51
--- /dev/null
+++ b/test/unicode/checkstring.jl
@@ -0,0 +1,162 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# 11575
+# Test invalid sequences
+
+byt = 0x0 # Needs to be defined outside the try block!
+try
+    # Continuation byte not after lead
+    for byt in 0x80:0xbf
+        @test_throws UnicodeError Base.checkstring(UInt8[byt])
+    end
+
+    # Test lead bytes
+    for byt in 0xc0:0xff
+        # Single lead byte at end of string
+        @test_throws UnicodeError Base.checkstring(UInt8[byt])
+        # Lead followed by non-continuation character < 0x80
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0])
+        # Lead followed by non-continuation character > 0xbf
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0xc0])
+    end
+
+    # Test overlong 2-byte
+    for byt in 0x81:0xbf
+        @test_throws UnicodeError Base.checkstring(UInt8[0xc0,byt])
+    end
+    for byt in 0x80:0xbf
+        @test_throws UnicodeError Base.checkstring(UInt8[0xc1,byt])
+    end
+
+    # Test overlong 3-byte
+    for byt in 0x80:0x9f
+        @test_throws UnicodeError Base.checkstring(UInt8[0xe0,byt,0x80])
+    end
+
+    # Test overlong 4-byte (lead 0xf0 with a second byte below 0x90 encodes a code point < 0x10000)
+    for byt in 0x80:0x8f
+        @test_throws UnicodeError Base.checkstring(UInt8[0xf0,byt,0x80,0x80])
+    end
+
+    # Test 4-byte > 0x10ffff
+    for byt in 0x90:0xbf
+        @test_throws UnicodeError Base.checkstring(UInt8[0xf4,byt,0x80,0x80])
+    end
+    for byt in 0xf5:0xf7
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0x80])
+    end
+
+    # Test 5-byte
+    for byt in 0xf8:0xfb
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0x80,0x80])
+    end
+
+    # Test 6-byte
+    for byt in 0xfc:0xfd
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0x80,0x80,0x80])
+    end
+
+    # Test 7-byte
+    @test_throws UnicodeError Base.checkstring(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80])
+
+    # Three and above byte sequences
+    for byt in 0xe0:0xef
+        # Lead followed by only 1 continuation byte
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80])
+        # Lead ended by non-continuation character < 0x80
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0])
+        # Lead ended by non-continuation character > 0xbf
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0xc0])
+    end
+
+    # 3-byte encoded surrogate character(s)
+    # Single surrogate
+    @test_throws UnicodeError Base.checkstring(UInt8[0xed,0xa0,0x80])
+    # Not followed by surrogate
+    @test_throws UnicodeError Base.checkstring(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80])
+    # Trailing surrogate first
+    @test_throws UnicodeError Base.checkstring(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80])
+    # Followed by lead surrogate
+    @test_throws UnicodeError Base.checkstring(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80])
+
+    # Four byte sequences
+    for byt in 0xf0:0xf4
+        # Lead followed by only 2 continuation bytes
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80])
+        # Lead followed by non-continuation character < 0x80
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0])
+        # Lead followed by non-continuation character > 0xbf
+        @test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0xc0])
+    end
+catch err  # named `err` so the catch variable does not shadow Base.exp
+    println("Error testing checkstring: $byt, $err")
+    rethrow()  # re-raise the caught exception, keeping its original backtrace
+end
+
+# Surrogates
+@test_throws UnicodeError Base.checkstring(UInt16[0xd800])
+@test_throws UnicodeError Base.checkstring(UInt16[0xdc00])
+@test_throws UnicodeError Base.checkstring(UInt16[0xdc00,0xd800])
+
+# Surrogates in UTF-32
+@test_throws UnicodeError Base.checkstring(UInt32[0xd800])
+@test_throws UnicodeError Base.checkstring(UInt32[0xdc00])
+@test_throws UnicodeError Base.checkstring(UInt32[0xdc00,0xd800])
+
+# Characters > 0x10ffff
+@test_throws UnicodeError Base.checkstring(UInt32[0x110000])
+
+# Test valid sequences
+for (seq, res) in (
+    (UInt8[0x0],                (1,0,0,0,0)),   # Nul byte, beginning of ASCII range
+    (UInt8[0x7f],               (1,0,0,0,0)),   # End of ASCII range
+    (UInt8[0xc0,0x80],          (1,1,0,0,0)),   # Long encoded Nul byte (Modified UTF-8, Java)
+    (UInt8[0xc2,0x80],          (1,2,0,0,1)),   # \u80, beginning of Latin1 range
+    (UInt8[0xc3,0xbf],          (1,2,0,0,1)),   # \uff, end of Latin1 range
+    (UInt8[0xc4,0x80],          (1,4,0,0,1)),   # \u100, beginning of non-Latin1 2-byte range
+    (UInt8[0xdf,0xbf],          (1,4,0,0,1)),   # \u7ff, end of non-Latin1 2-byte range
+    (UInt8[0xe0,0xa0,0x80],     (1,8,0,1,0)),   # \u800, beginning of 3-byte range
+    (UInt8[0xed,0x9f,0xbf],     (1,8,0,1,0)),   # \ud7ff, end of first part of 3-byte range
+    (UInt8[0xee,0x80,0x80],     (1,8,0,1,0)),   # \ue000, beginning of second part of 3-byte range
+    (UInt8[0xef,0xbf,0xbf],     (1,8,0,1,0)),   # \uffff, end of 3-byte range
+    (UInt8[0xf0,0x90,0x80,0x80],(1,16,1,0,0)),  # \U10000, beginning of 4-byte range
+    (UInt8[0xf4,0x8f,0xbf,0xbf],(1,16,1,0,0)),  # \U10ffff, end of 4-byte range
+    (UInt8[0xed,0xa0,0x80,0xed,0xb0,0x80], (1,0x30,1,0,0)), # Overlong \U10000, (CESU-8)
+    (UInt8[0xed,0xaf,0xbf,0xed,0xbf,0xbf], (1,0x30,1,0,0)), # Overlong \U10ffff, (CESU-8)
+    (UInt16[0x0000],            (1,0,0,0,0)),   # Nul byte, beginning of ASCII range
+    (UInt16[0x007f],            (1,0,0,0,0)),   # End of ASCII range
+    (UInt16[0x0080],            (1,2,0,0,1)),   # Beginning of Latin1 range
+    (UInt16[0x00ff],            (1,2,0,0,1)),   # End of Latin1 range
+    (UInt16[0x0100],            (1,4,0,0,1)),   # Beginning of non-Latin1 2-byte range
+    (UInt16[0x07ff],            (1,4,0,0,1)),   # End of non-Latin1 2-byte range
+    (UInt16[0x0800],            (1,8,0,1,0)),   # Beginning of 3-byte range
+    (UInt16[0xd7ff],            (1,8,0,1,0)),   # End of first part of 3-byte range
+    (UInt16[0xe000],            (1,8,0,1,0)),   # Beginning of second part of 3-byte range
+    (UInt16[0xffff],            (1,8,0,1,0)),   # End of 3-byte range
+    (UInt16[0xd800,0xdc00],     (1,16,1,0,0)),  # \U10000, beginning of 4-byte range
+    (UInt16[0xdbff,0xdfff],     (1,16,1,0,0)),  # \U10ffff, end of 4-byte range
+    (UInt32[0x0000],            (1,0,0,0,0)),   # Nul byte, beginning of ASCII range
+    (UInt32[0x007f],            (1,0,0,0,0)),   # End of ASCII range
+    (UInt32[0x0080],            (1,2,0,0,1)),   # Beginning of Latin1 range
+    (UInt32[0x00ff],            (1,2,0,0,1)),   # End of Latin1 range
+    (UInt32[0x0100],            (1,4,0,0,1)),   # Beginning of non-Latin1 2-byte range
+    (UInt32[0x07ff],            (1,4,0,0,1)),   # End of non-Latin1 2-byte range
+    (UInt32[0x0800],            (1,8,0,1,0)),   # Beginning of 3-byte range
+    (UInt32[0xd7ff],            (1,8,0,1,0)),   # End of first part of 3-byte range
+    (UInt32[0xe000],            (1,8,0,1,0)),   # Beginning of second part of 3-byte range
+    (UInt32[0xffff],            (1,8,0,1,0)),   # End of 3-byte range
+    (UInt32[0x10000],           (1,16,1,0,0)),  # \U10000, beginning of 4-byte range
+    (UInt32[0x10ffff],          (1,16,1,0,0)),  # \U10ffff, end of 4-byte range
+    (UInt32[0xd800,0xdc00],     (1,0x30,1,0,0)),# Overlong \U10000, (CESU-8)
+    (UInt32[0xdbff,0xdfff],     (1,0x30,1,0,0)))# Overlong \U10ffff, (CESU-8)
+    @test Base.checkstring(seq) == res
+end
+
+# Test bounds checking
+@test_throws BoundsError Base.checkstring(b"abcdef", -10)
+@test_throws BoundsError Base.checkstring(b"abcdef", 0)
+@test_throws BoundsError Base.checkstring(b"abcdef", 7)
+@test_throws BoundsError Base.checkstring(b"abcdef", 3, -10)
+@test_throws BoundsError Base.checkstring(b"abcdef", 3, 0)
+@test_throws BoundsError Base.checkstring(b"abcdef", 3, 7)
+@test_throws ArgumentError Base.checkstring(b"abcdef", 3, 1)
diff --git a/test/unicode/utf16.jl b/test/unicode/utf16.jl
new file mode 100644
index 0000000000000..7c5fbac5ae4ff
--- /dev/null
+++ b/test/unicode/utf16.jl
@@ -0,0 +1,14 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# UTF16: round-trip a string made mostly of characters above U+FFFF
+u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"  # four code points above U+FFFF plus U+0066
+u16 = utf16(u8)
+@test sizeof(u16) == 18  # 4 surrogate pairs + 1 single unit = 9 code units of 2 bytes each
+@test length(u16.data) == 10 && u16.data[end] == 0  # data holds one extra trailing 0 unit
+@test length(u16) == 5  # length counts characters, not code units
+@test utf8(u16) == u8
+@test collect(u8) == collect(u16)
+@test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 18), 1, reinterpret(UInt8, u16.data), 1, 18))  # rebuild from code units and from the 18 raw bytes
+@test u8 == utf16(pointer(u16)) == utf16(convert(Ptr{Int16}, pointer(u16)))  # rebuild from pointers
+@test_throws UnicodeError utf16(utf32(Char(0x120000)))  # 0x120000 lies above U+10FFFF
+@test_throws UnicodeError utf16(UInt8[1,2,3])  # 3 bytes; presumably rejected as not a whole number of 16-bit units
diff --git a/test/unicode/utf32.jl b/test/unicode/utf32.jl
new file mode 100644
index 0000000000000..15ddb1da56f74
--- /dev/null
+++ b/test/unicode/utf32.jl
@@ -0,0 +1,19 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# UTF32: round-trip the same five-character string through UTF-32
+u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"  # four code points above U+FFFF plus U+0066
+u32 = utf32(u8)
+@test sizeof(u32) == 20  # 5 characters * 4 bytes
+@test length(u32.data) == 6 && u32.data[end] == Char(0)  # data holds one extra trailing Char(0)
+@test length(u32) == 5
+@test utf8(u32) == u8
+@test collect(u8) == collect(u32)
+@test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 20), 1, reinterpret(UInt8, u32.data), 1, 20))  # rebuild from characters and from the 20 raw bytes
+@test u8 == utf32(pointer(u32)) == utf32(convert(Ptr{Int32}, pointer(u32)))  # rebuild from pointers
+@test_throws UnicodeError utf32(UInt8[1,2,3])  # 3 bytes; presumably rejected as not a whole number of 32-bit units
+
+# Wstring: same round-trip checks through wstring / WString
+u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"
+w = wstring(u8)
+@test length(w) == 5 && utf8(w) == u8 && collect(u8) == collect(w)
+@test u8 == WString(w.data)  # w.data is passed whole here, unlike the [1:end-1] slices used above
diff --git a/test/unicode/utf8proc.jl b/test/unicode/utf8proc.jl
new file mode 100644
index 0000000000000..2963393cd1910
--- /dev/null
+++ b/test/unicode/utf8proc.jl
@@ -0,0 +1,259 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# normalize_string (Unicode normalization etc.): named forms first, then individual keyword options
+@test normalize_string("\u006e\u0303", :NFC) == "\u00f1"  # U+006E + U+0303 composes to U+00F1
+@test "\u006e\u0303" == normalize_string("\u00f1", :NFD)  # and U+00F1 decomposes back
+@test normalize_string("\ufb00", :NFC) != "ff"  # the canonical form leaves U+FB00 alone
+@test normalize_string("\ufb00", :NFKC) == "ff"  # the compatibility form expands it to two letters
+@test normalize_string("\u006e\u0303\ufb00", :NFKC) == "\u00f1"*"ff"
+@test normalize_string("\u00f1\ufb00", :NFKD) == "\u006e\u0303"*"ff"
+@test normalize_string("\u006e\u0303", compose=true) == "\u00f1"
+@test "\u006e\u0303" == normalize_string("\u00f1", decompose=true)
+@test normalize_string("\u006e\u0303\u00b5",compat=true) == "\u00f1\u03bc"  # compat maps U+00B5 to U+03BC
+@test normalize_string("Σσς",casefold=true) == "σσσ"  # all three sigma variants fold to the same letter
+@test normalize_string("∕⁄", lump=true) == "//"  # both slash look-alikes lump to ASCII '/'
+@test normalize_string("\ua\n\r\r\ua", newline2lf=true) == "\ua\ua\ua\ua"  # input is LF, LF, CR, CR+LF: four line breaks
+@test normalize_string("\ua\n\r\r\ua", newline2ls=true) == "\u2028\u2028\u2028\u2028"  # same four breaks as U+2028
+@test normalize_string("\ua\n\r\r\ua", newline2ps=true) == "\u2029\u2029\u2029\u2029"  # same four breaks as U+2029
+@test normalize_string("\u00f1", stripmark=true) == "n"
+@test isempty(normalize_string("\u00ad", stripignore=true))  # U+00AD is removed entirely
+@test normalize_string("\t\r", stripcc=true) == "  "  # each control character becomes one space
+@test normalize_string("\t\r", stripcc=true, newline2ls=true) == " \u2028"  # with newline2ls the CR becomes U+2028 instead
+
+# Tests from Unicode Standard Annex #15 (UAX #15), "Unicode Normalization Forms"
+# http://www.unicode.org/reports/tr15/
+
+#1. Canonical equivalence
+let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFC)==normalize_string(string(b...), :NFC)
+    ==(a,b) = Base.(:(==))(a,b)
+    @test ['C', '̧'] == ['Ç']
+    @test ['q', '̇', '̣'] == ['q', '̣', '̇']
+    @test ['가'] == ['ᄀ', 'ᅡ']
+    @test ['Ω'] == ['Ω']
+end
+
+#2. Compatibility Equivalence
+let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFKC)==normalize_string(string(b...), :NFKC)
+    ==(a,b) = Base.(:(==))(a,b)
+    @test ['ℌ'] == ['ℍ'] == ['H']
+    @test ['ﻨ'] == ['ﻧ'] == ['ﻦ'] == ['ﻥ']
+    @test ['①'] == ['1']
+    @test ['ｶ'] == ['カ']
+    @test ['︷'] == ['{']
+    @test ['⁹'] == ['₉']
+    @test ['㌀'] == ['ア', 'パ', 'ー', 'ト']
+    @test ['¼'] == ['1', '⁄', '4']
+    @test ['ǆ'] == ['d', 'ž']
+end
+
+#3. Singletons (U+212B and U+2126 never survive normalization)
+@test normalize_string("\U212b", :NFD) == "A\U030a"
+@test normalize_string("\U212b", :NFC) == "\U00c5"
+@test normalize_string("\U2126", :NFC) == normalize_string("\U2126", :NFD) == "\U03a9"
+
+#4. Canonical Composites (NFC keeps the composite, NFD splits it into base + mark)
+@test normalize_string("\U00c5", :NFC) == "\U00c5"
+@test normalize_string("\U00c5", :NFD) == "A\U030a"
+@test normalize_string("\U00f4", :NFC) == "\U00f4"
+@test normalize_string("\U00f4", :NFD) == "o\U0302"
+
+#5. Multiple Combining Marks (U+0323 is ordered before U+0307 in every expected result)
+@test normalize_string("\U1e69", :NFD) == "s\U0323\U0307"
+@test normalize_string("\U1e69", :NFC) == "\U1e69"
+@test normalize_string("\U1e0b\U0323", :NFD) == "d\U0323\U0307"
+@test normalize_string("\U1e0b\U0323", :NFC) == "\U1e0d\U0307"
+@test normalize_string("q\U0307\U0323", :NFC) == "q\U0323\U0307"
+@test normalize_string("q\U0307\U0323", :NFD) == "q\U0323\U0307"
+
+#6. Compatibility Composites (unchanged by NFC/NFD, expanded only by NFKC/NFKD)
+@test normalize_string("\Ufb01", :NFD) == normalize_string("\Ufb01", :NFC) == "\Ufb01"
+@test normalize_string("\Ufb01", :NFKD) == normalize_string("\Ufb01", :NFKC) == "fi"
+@test normalize_string("2\U2075", :NFD) == normalize_string("2\U2075", :NFC) == "2\U2075"
+@test normalize_string("2\U2075", :NFKD) == normalize_string("2\U2075", :NFKC) == "25"
+@test normalize_string("\U1e9b\U0323", :NFD) == "\U017f\U0323\U0307"
+@test normalize_string("\U1e9b\U0323", :NFC) == "\U1e9b\U0323"
+@test normalize_string("\U1e9b\U0323", :NFKD) == "s\U0323\U0307"
+@test normalize_string("\U1e9b\U0323", :NFKC) == "\U1e69"
+
+#issue #5939  utf8proc/libmojibake character predicates
+let  # scope the helper arrays below so they do not leak into the rest of the test file
+    alower=['a', 'd', 'j', 'y', 'z']  # ASCII lowercase
+    ulower=['α', 'β', 'γ', 'δ', 'ф', 'я']  # non-ASCII lowercase
+    for c in vcat(alower,ulower)
+        @test islower(c) == true
+        @test isupper(c) == false
+        @test isdigit(c) == false
+        @test isnumber(c) == false
+    end
+
+    aupper=['A', 'D', 'J', 'Y', 'Z']  # ASCII uppercase
+    uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'ǅ', 'Ж', 'Д']  # non-ASCII characters expected to satisfy isupper
+
+    for c in vcat(aupper,uupper)
+        @test islower(c) == false
+        @test isupper(c) == true
+        @test isdigit(c) == false
+        @test isnumber(c) == false
+    end
+
+    nocase=['א','ﺵ']  # letters that appear only in the isalpha/isalnum checks, not the case checks
+    alphas=vcat(alower,ulower,aupper,uupper,nocase)
+
+    for c in alphas
+         @test isalpha(c) == true
+         @test isnumber(c) == false
+    end
+
+
+    anumber=['0', '1', '5', '9']  # ASCII digits
+    unumber=['٣', '٥', '٨', '¹', 'ⅳ' ]  # non-ASCII numerics: isnumber but not isdigit
+
+    for c in anumber
+         @test isdigit(c) == true
+         @test isnumber(c) == true
+    end
+    for c in unumber
+         @test isdigit(c) == false
+         @test isnumber(c) == true
+    end
+
+    alnums=vcat(alphas,anumber,unumber)
+    for c in alnums
+         @test isalnum(c) == true
+         @test ispunct(c) == false
+    end
+
+    asymbol = ['(',')', '~', '$' ]  # only used in the printable/graphic loop below
+    usymbol = ['∪', '∩', '⊂', '⊃', '√', '€', '¥', '↰', '△', '§']  # only used in the printable/graphic loop below
+
+    apunct =['.',',',';',':','&']
+    upunct =['‡', '؟', '჻' ]
+
+    for c in vcat(apunct,upunct)
+         @test ispunct(c) == true
+         @test isalnum(c) == false
+    end
+
+    for c in vcat(alnums,asymbol,usymbol,apunct,upunct)  # everything visible: printable, graphic, not space, not control
+        @test isprint(c) == true
+        @test isgraph(c) == true
+        @test isspace(c) == false
+        @test iscntrl(c) == false
+    end
+
+    NBSP = Char(0x0000A0)  # NOTE(review): defined but not used anywhere in this block
+    ENSPACE = Char(0x002002)
+    EMSPACE = Char(0x002003)
+    THINSPACE = Char(0x002009)
+    ZWSPACE = Char(0x002060)  # NOTE(review): U+2060 is WORD JOINER; ZERO WIDTH SPACE is U+200B - confirm which was intended
+
+    uspace = [ENSPACE, EMSPACE, THINSPACE]
+    aspace = [' ']
+    acntrl_space = ['\t', '\n', '\v', '\f', '\r']  # ASCII characters that are both whitespace and control
+    for c in vcat(aspace,uspace)  # plain spaces: printable but not graphic
+        @test isspace(c) == true
+        @test isprint(c) == true
+        @test isgraph(c) == false
+    end
+
+    for c in vcat(acntrl_space)  # control whitespace: neither printable nor graphic
+        @test isspace(c) == true
+        @test isprint(c) == false
+        @test isgraph(c) == false
+    end
+
+    @test isspace(ZWSPACE) == false # zero-width code point U+2060 (see the note on ZWSPACE above) is not whitespace
+
+    acontrol = [ Char(0x001c), Char(0x001d), Char(0x001e), Char(0x001f)]
+    latincontrol = [ Char(0x0080), Char(0x0085) ]
+    ucontrol = [ Char(0x200E), Char(0x202E) ]
+
+    for c in vcat(acontrol, acntrl_space, latincontrol)
+        @test iscntrl(c) == true
+        @test isalnum(c) == false
+        @test isprint(c) == false
+        @test isgraph(c) == false
+    end
+
+    for c in ucontrol  #non-latin1 controls: expected to fail every predicate, iscntrl included
+        if c!=Char(0x0085)  # NOTE(review): always true here, ucontrol holds only 0x200E and 0x202E
+            @test iscntrl(c) == false
+            @test isspace(c) == false
+            @test isalnum(c) == false
+            @test isprint(c) == false
+            @test isgraph(c) == false
+        end
+    end
+
+end
+
+@test isspace("  \t   \n   \r  ")==true  # string predicates; this input holds only spaces, tab, LF and CR
+@test isgraph("  \t   \n   \r  ")==false
+@test isprint("  \t   \n   \r  ")==false  # the single-character tests above expect isprint false for tab, LF and CR
+@test isalpha("  \t   \n   \r  ")==false
+@test isnumber("  \t   \n   \r  ")==false
+@test ispunct("  \t   \n   \r  ")==false
+
+@test isspace("ΣβΣβ")==false  # mixed-case Greek letters
+@test isalpha("ΣβΣβ")==true
+@test isgraph("ΣβΣβ")==true
+@test isprint("ΣβΣβ")==true
+@test isupper("ΣβΣβ")==false  # mixed case: the string is neither upper...
+@test islower("ΣβΣβ")==false  # ...nor lower
+@test isnumber("ΣβΣβ")==false
+@test iscntrl("ΣβΣβ")==false
+@test ispunct("ΣβΣβ")==false
+
+@test isnumber("23435")==true  # ASCII digits only
+@test isdigit("23435")==true
+@test isalnum("23435")==true
+@test isalpha("23435")==false
+@test iscntrl( string(Char(0x0080))) == true  # one-character string holding U+0080
+@test ispunct( "‡؟჻") ==true  # same three characters as the non-ASCII punctuation list in the block above
+
+@test isxdigit('0') == true  # Char method
+@test isxdigit("0") == true  # string method
+@test isxdigit("a") == true
+@test isxdigit("g") == false  # 'g' is past the last hexadecimal letter
+
+# check utf8proc handling of CN category constants
+let c_ll = 'β', c_cn = '\u038B'  # c_ll is expected in category LL, c_cn in category CN
+    @test Base.UTF8proc.category_code(c_ll) == Base.UTF8proc.UTF8PROC_CATEGORY_LL
+    # check codepoint with category code CN
+    @test Base.UTF8proc.category_code(c_cn) == Base.UTF8proc.UTF8PROC_CATEGORY_CN
+end
+
+# graphemes: each tuple pairs an input string with its expected grapheme clusters
+let grphtest = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h",
+                                              "β","l","a","h",
+                                              "b\u0302","l","á","h"]),
+                ("", UTF8String[]),
+                ("x\u0302", ["x\u0302"]),
+                ("\U1d4c1\u0302", ["\U1d4c1\u0302"]),
+                ("\U1d4c1\u0302\U1d4c1\u0300", ["\U1d4c1\u0302",
+                                                "\U1d4c1\u0300"]),
+                ("x",["x"]),
+                ("abc",["a","b","c"]))
+    for T in (utf8,utf16,utf32)  # repeat for every string encoding
+        for nf in (:NFC, :NFD)  # and for both canonical normalization forms
+            for (s, g) in grphtest
+                s_ = T(normalize_string(s, nf))  # normalize the input, then convert to the encoding under test
+                g_ = map(s -> normalize_string(s, nf), g)  # normalize the expected clusters the same way
+                grph = collect(graphemes(s_))
+                @test grph == g_
+                @test length(graphemes(s_)) == length(grph)  # length of the iterator agrees with the collected count
+            end
+            S = [T(normalize_string(s)) for (s,g) in grphtest]  # NOTE(review): nf is not passed here, so both nf iterations build the same S
+            G = map(graphemes, S)
+            @test map(graphemes, sort!(S)) == sort!(G)  # sorting the grapheme iterators matches sorting the strings
+        end
+    end
+end
+
+# up-to-date character widths (#3721, #6939): U+1F355 has width 2, with or without a trailing U+0302
+@test charwidth('\U1f355') == strwidth("\U1f355") == strwidth(utf16("\U1f355")) == strwidth("\U1f355\u0302") == strwidth(utf16("\U1f355\u0302")) == 2
+
+# handling of embedded NUL chars (#10958): NUL must not truncate the string
+@test length("\0w") == length("\0α") == 2  # the NUL counts as a character
+@test strwidth("\0w") == strwidth("\0α") == 1  # total width is 1 for the two-character string
+@test normalize_string("\0W", casefold=true) == "\0w"  # the character after the NUL is still case-folded