-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Abstract over base type of Yi.Rope #15
base: master
Are you sure you want to change the base?
Changes from 7 commits
1f2a383
83d4f8b
ab51bfe
c21dece
9efafd5
c3a890f
2ea77af
3e33ad8
876bb5e
c35dac9
d72f1e1
1b48f12
b2f7af5
d9a6bc9
0c0735d
d8fe019
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,596 @@ | ||
{-# language BangPatterns #-} | ||
{-# language DeriveDataTypeable #-} | ||
{-# language LambdaCase #-} | ||
{-# language MultiParamTypeClasses #-} | ||
{-# language OverloadedStrings #-} | ||
{-# language ScopedTypeVariables #-} | ||
{-# language ViewPatterns #-} | ||
{-# language FlexibleContexts #-} | ||
{-# language UndecidableInstances #-} | ||
{-# language ConstraintKinds #-} | ||
{-# options_haddock show-extensions #-} | ||
|
||
-- | | ||
-- Module : Yi.Braid | ||
-- License : GPL-2 | ||
-- Maintainer : yi-devel@googlegroups.com | ||
-- Stability : experimental | ||
-- Portability : portable | ||
-- | ||
-- This module defines a @rope@ data structure for use in Yi. This | ||
-- specific implementation uses a fingertree over Text. | ||
-- | ||
-- In contrast to our old implementation, we can now reap all the | ||
-- benefits of Text: automatic unicode handling and blazing fast | ||
-- implementation on underlying strings. This frees us from a lot of | ||
-- book-keeping. We don't lose out on not using ByteString directly | ||
-- because the old implementation encoded it into UTF8 anyway, making | ||
-- it unsuitable for storing anything but text. | ||
|
||
module Yi.Braid | ||
( Braid(..) | ||
, Chunk(..) | ||
, HasSize(..) | ||
, Yi.Braid.mkChunk | ||
, Yi.Braid.overChunk | ||
, (Yi.Braid.-|) | ||
, (Yi.Braid.|-) | ||
, Yi.Braid.reverse | ||
, Yi.Braid.toReversed | ||
, Yi.Braid.toBraid | ||
, Yi.Braid.toBraid' | ||
, Yi.Braid.extractBraid | ||
, Yi.Braid.null | ||
, Yi.Braid.empty | ||
, Yi.Braid.length | ||
, Yi.Braid.append | ||
, Yi.Braid.concat | ||
, Yi.Braid.head | ||
, Yi.Braid.last | ||
, Yi.Braid.init | ||
, Yi.Braid.tail | ||
, Yi.Braid.splitAt | ||
, Yi.Braid.take | ||
, Yi.Braid.drop | ||
, Yi.Braid.dropWhile | ||
, Yi.Braid.dropWhileEnd | ||
, Yi.Braid.takeWhile | ||
, Yi.Braid.takeWhileEnd | ||
, Yi.Braid.span | ||
, Yi.Braid.break | ||
, Yi.Braid.intercalate | ||
, Yi.Braid.intersperse | ||
, Yi.Braid.cons | ||
, Yi.Braid.snoc | ||
, Yi.Braid.singleton | ||
, Yi.Braid.any | ||
, Yi.Braid.all | ||
, Yi.Braid.filter | ||
, Yi.Braid.map | ||
, Yi.Braid.split | ||
, Yi.Braid.withChunk | ||
, Yi.Braid.unsafeWithChunk | ||
, Yi.Braid.foldl' | ||
, Yi.Braid.replicate | ||
, Yi.Braid.replicateSegment | ||
) where | ||
|
||
import Control.DeepSeq | ||
import qualified Data.FingerTree as T | ||
import Data.FingerTree hiding (null, empty, reverse, split) | ||
import qualified Data.List as L (foldl') | ||
import Data.Maybe | ||
import Data.Monoid | ||
import Data.Typeable | ||
|
||
import qualified Yi.Segment as S | ||
|
||
type ValidBraid v a = (T.Measured v (Chunk a), S.Segmented a, HasSize v) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I took me a minute to realize this has kind Constraint, so could you add a kind signature? |
||
|
||
class HasSize a where | ||
getSize :: a -> Int | ||
|
||
|
||
-- | Makes a chunk from a given string. We allow for an arbitrary | ||
-- length function here to allow us to bypass the calculation with | ||
-- 'const' in case the length is known ahead of time. In most cases, | ||
-- the use of this is | ||
-- | ||
-- > mkChunk 'TX.Text.length' someText | ||
mkChunk :: (a -> Int) -- ^ The length function to use. | ||
-> a | ||
-> Chunk a | ||
mkChunk l t = Chunk (l t) t | ||
|
||
-- | Transform the chunk content. It's vital that the transformation | ||
-- preserves the length of the content. | ||
overChunk :: (a -> a) -- ^ Length-preserving content transformation. | ||
-> Chunk a -> Chunk a | ||
overChunk f (Chunk l t) = Chunk l (f t) | ||
|
||
newtype Braid v a = Braid { fromBraid :: T.FingerTree v (Chunk a) } | ||
deriving (Show, Typeable) | ||
|
||
instance (ValidBraid v a) => Monoid (Braid v a) where | ||
mempty = Yi.Braid.empty | ||
mappend = Yi.Braid.append | ||
mconcat = Yi.Braid.concat | ||
|
||
-- | Two 'YiString's are equal if their underlying text is. | ||
-- | ||
-- Implementation note: This just uses 'TX.Text' equality as there is | ||
-- no real opportunity for optimisation here except for a cached | ||
-- length check first. We could unroll the trees and mess around with | ||
-- matching prefixes but the overhead would be higher than a simple | ||
-- conversion and relying on GHC optimisation. | ||
-- | ||
-- The derived Eq implementation for the underlying tree only passes | ||
-- the equality check if the chunks are the same too which is not what | ||
-- we want. | ||
instance (Eq a, Ord a, ValidBraid v a) => Ord (Braid v a) where | ||
compare x y = extractBraid x `compare` extractBraid y | ||
|
||
data Chunk a = Chunk { chunkSize :: {-# UNPACK #-} !Int | ||
, _fromChunk :: {-# UNPACK #-} !a | ||
} deriving (Show, Eq, Typeable) | ||
|
||
-- | Two 'YiString's are equal if their underlying text is. | ||
-- | ||
-- Implementation note: This just uses 'TX.Text' equality as there is | ||
-- no real opportunity for optimisation here except for a cached | ||
-- length check first. We could unroll the trees and mess around with | ||
-- matching prefixes but the overhead would be higher than a simple | ||
-- conversion and relying on GHC optimisation. | ||
-- | ||
-- The derived Eq implementation for the underlying tree only passes | ||
-- the equality check if the chunks are the same too which is not what | ||
-- we want. | ||
instance (ValidBraid v a, Eq a) => Eq (Braid v a) where | ||
t == t' = Yi.Braid.length t == Yi.Braid.length t' && extractBraid t == extractBraid t' | ||
|
||
instance (NFData a) => NFData (Chunk a) where | ||
rnf (Chunk !i !t) = i `seq` rnf t | ||
|
||
instance (NFData a, ValidBraid v a) => NFData (Braid v a) where | ||
rnf = rnf . extractBraid | ||
|
||
(-|) :: (ValidBraid v a) => (Chunk a) -> FingerTree v (Chunk a) -> FingerTree v (Chunk a) | ||
b -| t | chunkSize b == 0 = t | ||
| otherwise = b <| t | ||
|
||
(|-) :: (ValidBraid v a) => FingerTree v (Chunk a) -> (Chunk a) -> FingerTree v (Chunk a) | ||
t |- b | chunkSize b == 0 = t | ||
| otherwise = t |> b | ||
|
||
-- | Default size chunk to use. Currently @1200@ as this is what | ||
-- benchmarks suggest. | ||
-- | ||
-- This makes the biggest difference with 'lines'-like and | ||
-- 'concat'-like functions. Bigger chunks make 'concat' (much) faster | ||
-- but 'lines' slower. In general it seems that we benefit more from | ||
-- larger chunks and 1200 seems to be the sweet spot. | ||
defaultChunkSize :: Int | ||
defaultChunkSize = 1200 | ||
|
||
-- | Reverse the whole underlying string. | ||
-- | ||
-- This involves reversing the order of the chunks as well as content | ||
-- of the chunks. We use a little optimisation here that re-uses the | ||
-- content of each chunk but this exposes a potential problem: after | ||
-- many transformations, our chunks size might become quite varied | ||
-- (but never more than the default size), perhaps we should | ||
-- periodically rechunk the tree to recover nice sizes? | ||
reverse :: (ValidBraid v a) => Braid v a -> Braid v a | ||
reverse = Braid . fmap' (overChunk S.reverse) . T.reverse . fromBraid | ||
|
||
-- | This is like 'fromText' but it allows the user to specify the | ||
-- chunk size to be used. Uses 'defaultChunkSize' if the given | ||
-- size is <= 0. | ||
toBraid' :: forall v a. (ValidBraid v a) => Int -> a -> Braid v a | ||
toBraid' n | n <= 0 = toBraid' defaultChunkSize | ||
| otherwise = Braid . r T.empty . f | ||
where | ||
f = S.chunksOf n | ||
|
||
-- Convert the given string into chunks in the tree. We have a | ||
-- special case for a single element case: because we split on | ||
-- predetermined chunk size, we know that all chunks but the last | ||
-- one will be the specified size so we can optimise here instead | ||
-- of having to recompute chunk size at creation. | ||
r :: FingerTree v (Chunk a) -> [a] -> FingerTree v (Chunk a) | ||
r !tr [] = tr | ||
r !tr (t:[]) = tr |- mkChunk S.length t | ||
r !tr (t:ts) = let r' = tr |- mkChunk (const n) t | ||
in r r' ts | ||
|
||
-- | Converts a 'TX.Text' into a 'YiString' using | ||
-- 'defaultChunkSize'-sized chunks for the underlying tree. | ||
toBraid :: (ValidBraid v a) => a -> Braid v a | ||
toBraid = toBraid' defaultChunkSize | ||
|
||
-- | Consider whether you really need to use this! | ||
extractBraid :: forall v a. (ValidBraid v a) => Braid v a -> a | ||
extractBraid = S.concat . go . fromBraid | ||
where | ||
go :: FingerTree v (Chunk a) -> [a] | ||
go t = case viewl t of | ||
Chunk _ !c :< cs -> c : go cs | ||
EmptyL -> [] | ||
|
||
-- | Spits out the underlying string, reversed. | ||
-- | ||
-- Note that this is actually slightly faster than manually unrolling | ||
-- the tree from the end, 'TX.reverse'ing each chunk and | ||
-- 'TX.concat'ing, at least with -O2 which you really need to be using | ||
-- with 'TX.Text' anyway. | ||
toReversed :: (ValidBraid v a) => Braid v a -> a | ||
toReversed = S.reverse . extractBraid | ||
|
||
-- | Checks if the given 'YiString' is actually empty. | ||
null :: (ValidBraid v a) => Braid v a -> Bool | ||
null = T.null . fromBraid | ||
|
||
-- | Creates an empty 'YiString'. | ||
empty :: (ValidBraid v a) => Braid v a | ||
empty = Braid T.empty | ||
|
||
-- | Length of the whole underlying string. | ||
-- | ||
-- Amortized constant time. | ||
length :: (ValidBraid v a) => Braid v a -> Int | ||
length = getSize . measure . fromBraid | ||
|
||
-- | Append two 'YiString's. | ||
-- | ||
-- We take the extra time to optimise this append for many small | ||
-- insertions. With naive append of the inner fingertree with 'T.><', | ||
-- it is often the case that we end up with a large collection of tiny | ||
-- chunks. This function instead tries to join the underlying trees at | ||
-- outermost chunks up to 'defaultChunkSize' which while slower, | ||
-- should improve memory usage. | ||
-- | ||
-- I suspect that this pays for itself as we'd spend more time | ||
-- computing over all the little chunks than few large ones anyway. | ||
append :: (ValidBraid v a) => Braid v a -> Braid v a -> Braid v a | ||
append (Braid t) (Braid t') = case (viewr t, viewl t') of | ||
(EmptyR, _) -> Braid t' | ||
(_, EmptyL) -> Braid t | ||
(ts :> Chunk l x, Chunk l' x' :< ts') -> | ||
let len = l + l' in case compare len defaultChunkSize of | ||
GT -> Braid (t <> t') | ||
_ -> Braid (ts |- Chunk len (x <> x') <> ts') | ||
|
||
-- | Concat a list of 'YiString's. | ||
concat :: (ValidBraid v a) => [Braid v a] -> Braid v a | ||
concat = L.foldl' append empty | ||
|
||
-- | Take the first character of the underlying string if possible. | ||
head :: (ValidBraid v a) => Braid v a -> Maybe (S.Segment a) | ||
head (Braid t) = case viewl t of | ||
EmptyL -> Nothing | ||
Chunk _ x :< _ -> if S.null x then Nothing else Just (S.head x) | ||
|
||
-- | Take the last character of the underlying string if possible. | ||
last :: (ValidBraid v a) => Braid v a -> Maybe (S.Segment a) | ||
last (Braid t) = case viewr t of | ||
EmptyR -> Nothing | ||
_ :> Chunk _ x -> if S.null x then Nothing else Just (S.last x) | ||
|
||
-- | Takes every character but the last one: returns Nothing on empty | ||
-- string. | ||
init :: (ValidBraid v a) => Braid v a -> Maybe (Braid v a) | ||
init (Braid t) = case viewr t of | ||
EmptyR -> Nothing | ||
ts :> Chunk 0 _ -> Yi.Braid.init (Braid ts) | ||
ts :> Chunk l x -> Just . Braid $ ts |- Chunk (l - 1) (S.init x) | ||
|
||
-- | Takes the tail of the underlying string. If the string is empty | ||
-- to begin with, returns Nothing. | ||
tail :: (ValidBraid v a) => Braid v a -> Maybe (Braid v a) | ||
tail (Braid t) = case viewl t of | ||
EmptyL -> Nothing | ||
Chunk 0 _ :< ts -> Yi.Braid.tail (Braid ts) | ||
Chunk l x :< ts -> Just . Braid $ Chunk (l - 1) (S.tail x) -| ts | ||
|
||
-- | Splits the string at given character position. | ||
-- | ||
-- If @position <= 0@ then the left string is empty and the right string | ||
-- contains everything else. | ||
-- | ||
-- If @position >= length of the string@ then the left string contains | ||
-- everything and the right string is empty. | ||
-- | ||
-- Implementation note: the way this works is by splitting the | ||
-- underlying finger at a closest chunk that goes *over* the given | ||
-- position (see 'T.split'). This either results in a perfect split at | ||
-- which point we're done or more commonly, it leaves as few | ||
-- characters short and we need to take few characters from the first | ||
-- chunk of the right side of the split. We do precisely that. | ||
-- | ||
-- All together, this split is only as expensive as underlying | ||
-- 'T.split', the cost of splitting a chunk into two, the cost of one | ||
-- cons and one cons of a chunk and lastly the cost of 'T.splitAt' of | ||
-- the underlying 'TX.Text'. It turns out to be fairly fast all | ||
-- together. | ||
splitAt :: (ValidBraid v a) => Int -> Braid v a -> (Braid v a, Braid v a) | ||
splitAt n (Braid t) | ||
| n <= 0 = (mempty, Braid t) | ||
| otherwise = case viewl s of | ||
Chunk l x :< ts | n' /= 0 -> | ||
let (lx, rx) = S.splitAt n' x | ||
in (Braid $ f |> Chunk n' lx, | ||
Braid $ Chunk (l - n') rx -| ts) | ||
_ -> (Braid f, Braid s) | ||
where | ||
(f, s) = T.split ((> n) . getSize) t | ||
n' = n - getSize (measure f) | ||
|
||
-- | Takes the first n given characters. | ||
take :: (ValidBraid v a) => Int -> Braid v a -> Braid v a | ||
take 1 = maybe mempty Yi.Braid.singleton . Yi.Braid.head | ||
take n = fst . Yi.Braid.splitAt n | ||
|
||
-- | Drops the first n characters. | ||
drop :: (ValidBraid v a) => Int -> Braid v a -> Braid v a | ||
drop 1 = fromMaybe mempty . Yi.Braid.tail | ||
drop n = snd . Yi.Braid.splitAt n | ||
|
||
-- | The usual 'Prelude.dropWhile' optimised for 'YiString's. | ||
dropWhile :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> Braid v a | ||
dropWhile p = Braid . go . fromBraid | ||
where | ||
go t = case viewl t of | ||
EmptyL -> T.empty | ||
Chunk 0 _ :< ts -> go ts | ||
Chunk l x :< ts -> | ||
let r = S.dropWhile p x | ||
l' = S.length r | ||
in case compare l' l of | ||
-- We dropped nothing so we must be done. | ||
EQ -> t | ||
-- We dropped something, if it was everything then drop from | ||
-- next chunk. | ||
LT | S.null r -> go ts | ||
-- It wasn't everything and we have left-overs, we must be done. | ||
| otherwise -> Chunk l' r <| ts | ||
-- We shouldn't really get here or it would mean that | ||
-- dropping stuff resulted in more content than we had. This | ||
-- can only happen if unsafe functions don't preserve the | ||
-- chunk size and it goes out of sync with the text length. | ||
-- Preserve this abomination, it may be useful for | ||
-- debugging. | ||
_ -> Chunk l' r -| ts | ||
|
||
-- | As 'Yi.Braid.dropWhile' but drops from the end instead. | ||
dropWhileEnd :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> Braid v a | ||
dropWhileEnd p = Braid . go . fromBraid | ||
where | ||
go t = case viewr t of | ||
EmptyR -> T.empty | ||
ts :> Chunk 0 _ -> go ts | ||
ts :> Chunk l x -> | ||
let r = S.dropWhileEnd p x | ||
l' = S.length r | ||
in case compare l' l of | ||
EQ -> t | ||
LT | S.null r -> go ts | ||
| otherwise -> ts |> Chunk l' r | ||
_ -> ts |- Chunk l' r | ||
|
||
-- | The usual 'Prelude.takeWhile' optimised for 'YiString's. | ||
takeWhile :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> Braid v a | ||
takeWhile p = Braid . go . fromBraid | ||
where | ||
go t = case viewl t of | ||
EmptyL -> T.empty | ||
Chunk 0 _ :< ts -> go ts | ||
Chunk l x :< ts -> | ||
let r = S.takeWhile p x | ||
l' = S.length r | ||
in case compare l' l of | ||
-- We took the whole chunk, keep taking more. | ||
EQ -> Chunk l x -| go ts | ||
-- We took some stuff but not everything so we're done. | ||
-- Alternatively, we took more than the size chunk so | ||
-- preserve this wonder. This should only ever happen if you | ||
-- use unsafe functions and Chunk size goes out of sync with | ||
-- actual text length. | ||
_ -> T.singleton $ Chunk l' r | ||
|
||
-- | Like 'Yi.Braid.takeWhile' but takes from the end instead. | ||
takeWhileEnd :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> Braid v a | ||
takeWhileEnd p = Braid . go . fromBraid | ||
where | ||
go t = case viewr t of | ||
EmptyR -> T.empty | ||
ts :> Chunk 0 _ -> go ts | ||
ts :> Chunk l x -> case compare l' l of | ||
EQ -> go ts |> Chunk l x | ||
_ -> T.singleton $ Chunk l' r | ||
where | ||
-- no TX.takeWhileEnd – https://github.com/bos/text/issues/89 | ||
r = S.reverse . S.takeWhile p . S.reverse $ x | ||
l' = S.length r | ||
|
||
|
||
-- | Returns a pair whose first element is the longest prefix | ||
-- (possibly empty) of t of elements that satisfy p, and whose second | ||
-- is the remainder of the string. See also 'TX.span'. | ||
-- | ||
-- This implementation uses 'Yi.Braid.splitAt' which actually is just | ||
-- as fast as hand-unrolling the tree. GHC sure is great! | ||
span :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> (Braid v a, Braid v a) | ||
span p y = let x = Yi.Braid.takeWhile p y | ||
in case Yi.Braid.splitAt (Yi.Braid.length x) y of | ||
-- Re-using ‘x’ seems to gain us a minor performance | ||
-- boost. | ||
(_, y') -> (x, y') | ||
|
||
-- | Just like 'Yi.Braid.span' but with the predicate negated. | ||
break :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> (Braid v a, Braid v a) | ||
break p = Yi.Braid.span (not . p) | ||
|
||
-- | Concatenates the list of 'YiString's after inserting the | ||
-- user-provided 'YiString' between the elements. | ||
-- | ||
-- Empty 'YiString's are not ignored and will end up as strings of | ||
-- length 1. If you don't want this, it's up to you to pre-process the | ||
-- list. Just as with 'Yi.Braid.intersperse', it is up to the user to | ||
-- pre-process the list. | ||
intercalate :: (ValidBraid v a) => Braid v a -> [Braid v a] -> Braid v a | ||
intercalate _ [] = mempty | ||
intercalate (Braid t') (Braid s:ss) = Braid $ go s ss | ||
where | ||
go !acc [] = acc | ||
go acc (Braid t : ts') = go (acc >< t' >< t) ts' | ||
|
||
-- | Intersperses the given character between the 'YiString's. This is | ||
-- useful when you have a bunch of strings you just want to separate | ||
-- something with, comma or a dash. Note that it only inserts the | ||
-- character between the elements. | ||
-- | ||
-- What's more, the result is a single 'YiString'. You can easily | ||
-- achieve a version that blindly inserts elements to the back by | ||
-- mapping over the list instead of using this function. | ||
-- | ||
-- You can think of it as a specialised version of | ||
-- 'Yi.Braid.intercalate'. Note that what this does __not__ do is | ||
-- intersperse characters into the underlying text, you should convert | ||
-- and use 'TX.intersperse' for that instead. | ||
intersperse :: (ValidBraid v a) => S.Segment a -> [Braid v a] -> Braid v a | ||
intersperse _ [] = mempty | ||
intersperse c (t:ts) = go t ts | ||
where | ||
go !acc [] = acc | ||
go acc (t':ts') = go (acc <> (c `cons` t')) ts' | ||
|
||
-- | Add a 'Char' in front of a 'YiString'. | ||
cons :: (ValidBraid v a) => S.Segment a -> Braid v a -> Braid v a | ||
cons c (Braid t) = case viewl t of | ||
EmptyL -> Yi.Braid.singleton c | ||
Chunk l x :< ts | l < defaultChunkSize -> Braid $ Chunk (l + 1) (c `S.cons` x) <| ts | ||
_ -> Braid $ Chunk 1 (S.singleton c) <| t | ||
|
||
-- | Add a 'Char' in the back of a 'YiString'. | ||
snoc :: (ValidBraid v a) => Braid v a -> S.Segment a -> Braid v a | ||
snoc (Braid t) c = case viewr t of | ||
EmptyR -> Yi.Braid.singleton c | ||
ts :> Chunk l x | l < defaultChunkSize -> Braid $ ts |> Chunk (l + 1) (x `S.snoc` c) | ||
_ -> Braid $ t |> Chunk 1 (S.singleton c) | ||
|
||
-- | Single character 'YiString'. Consider whether it's worth creating | ||
-- this, maybe you can use 'cons' or 'snoc' instead? | ||
singleton :: (ValidBraid v a) => S.Segment a -> Braid v a | ||
singleton c = Braid . T.singleton $ Chunk 1 (S.singleton c) | ||
|
||
-- | 'YiString' specialised @any@. | ||
-- | ||
-- Implementation note: this currently just does any by doing ‘TX.Text’ | ||
-- conversions upon consecutive chunks. We should be able to speed it | ||
-- up by running it in parallel over multiple chunks. | ||
any :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> Bool | ||
any p = go . fromBraid | ||
where | ||
go x = case viewl x of | ||
EmptyL -> False | ||
Chunk _ t :< ts -> S.any p t || go ts | ||
|
||
-- | 'YiString' specialised @all@. | ||
-- | ||
-- See the implementation note for 'Yi.Braid.any'. | ||
all :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> Bool | ||
all p = go . fromBraid | ||
where | ||
go x = case viewl x of | ||
EmptyL -> True | ||
Chunk _ t :< ts -> S.all p t && go ts | ||
|
||
-- | Filters the characters from the underlying string. | ||
-- | ||
-- >>> filter (/= 'a') "bac" | ||
-- "bc" | ||
filter :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> Braid v a | ||
filter p = Braid . go . fromBraid | ||
where | ||
go t = case viewl t of | ||
EmptyL -> T.empty | ||
Chunk _ x :< ts -> mkChunk S.length (S.filter p x) -| go ts | ||
|
||
-- | Maps the characters over the underlying string. | ||
map :: (ValidBraid v a) => (S.Segment a -> S.Segment a) -> Braid v a -> Braid v a | ||
map f = Braid . go . fromBraid | ||
where | ||
go t = case viewl t of | ||
EmptyL -> T.empty | ||
Chunk l x :< ts -> Chunk l (S.map f x) <| go ts | ||
|
||
-- | Splits the 'YiString' on characters matching the predicate, like | ||
-- 'TX.split'. | ||
-- | ||
-- For splitting on newlines use 'Yi.Braid.lines' or 'Yi.Braid.lines'' | ||
-- instead. | ||
-- | ||
-- Implementation note: GHC actually makes this naive implementation | ||
-- about as fast and in cases with lots of splits, faster, as a | ||
-- hand-rolled version on chunks with appends which is quite amazing | ||
-- in itself. | ||
split :: (ValidBraid v a) => (S.Segment a -> Bool) -> Braid v a -> [Braid v a] | ||
split p = fmap toBraid . S.split p . extractBraid | ||
|
||
-- | Left fold. | ||
-- | ||
-- Benchmarks show that folding is actually Pretty Damn Slow™: consider | ||
-- whether folding is really the best thing to use in your scenario. | ||
foldl' :: (ValidBraid v a) => (b -> S.Segment a -> b) -> b -> Braid v a -> b | ||
foldl' f a = go a . fromBraid | ||
where | ||
go acc t = case viewl t of | ||
EmptyL -> acc | ||
Chunk _ x :< ts -> let r = S.foldl f acc x | ||
in r `seq` go r ts | ||
|
||
-- | Replicate the given YiString set number of times, concatenating | ||
-- the results. Also see 'Yi.Braid.replicateChar'. | ||
replicate :: (ValidBraid v a) => Int -> Braid v a -> Braid v a | ||
replicate n t | n <= 0 = mempty | ||
| otherwise = t <> Yi.Braid.replicate (n - 1) t | ||
|
||
-- | Replicate the given character set number of times and pack the | ||
-- result into a 'YiString'. | ||
-- | ||
-- >>> replicateChar 4 ' ' | ||
-- " " | ||
replicateSegment :: (ValidBraid v a) => Int -> S.Segment a -> Braid v a | ||
replicateSegment n = toBraid . S.replicate n . S.singleton | ||
|
||
-- | Helper function doing conversions of to and from underlying | ||
-- 'TX.Text'. You should aim to implement everything in terms of | ||
-- 'YiString' instead. | ||
-- | ||
-- Please note that this maps over each __chunk__ so this can only be | ||
-- used with layout-agnostic functions. For example | ||
-- | ||
-- >>> let t = 'fromString' "abc" <> 'fromString' "def" | ||
-- >>> 'toString' $ 'withText' 'TX.reverse' t | ||
-- "cbafed" | ||
-- | ||
-- Probably doesn't do what you wanted, but 'TX.toUpper' would. | ||
-- Specifically, for any @f : 'TX.Text' → 'TX.Text'@, 'withText' will | ||
-- only do the ‘expected’ thing iff | ||
-- | ||
-- @f x <> f y ≡ f (x <> y)@ | ||
-- | ||
-- which should look very familiar. | ||
withChunk :: (ValidBraid v a) => (a -> a) -> Braid v a -> Braid v a | ||
withChunk f = Braid . T.fmap' (mkChunk S.length . f . _fromChunk) . fromBraid | ||
|
||
-- | Maps over each __chunk__ which means this function is UNSAFE! If | ||
-- you use this with functions which don't preserve 'Size', that is | ||
-- the chunk length and number of newlines, things will break really, | ||
-- really badly. You should not need to use this. | ||
-- | ||
-- Also see 'T.unsafeFmap' | ||
unsafeWithChunk :: (ValidBraid v a) => (a -> a) -> Braid v a -> Braid v a | ||
unsafeWithChunk f = Braid . T.unsafeFmap g . fromBraid | ||
where | ||
g (Chunk l t) = Chunk l (f t) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
{-# language OverloadedStrings #-} | ||
{-# language ScopedTypeVariables #-} | ||
{-# language ViewPatterns #-} | ||
{-# language FlexibleInstances #-} | ||
{-# options_haddock show-extensions #-} | ||
|
||
-- | | ||
|
@@ -69,7 +70,6 @@ import qualified Data.ByteString.Lazy as BSL | |
import Data.Char (isSpace) | ||
import qualified Data.FingerTree as T | ||
import Data.FingerTree hiding (null, empty, reverse, split) | ||
import qualified Data.List as L (foldl') | ||
import Data.Maybe | ||
import Data.Monoid | ||
import Data.String (IsString(..)) | ||
|
@@ -81,18 +81,21 @@ import qualified Data.Text.IO as TXIO (writeFile) | |
import Data.Typeable | ||
import Prelude hiding (drop) | ||
|
||
import qualified Yi.Braid as B | ||
|
||
-- | Used to cache the size of the strings. | ||
data Size = Indices { charIndex :: {-# UNPACK #-} !Int | ||
-- ^ How many characters under here? | ||
, lineIndex :: Int | ||
-- ^ How many lines under here? | ||
} deriving (Eq, Show, Typeable) | ||
|
||
instance B.HasSize Size where | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This typeclass would allow us to extract the size of a chunk's measure for custom measures. |
||
getSize = charIndex | ||
|
||
-- | A chunk storing the string of the type it is indexed by. It | ||
-- caches the length of stored string. | ||
data YiChunk = Chunk { chunkSize :: {-# UNPACK #-} !Int | ||
, _fromChunk :: {-# UNPACK #-} !TX.Text | ||
} deriving (Show, Eq, Typeable) | ||
type YiChunk = B.Chunk TX.Text | ||
|
||
-- | Makes a chunk from a given string. We allow for an arbitrary | ||
-- length function here to allow us to bypass the calculation with | ||
|
@@ -103,13 +106,7 @@ data YiChunk = Chunk { chunkSize :: {-# UNPACK #-} !Int | |
mkChunk :: (TX.Text -> Int) -- ^ The length function to use. | ||
-> TX.Text | ||
-> YiChunk | ||
mkChunk l t = Chunk (l t) t | ||
|
||
-- | Transform the chunk content. It's vital that the transformation | ||
-- preserves the length of the content. | ||
overChunk :: (TX.Text -> TX.Text) -- ^ Length-preserving content transformation. | ||
-> YiChunk -> YiChunk | ||
overChunk f (Chunk l t) = Chunk l (f t) | ||
mkChunk = B.mkChunk | ||
|
||
-- | Counts number of newlines in the given 'TX.Text'. | ||
countNl :: TX.Text -> Int | ||
|
@@ -120,64 +117,26 @@ instance Monoid Size where | |
Indices c l `mappend` Indices c' l' = Indices (c + c') (l + l') | ||
|
||
instance Measured Size YiChunk where | ||
measure (Chunk l t) = Indices l (countNl t) | ||
measure (B.Chunk l t) = Indices l (countNl t) | ||
|
||
-- | A 'YiString' is a 'FingerTree' with cached char and line counts | ||
-- over chunks of 'TX.Text'. | ||
newtype YiString = YiString { fromRope :: FingerTree Size YiChunk } | ||
deriving (Show, Typeable) | ||
type YiString = B.Braid Size TX.Text | ||
|
||
-- | Two 'YiString's are equal if their underlying text is. | ||
-- | ||
-- Implementation note: This just uses 'TX.Text' equality as there is | ||
-- no real opportunity for optimisation here except for a cached | ||
-- length check first. We could unroll the trees and mess around with | ||
-- matching prefixes but the overhead would be higher than a simple | ||
-- conversion and relying on GHC optimisation. | ||
-- | ||
-- The derived Eq implementation for the underlying tree only passes | ||
-- the equality check if the chunks are the same too which is not what | ||
-- we want. | ||
instance Eq YiString where | ||
t == t' = Yi.Rope.length t == Yi.Rope.length t' && toText t == toText t' | ||
fromRope :: B.Braid v a -> FingerTree v (B.Chunk a) | ||
fromRope = B.fromBraid | ||
|
||
instance NFData Size where | ||
rnf (Indices !c !l) = c `seq` l `seq` () | ||
|
||
instance NFData YiChunk where | ||
rnf (Chunk !i !t) = i `seq` rnf t | ||
|
||
instance NFData YiString where | ||
rnf = rnf . toText | ||
|
||
instance IsString YiString where | ||
fromString = Yi.Rope.fromString | ||
|
||
instance Monoid YiString where | ||
mempty = Yi.Rope.empty | ||
mappend = Yi.Rope.append | ||
mconcat = Yi.Rope.concat | ||
|
||
instance Ord YiString where | ||
compare x y = toText x `compare` toText y | ||
|
||
(-|) :: YiChunk -> FingerTree Size YiChunk -> FingerTree Size YiChunk | ||
b -| t | chunkSize b == 0 = t | ||
| otherwise = b <| t | ||
(-|) = (B.-|) | ||
|
||
(|-) :: FingerTree Size YiChunk -> YiChunk -> FingerTree Size YiChunk | ||
t |- b | chunkSize b == 0 = t | ||
| otherwise = t |> b | ||
|
||
-- | Default size chunk to use. Currently @1200@ as this is what | ||
-- benchmarks suggest. | ||
-- | ||
-- This makes the biggest difference with 'lines'-like and | ||
-- 'concat'-like functions. Bigger chunks make 'concat' (much) faster | ||
-- but 'lines' slower. In general it seems that we benefit more from | ||
-- larger chunks and 1200 seems to be the sweet spot. | ||
defaultChunkSize :: Int | ||
defaultChunkSize = 1200 | ||
(|-) = (B.|-) | ||
|
||
-- | Reverse the whole underlying string. | ||
-- | ||
|
@@ -188,7 +147,7 @@ defaultChunkSize = 1200 | |
-- (but never more than the default size), perhaps we should | ||
-- periodically rechunk the tree to recover nice sizes? | ||
reverse :: YiString -> YiString | ||
reverse = YiString . fmap' (overChunk TX.reverse) . T.reverse . fromRope | ||
reverse = B.reverse | ||
|
||
-- | See 'fromText'. | ||
fromString :: String -> YiString | ||
|
@@ -214,38 +173,19 @@ toReverseString = TX.unpack . toReverseText | |
-- chunk size to be used. Uses 'defaultChunkSize' if the given | ||
-- size is <= 0. | ||
fromText' :: Int -> TX.Text -> YiString | ||
fromText' n | n <= 0 = fromText' defaultChunkSize | ||
| otherwise = YiString . r T.empty . f | ||
where | ||
f = TX.chunksOf n | ||
|
||
-- Convert the given string into chunks in the tree. We have a | ||
-- special case for a single element case: because we split on | ||
-- predetermined chunk size, we know that all chunks but the last | ||
-- one will be the specified size so we can optimise here instead | ||
-- of having to recompute chunk size at creation. | ||
r :: FingerTree Size YiChunk -> [TX.Text] -> FingerTree Size YiChunk | ||
r !tr [] = tr | ||
r !tr (t:[]) = tr |- mkChunk TX.length t | ||
r !tr (t:ts) = let r' = tr |- mkChunk (const n) t | ||
in r r' ts | ||
fromText' = B.toBraid' | ||
|
||
-- | Converts a 'TX.Text' into a 'YiString' using | ||
-- 'defaultChunkSize'-sized chunks for the underlying tree. | ||
fromText :: TX.Text -> YiString | ||
fromText = fromText' defaultChunkSize | ||
fromText = B.toBraid | ||
|
||
fromLazyText :: TXL.Text -> YiString | ||
fromLazyText = YiString . T.fromList . fmap (mkChunk TX.length) . TXL.toChunks | ||
fromLazyText = B.Braid . T.fromList . fmap (mkChunk TX.length) . TXL.toChunks | ||
|
||
-- | Consider whether you really need to use this! | ||
toText :: YiString -> TX.Text | ||
toText = TX.concat . go . fromRope | ||
where | ||
go :: FingerTree Size YiChunk -> [TX.Text] | ||
go t = case viewl t of | ||
Chunk _ !c :< cs -> c : go cs | ||
EmptyL -> [] | ||
toText = B.extractBraid | ||
|
||
-- | Spits out the underlying string, reversed. | ||
-- | ||
|
@@ -258,23 +198,23 @@ toReverseText = TX.reverse . toText | |
|
||
-- | Checks if the given 'YiString' is actually empty. | ||
null :: YiString -> Bool | ||
null = T.null . fromRope | ||
null = B.null | ||
|
||
-- | Creates an empty 'YiString'. | ||
empty :: YiString | ||
empty = YiString T.empty | ||
empty = B.empty | ||
|
||
-- | Length of the whole underlying string. | ||
-- | ||
-- Amortized constant time. | ||
length :: YiString -> Int | ||
length = charIndex . measure . fromRope | ||
length = B.length | ||
|
||
-- | Count the number of newlines in the underlying string. This is | ||
-- actually amortized constant time as we cache this information in | ||
-- the underlying tree. | ||
countNewLines :: YiString -> Int | ||
countNewLines = lineIndex . measure . fromRope | ||
countNewLines = lineIndex . measure . B.fromBraid | ||
|
||
-- | Append two 'YiString's. | ||
-- | ||
|
@@ -288,45 +228,29 @@ countNewLines = lineIndex . measure . fromRope | |
-- I suspect that this pays for itself as we'd spend more time | ||
-- computing over all the little chunks than few large ones anyway. | ||
append :: YiString -> YiString -> YiString | ||
append (YiString t) (YiString t') = case (viewr t, viewl t') of | ||
(EmptyR, _) -> YiString t' | ||
(_, EmptyL) -> YiString t | ||
(ts :> Chunk l x, Chunk l' x' :< ts') -> | ||
let len = l + l' in case compare len defaultChunkSize of | ||
GT -> YiString (t <> t') | ||
_ -> YiString (ts |- Chunk len (x <> x') <> ts') | ||
append = B.append | ||
|
||
-- | Concat a list of 'YiString's. | ||
concat :: [YiString] -> YiString | ||
concat = L.foldl' append empty | ||
concat = B.concat | ||
|
||
-- | Take the first character of the underlying string if possible. | ||
head :: YiString -> Maybe Char | ||
head (YiString t) = case viewl t of | ||
EmptyL -> Nothing | ||
Chunk _ x :< _ -> if TX.null x then Nothing else Just (TX.head x) | ||
head = B.head | ||
|
||
-- | Take the last character of the underlying string if possible. | ||
last :: YiString -> Maybe Char | ||
last (YiString t) = case viewr t of | ||
EmptyR -> Nothing | ||
_ :> Chunk _ x -> if TX.null x then Nothing else Just (TX.last x) | ||
last = B.last | ||
|
||
-- | Takes every character but the last one: returns Nothing on empty | ||
-- string. | ||
init :: YiString -> Maybe YiString | ||
init (YiString t) = case viewr t of | ||
EmptyR -> Nothing | ||
ts :> Chunk 0 _ -> Yi.Rope.init (YiString ts) | ||
ts :> Chunk l x -> Just . YiString $ ts |- Chunk (l - 1) (TX.init x) | ||
init = B.init | ||
|
||
-- | Takes the tail of the underlying string. If the string is empty | ||
-- to begin with, returns Nothing. | ||
tail :: YiString -> Maybe YiString | ||
tail (YiString t) = case viewl t of | ||
EmptyL -> Nothing | ||
Chunk 0 _ :< ts -> Yi.Rope.tail (YiString ts) | ||
Chunk l x :< ts -> Just . YiString $ Chunk (l - 1) (TX.tail x) -| ts | ||
tail = B.tail | ||
|
||
-- | Splits the string at given character position. | ||
-- | ||
|
@@ -349,105 +273,31 @@ tail (YiString t) = case viewl t of | |
-- the underlying 'TX.Text'. It turns out to be fairly fast all | ||
-- together. | ||
splitAt :: Int -> YiString -> (YiString, YiString) | ||
splitAt n (YiString t) | ||
| n <= 0 = (mempty, YiString t) | ||
| otherwise = case viewl s of | ||
Chunk l x :< ts | n' /= 0 -> | ||
let (lx, rx) = TX.splitAt n' x | ||
in (YiString $ f |> Chunk n' lx, | ||
YiString $ Chunk (l - n') rx -| ts) | ||
_ -> (YiString f, YiString s) | ||
where | ||
(f, s) = T.split ((> n) . charIndex) t | ||
n' = n - charIndex (measure f) | ||
splitAt = B.splitAt | ||
|
||
-- | Takes the first n given characters. | ||
take :: Int -> YiString -> YiString | ||
take 1 = maybe mempty Yi.Rope.singleton . Yi.Rope.head | ||
take n = fst . Yi.Rope.splitAt n | ||
take = B.take | ||
|
||
-- | Drops the first n characters. | ||
drop :: Int -> YiString -> YiString | ||
drop 1 = fromMaybe mempty . Yi.Rope.tail | ||
drop n = snd . Yi.Rope.splitAt n | ||
drop = B.drop | ||
|
||
-- | The usual 'Prelude.dropWhile' optimised for 'YiString's. | ||
dropWhile :: (Char -> Bool) -> YiString -> YiString | ||
dropWhile p = YiString . go . fromRope | ||
where | ||
go t = case viewl t of | ||
EmptyL -> T.empty | ||
Chunk 0 _ :< ts -> go ts | ||
Chunk l x :< ts -> | ||
let r = TX.dropWhile p x | ||
l' = TX.length r | ||
in case compare l' l of | ||
-- We dropped nothing so we must be done. | ||
EQ -> t | ||
-- We dropped something, if it was everything then drop from | ||
-- next chunk. | ||
LT | TX.null r -> go ts | ||
-- It wasn't everything and we have left-overs, we must be done. | ||
| otherwise -> Chunk l' r <| ts | ||
-- We shouldn't really get here or it would mean that | ||
-- dropping stuff resulted in more content than we had. This | ||
-- can only happen if unsafe functions don't preserve the | ||
-- chunk size and it goes out of sync with the text length. | ||
-- Preserve this abomination, it may be useful for | ||
-- debugging. | ||
_ -> Chunk l' r -| ts | ||
dropWhile = B.dropWhile | ||
|
||
-- | As 'Yi.Rope.dropWhile' but drops from the end instead. | ||
dropWhileEnd :: (Char -> Bool) -> YiString -> YiString | ||
dropWhileEnd p = YiString . go . fromRope | ||
where | ||
go t = case viewr t of | ||
EmptyR -> T.empty | ||
ts :> Chunk 0 _ -> go ts | ||
ts :> Chunk l x -> | ||
let r = TX.dropWhileEnd p x | ||
l' = TX.length r | ||
in case compare l' l of | ||
EQ -> t | ||
LT | TX.null r -> go ts | ||
| otherwise -> ts |> Chunk l' r | ||
_ -> ts |- Chunk l' r | ||
dropWhileEnd = B.dropWhileEnd | ||
|
||
-- | The usual 'Prelude.takeWhile' optimised for 'YiString's. | ||
takeWhile :: (Char -> Bool) -> YiString -> YiString | ||
takeWhile p = YiString . go . fromRope | ||
where | ||
go t = case viewl t of | ||
EmptyL -> T.empty | ||
Chunk 0 _ :< ts -> go ts | ||
Chunk l x :< ts -> | ||
let r = TX.takeWhile p x | ||
l' = TX.length r | ||
in case compare l' l of | ||
-- We took the whole chunk, keep taking more. | ||
EQ -> Chunk l x -| go ts | ||
-- We took some stuff but not everything so we're done. | ||
-- Alternatively, we took more than the size chunk so | ||
-- preserve this wonder. This should only ever happen if you | ||
-- use unsafe functions and Chunk size goes out of sync with | ||
-- actual text length. | ||
_ -> T.singleton $ Chunk l' r | ||
takeWhile = B.takeWhile | ||
|
||
-- | Like 'Yi.Rope.takeWhile' but takes from the end instead. | ||
takeWhileEnd :: (Char -> Bool) -> YiString -> YiString | ||
takeWhileEnd p = YiString . go . fromRope | ||
where | ||
go t = case viewr t of | ||
EmptyR -> T.empty | ||
ts :> Chunk 0 _ -> go ts | ||
ts :> Chunk l x -> case compare l' l of | ||
EQ -> go ts |> Chunk l x | ||
_ -> T.singleton $ Chunk l' r | ||
where | ||
-- no TX.takeWhileEnd – https://github.com/bos/text/issues/89 | ||
r = TX.reverse . TX.takeWhile p . TX.reverse $ x | ||
l' = TX.length r | ||
|
||
takeWhileEnd = B.takeWhileEnd | ||
|
||
-- | Returns a pair whose first element is the longest prefix | ||
-- (possibly empty) of t of elements that satisfy p, and whose second | ||
|
@@ -456,15 +306,11 @@ takeWhileEnd p = YiString . go . fromRope | |
-- This implementation uses 'Yi.Rope.splitAt' which actually is just | ||
-- as fast as hand-unrolling the tree. GHC sure is great! | ||
span :: (Char -> Bool) -> YiString -> (YiString, YiString) | ||
span p y = let x = Yi.Rope.takeWhile p y | ||
in case Yi.Rope.splitAt (Yi.Rope.length x) y of | ||
-- Re-using ‘x’ seems to gain us a minor performance | ||
-- boost. | ||
(_, y') -> (x, y') | ||
span = B.span | ||
|
||
-- | Just like 'Yi.Rope.span' but with the predicate negated. | ||
break :: (Char -> Bool) -> YiString -> (YiString, YiString) | ||
break p = Yi.Rope.span (not . p) | ||
break = B.break | ||
|
||
-- | Concatenates the list of 'YiString's after inserting the | ||
-- user-provided 'YiString' between the elements. | ||
|
@@ -474,11 +320,7 @@ break p = Yi.Rope.span (not . p) | |
-- list. Just as with 'Yi.Rope.intersperse', it is up to the user to | ||
-- pre-process the list. | ||
intercalate :: YiString -> [YiString] -> YiString | ||
intercalate _ [] = mempty | ||
intercalate (YiString t') (YiString s:ss) = YiString $ go s ss | ||
where | ||
go !acc [] = acc | ||
go acc (YiString t : ts') = go (acc >< t' >< t) ts' | ||
intercalate = B.intercalate | ||
|
||
-- | Intersperses the given character between the 'YiString's. This is | ||
-- useful when you have a bunch of strings you just want to separate | ||
|
@@ -494,30 +336,20 @@ intercalate (YiString t') (YiString s:ss) = YiString $ go s ss | |
-- intersperse characters into the underlying text, you should convert | ||
-- and use 'TX.intersperse' for that instead. | ||
intersperse :: Char -> [YiString] -> YiString | ||
intersperse _ [] = mempty | ||
intersperse c (t:ts) = go t ts | ||
where | ||
go !acc [] = acc | ||
go acc (t':ts') = go (acc <> (c `cons` t')) ts' | ||
intersperse = B.intersperse | ||
|
||
-- | Add a 'Char' in front of a 'YiString'. | ||
cons :: Char -> YiString -> YiString | ||
cons c (YiString t) = case viewl t of | ||
EmptyL -> Yi.Rope.singleton c | ||
Chunk l x :< ts | l < defaultChunkSize -> YiString $ Chunk (l + 1) (c `TX.cons` x) <| ts | ||
_ -> YiString $ Chunk 1 (TX.singleton c) <| t | ||
cons = B.cons | ||
|
||
-- | Add a 'Char' in the back of a 'YiString'. | ||
snoc :: YiString -> Char -> YiString | ||
snoc (YiString t) c = case viewr t of | ||
EmptyR -> Yi.Rope.singleton c | ||
ts :> Chunk l x | l < defaultChunkSize -> YiString $ ts |> Chunk (l + 1) (x `TX.snoc` c) | ||
_ -> YiString $ t |> Chunk 1 (TX.singleton c) | ||
snoc = B.snoc | ||
|
||
-- | Single character 'YiString'. Consider whether it's worth creating | ||
-- this, maybe you can use 'cons' or 'snoc' instead? | ||
singleton :: Char -> YiString | ||
singleton c = YiString . T.singleton $ Chunk 1 (TX.singleton c) | ||
singleton = B.singleton | ||
|
||
-- | Splits the underlying string before the given line number. | ||
-- Zero-indexed lines. | ||
|
@@ -540,13 +372,13 @@ splitAtLine n r | n <= 0 = (empty, r) | |
-- now looking for extra newlines in the next chunk rather than extra | ||
-- characters. | ||
splitAtLine' :: Int -> YiString -> (YiString, YiString) | ||
splitAtLine' p (YiString tr) = case viewl s of | ||
ch@(Chunk _ x) :< r -> | ||
splitAtLine' p (B.Braid tr) = case viewl s of | ||
ch@(B.Chunk _ x) :< r -> | ||
let excess = lineIndex (measure f) + lineIndex (measure ch) - p - 1 | ||
(lx, rx) = cutExcess excess x | ||
in (YiString $ f |- mkChunk TX.length lx, | ||
YiString $ mkChunk TX.length rx -| r) | ||
_ -> (YiString f, YiString s) | ||
in (B.Braid $ f |- mkChunk TX.length lx, | ||
B.Braid $ mkChunk TX.length rx -| r) | ||
_ -> (B.Braid f, B.Braid s) | ||
where | ||
(f, s) = T.split ((p <) . lineIndex) tr | ||
|
||
|
@@ -575,13 +407,13 @@ splitAtLine' p (YiString tr) = case viewl s of | |
lines :: YiString -> [YiString] | ||
lines = Prelude.map dropNl . lines' | ||
where | ||
dropNl (YiString t) = case viewr t of | ||
dropNl (B.Braid t) = case viewr t of | ||
EmptyR -> Yi.Rope.empty | ||
ts :> ch@(Chunk l tx) -> | ||
YiString $ ts |- if TX.null tx | ||
ts :> ch@(B.Chunk l tx) -> | ||
B.Braid $ ts |- if TX.null tx | ||
then ch | ||
else case TX.last tx of | ||
'\n' -> Chunk (l - 1) (TX.init tx) | ||
'\n' -> B.Chunk (l - 1) (TX.init tx) | ||
_ -> ch | ||
|
||
-- | Splits the 'YiString' into a list of 'YiString' each containing a | ||
|
@@ -600,10 +432,10 @@ lines = Prelude.map dropNl . lines' | |
-- but the underlying structure might change: notably, chunks will | ||
-- most likely change sizes. | ||
lines' :: YiString -> [YiString] | ||
lines' t = let (YiString f, YiString s) = splitAtLine' 0 t | ||
lines' t = let (B.Braid f, B.Braid s) = splitAtLine' 0 t | ||
in if T.null s | ||
then if T.null f then [] else [YiString f] | ||
else YiString f : lines' (YiString s) | ||
then if T.null f then [] else [B.Braid f] | ||
else B.Braid f : lines' (B.Braid s) | ||
|
||
-- | Joins up lines by a newline character. It does not leave a | ||
-- newline after the last line. If you want a more classical | ||
|
@@ -618,21 +450,13 @@ unlines = Yi.Rope.intersperse '\n' | |
-- conversions upon consecutive chunks. We should be able to speed it | ||
-- up by running it in parallel over multiple chunks. | ||
any :: (Char -> Bool) -> YiString -> Bool | ||
any p = go . fromRope | ||
where | ||
go x = case viewl x of | ||
EmptyL -> False | ||
Chunk _ t :< ts -> TX.any p t || go ts | ||
any = B.any | ||
|
||
-- | 'YiString' specialised @all@. | ||
-- | ||
-- See the implementation note for 'Yi.Rope.any'. | ||
all :: (Char -> Bool) -> YiString -> Bool | ||
all p = go . fromRope | ||
where | ||
go x = case viewl x of | ||
EmptyL -> True | ||
Chunk _ t :< ts -> TX.all p t && go ts | ||
all = B.all | ||
|
||
-- | To serialise a 'YiString', we turn it into a regular 'String' | ||
-- first. | ||
|
@@ -677,19 +501,11 @@ readFile fp = BSL.readFile fp >>= go decoders | |
-- >>> filter (/= 'a') "bac" | ||
-- "bc" | ||
filter :: (Char -> Bool) -> YiString -> YiString | ||
filter p = YiString . go . fromRope | ||
where | ||
go t = case viewl t of | ||
EmptyL -> T.empty | ||
Chunk _ x :< ts -> mkChunk TX.length (TX.filter p x) -| go ts | ||
filter = B.filter | ||
|
||
-- | Maps the characters over the underlying string. | ||
map :: (Char -> Char) -> YiString -> YiString | ||
map f = YiString . go . fromRope | ||
where | ||
go t = case viewl t of | ||
EmptyL -> T.empty | ||
Chunk l x :< ts -> Chunk l (TX.map f x) <| go ts | ||
map = B.map | ||
|
||
-- | Join given 'YiString's with a space. Empty lines will be filtered | ||
-- out first. | ||
|
@@ -713,33 +529,27 @@ words = Prelude.filter (not . Yi.Rope.null) . Yi.Rope.split isSpace | |
-- hand-rolled version on chunks with appends which is quite amazing | ||
-- in itself. | ||
split :: (Char -> Bool) -> YiString -> [YiString] | ||
split p = fmap fromText . TX.split p . toText | ||
split = B.split | ||
|
||
-- | Left fold. | ||
-- | ||
-- Benchmarks show that folding is actually Pretty Damn Slow™: consider | ||
-- whether folding is really the best thing to use in your scenario. | ||
foldl' :: (a -> Char -> a) -> a -> YiString -> a | ||
foldl' f a = go a . fromRope | ||
where | ||
go acc t = case viewl t of | ||
EmptyL -> acc | ||
Chunk _ x :< ts -> let r = TX.foldl' f acc x | ||
in r `seq` go r ts | ||
foldl' = B.foldl' | ||
|
||
-- | Replicate the given YiString set number of times, concatenating | ||
-- the results. Also see 'Yi.Rope.replicateChar'. | ||
-- the results. Also see 'Yi.Braid.replicateChar'. | ||
replicate :: Int -> YiString -> YiString | ||
replicate n t | n <= 0 = mempty | ||
| otherwise = t <> Yi.Rope.replicate (n - 1) t | ||
replicate = B.replicate | ||
|
||
-- | Replicate the given character set number of times and pack the | ||
-- result into a 'YiString'. | ||
-- | ||
-- >>> replicateChar 4 ' ' | ||
-- " " | ||
replicateChar :: Int -> Char -> YiString | ||
replicateChar n = fromText . TX.replicate n . TX.singleton | ||
replicateChar = B.replicateSegment | ||
|
||
-- | Helper function doing conversions of to and from underlying | ||
-- 'TX.Text'. You should aim to implement everything in terms of | ||
|
@@ -760,7 +570,7 @@ replicateChar n = fromText . TX.replicate n . TX.singleton | |
-- | ||
-- which should look very familiar. | ||
withText :: (TX.Text -> TX.Text) -> YiString -> YiString | ||
withText f = YiString . T.fmap' (mkChunk TX.length . f . _fromChunk) . fromRope | ||
withText = B.withChunk | ||
|
||
-- | Maps over each __chunk__ which means this function is UNSAFE! If | ||
-- you use this with functions which don't preserve 'Size', that is | ||
|
@@ -769,6 +579,4 @@ withText f = YiString . T.fmap' (mkChunk TX.length . f . _fromChunk) . fromRope | |
-- | ||
-- Also see 'T.unsafeFmap' | ||
unsafeWithText :: (TX.Text -> TX.Text) -> YiString -> YiString | ||
unsafeWithText f = YiString . T.unsafeFmap g . fromRope | ||
where | ||
g (Chunk l t) = Chunk l (f t) | ||
unsafeWithText = B.unsafeWithChunk |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
{-# language TypeFamilies #-} | ||
module Yi.Segment where | ||
|
||
import qualified Data.Text as TX | ||
|
||
class (Monoid t) => Segmented t where | ||
type Segment t | ||
length :: t -> Int | ||
null :: t -> Bool | ||
head :: t -> Segment t | ||
last :: t -> Segment t | ||
init :: t -> t | ||
tail :: t -> t | ||
take :: Int -> t -> t | ||
drop :: Int -> t -> t | ||
singleton :: Segment t -> t | ||
splitAt :: Int -> t -> (t, t) | ||
cons :: Segment t -> t -> t | ||
filter :: (Segment t -> Bool) -> t -> t | ||
any :: (Segment t -> Bool) -> t -> Bool | ||
all :: (Segment t -> Bool) -> t -> Bool | ||
reverse :: t -> t | ||
takeWhile :: (Segment t -> Bool) -> t -> t | ||
dropWhile :: (Segment t -> Bool) -> t -> t | ||
dropWhileEnd :: (Segment t -> Bool) -> t -> t | ||
concat :: [t] -> t | ||
split :: (Segment t -> Bool) -> t -> [t] | ||
chunksOf :: Int -> t -> [t] | ||
snoc :: t -> Segment t -> t | ||
foldl :: (a -> Segment t -> a) -> a -> t -> a | ||
map :: (Segment t -> Segment t) -> t -> t | ||
replicate :: Int -> t -> t | ||
|
||
instance Segmented TX.Text where | ||
type Segment TX.Text = Char | ||
length = TX.length | ||
null = TX.null | ||
head = TX.head | ||
last = TX.last | ||
init = TX.init | ||
tail = TX.tail | ||
take = TX.take | ||
drop = TX.drop | ||
cons = TX.cons | ||
filter = TX.filter | ||
any = TX.any | ||
all = TX.all | ||
reverse = TX.reverse | ||
takeWhile = TX.takeWhile | ||
dropWhile = TX.dropWhile | ||
dropWhileEnd = TX.dropWhileEnd | ||
concat = TX.concat | ||
split = TX.split | ||
chunksOf = TX.chunksOf | ||
snoc = TX.snoc | ||
foldl = TX.foldl' | ||
map = TX.map | ||
replicate = TX.replicate |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
resolver: lts-8.18 | ||
|
||
nix: | ||
enable: true | ||
packages: | ||
- libcxx | ||
- icu | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we dropped icu dependency in 0.10 |
||
- gcc | ||
- ncurses | ||
|
||
packages: | ||
- '.' | ||
|
||
extra-deps: [] | ||
|
||
flags: {} | ||
|
||
extra-package-dbs: [] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ library | |
|
||
exposed-modules: | ||
Yi.Rope | ||
Yi.Segment | ||
|
||
build-depends: | ||
base >=4.8 && <5 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is unfortunate; it's required for the Eq, Ord, etc. instances. I don't really see why they're undecidable so it's possible I'm missing something and this could be removed somehow.