addfile ./ByteStringParser.hs hunk ./ByteStringParser.hs 1 +----------------------------------------------------------------------------- +-- | +-- Module : ByteStringParser +-- Copyright : (c) Jeremy Shaw 2006 +-- Copyright : (c) Daan Leijen 1999-2001 +-- License : BSD-style (see the file libraries/parsec/LICENSE) +-- +-- Maintainer : jeremy@n-heptane.com +-- Stability : experimental +-- Portability : unknown +-- +-- Primitive parser combinators for ByteStrings loosely based on Parsec. +-- +----------------------------------------------------------------------------- +module ByteStringParser where + +import Data.Char +import Data.Word +import Control.Monad +import qualified Data.ByteString.Char8 as C + +-- * Parser + +type ParserError state = (state, String) + +-- * Parser Monad + +newtype Parser state a = Parser { unParser :: (state -> Either (state,[String]) (a, state)) } + +instance Functor (Parser state) where + fmap f (Parser p) = + Parser $ \st -> + case p st of + Right (a, st') -> Right (f a, st') + Left err -> Left err + +instance Monad (Parser state) where + return a = Parser (\s -> Right (a,s)) + m >>= f = + Parser $ \state -> + let r = (unParser m) state in + case r of + Right (a,state') -> unParser (f a) $ state' + (Left (st, msgs)) -> (Left (st, msgs)) + +instance MonadPlus (Parser state) where + mzero = Parser (\st -> (Left (st, []))) + mplus (Parser p1) (Parser p2) = + Parser (\s -> case p1 s of + (Left (_, msgs1)) -> + case p2 s of + Left (_, msgs2) -> Left (s, (msgs1 ++ msgs2)) + o -> o + o -> o + ) + +-- |Always succeed +pSucceed :: a -> Parser state a +pSucceed = return + +-- |Always fail +pFail :: Parser state a +pFail = Parser (\st -> Left (st, [])) + +infix 0 +infixr 1 <|> + +-- |choice +(<|>) :: Parser state a -> Parser state a -> Parser state a +(<|>) = mplus + +-- |name the parser +() :: Parser state a -> String -> Parser state a +p msg = + Parser $ \st -> + case (unParser p) st of + (Left _) -> Left (st, [msg]) + ok -> ok + +-- |character parser +satisfy :: (Char -> Bool) -> Parser C.ByteString Char +satisfy f = + Parser $ \bs -> + if C.null bs + then Left (bs, []) + else let (s,ss) = (C.head bs, C.tail bs) in + if (f s) + then Right (s,ss) + else Left (bs, []) + +-- |satisfy a specific character +pChar :: Char -> Parser C.ByteString Char +pChar c = satisfy (== c) [c] + +-- |detect 'end of file' +pEOF :: Parser C.ByteString () +pEOF = + Parser $ \bs -> if C.null bs then Right ((),bs) else (Left (bs, ["EOF"])) + +-- |pTakeWhile take characters while the predicate is true +pTakeWhile :: (Char -> Bool) -> Parser C.ByteString C.ByteString +pTakeWhile f = + Parser $ \bs -> Right (C.span f bs) + +-- |pSkipWhile skip over characters while the predicate is true +pSkipWhile :: (Char -> Bool) -> Parser C.ByteString () +pSkipWhile p = + Parser $ \bs -> Right ((), C.dropWhile p bs) + +-- |'pMany' - take zero or more instances of the parser +pMany :: Parser st a -> Parser st [a] +pMany p + = scan id + where + scan f = do x <- p + scan (\tail -> f (x:tail)) + <|> return (f []) + +-- |'pMany1' - take one or more instances of the parser +pMany1 :: Parser st a -> Parser st [a] +pMany1 p = + do x <- p + xs <- pMany p + return (x:xs) + +-- |'pSkipMany' - skip zero or many instances of the parser +pSkipMany :: Parser st a -> Parser st () +pSkipMany p = scan + where + scan = (p >> scan) <|> return () + +-- |'pSkipMany1' - skip one or many instances of the parser +pSkipMany1 :: Parser st a -> Parser st () +pSkipMany1 p = p >> pSkipMany p + +-- |'notEmpty' - tests that a parser returned a non-null ByteString +notEmpty :: Parser C.ByteString C.ByteString -> Parser C.ByteString C.ByteString +notEmpty (Parser p) = + Parser $ \s -> case p s of + o@(Right (a, s)) -> + if C.null a + then Left (a, ["notEmpty"]) + else o + x -> x + +-- |'parse' - run a parser +parse :: Parser state a -> state -> Either (ParserError state) (a, state) +parse p s = + case ((unParser p) s) of + Left (st, msg) -> Left (st, showError msg) + (Right r) -> (Right r) + where + showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n" + showError msgs = "Parser error, expected one of:\n" ++ unlines msgs + +parseTest p s = + case parse p s of + Left (st, msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st + Right (r,_) -> print r hunk ./ByteStringParser.hs 4 --- Copyright : (c) Jeremy Shaw 2006 --- Copyright : (c) Daan Leijen 1999-2001 +-- Copyright : (c) Daan Leijen 1999-2001, Jeremy Shaw 2006 hunk ./ByteStringParser.hs 79 + +-- * Things like in @Parsec.Char@ + hunk ./ByteStringParser.hs 97 + +-- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@) + hunk ./ByteStringParser.hs 151 + +-- * Running parsers + hunk ./ByteStringParser.hs 79 +-- |get remaining input +getInput :: Parser C.ByteString C.ByteString +getInput = Parser (\st -> Right (st,st)) + hunk ./ByteStringParser.hs 155 +-- | parse some input with the given parser and return that input without copying it +pMatch :: Parser C.ByteString a -> Parser C.ByteString C.ByteString +pMatch p = do start <- getInput + p + end <- getInput + return (C.take (C.length start - C.length end) start) + adddir ./src addfile ./Setup.hs hunk ./Setup.hs 1 +import Distribution.Simple +main = defaultMain addfile ./bytestringparser.cabal hunk ./bytestringparser.cabal 1 +name: bytestringparser +version: 0.1 +license: BSD3 +license-file: LICENSE +author: Bryan O'Sullivan +synopsis: Combinator parsing with Data.ByteString.Lazy +build-depends: base +exposed-modules: Text.ParserCombinators.ByteStringParser +hs-source-dirs: src +ghc-options: -O -Wall -Werror hunk ./ByteStringParser.hs 4 --- Copyright : (c) Daan Leijen 1999-2001, Jeremy Shaw 2006 +-- Copyright : (c) Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007 hunk ./ByteStringParser.hs 14 -module ByteStringParser where +module Text.ParserCombinators.ByteStringParser where hunk ./ByteStringParser.hs 3 --- Module : ByteStringParser +-- Module : Text.ParserCombinators.ByteStringParser hunk ./ByteStringParser.hs 7 --- Maintainer : jeremy@n-heptane.com +-- Maintainer : bos@serpentine.com hunk ./ByteStringParser.hs 19 -import qualified Data.ByteString.Char8 as C +import qualified Data.ByteString.Lazy.Char8 as C hunk ./ByteStringParser.hs 29 +type CharParser = Parser C.ByteString Char + hunk ./ByteStringParser.hs 59 -pSucceed :: a -> Parser state a -pSucceed = return +succeed :: a -> Parser state a +succeed = return hunk ./ByteStringParser.hs 63 -pFail :: Parser state a -pFail = Parser (\st -> Left (st, [])) +fail :: Parser state a +fail = Parser (\st -> Left (st, [])) hunk ./ByteStringParser.hs 88 +{-# INLINE satisfy #-} + hunk ./ByteStringParser.hs 91 -satisfy :: (Char -> Bool) -> Parser C.ByteString Char +satisfy :: (Char -> Bool) -> CharParser + hunk ./ByteStringParser.hs 102 +letter :: CharParser + +letter = satisfy isLetter + +digit :: CharParser + +digit = satisfy isDigit + +anyChar :: CharParser + +anyChar = satisfy $ const True + +space :: CharParser + +space = satisfy isSpace + hunk ./ByteStringParser.hs 119 -pChar :: Char -> Parser C.ByteString Char -pChar c = satisfy (== c) [c] hunk ./ByteStringParser.hs 120 +char :: Char -> CharParser + +char c = satisfy (== c) [c] + +string :: String -> Parser C.ByteString String + +string = mapM char + +count :: Int -> Parser st a -> Parser st [a] + +count n p = sequence (replicate n p) hunk ./ByteStringParser.hs 134 +try :: Parser st a -> Parser st a + +try (Parser p) + = Parser $ \state -> case p state of + (Left (_, msgs)) -> Left (state, msgs) + ok -> ok + hunk ./ByteStringParser.hs 142 -pEOF :: Parser C.ByteString () -pEOF = +eOF :: Parser C.ByteString () +eOF = hunk ./ByteStringParser.hs 146 --- |pTakeWhile take characters while the predicate is true -pTakeWhile :: (Char -> Bool) -> Parser C.ByteString C.ByteString -pTakeWhile f = +-- |takeWhile take characters while the predicate is true +takeWhile :: (Char -> Bool) -> Parser C.ByteString C.ByteString +takeWhile f = hunk ./ByteStringParser.hs 151 --- |pSkipWhile skip over characters while the predicate is true -pSkipWhile :: (Char -> Bool) -> Parser C.ByteString () -pSkipWhile p = +-- |skipWhile skip over characters while the predicate is true +skipWhile :: (Char -> Bool) -> Parser C.ByteString () +skipWhile p = hunk ./ByteStringParser.hs 156 --- |'pMany' - take zero or more instances of the parser -pMany :: Parser st a -> Parser st [a] -pMany p - = scan id - where - scan f = do x <- p - scan (\tail -> f (x:tail)) - <|> return (f []) +-- |'many' - take zero or more instances of the parser +many :: Parser st a -> Parser st [a] +many p = scan id + where scan f = do x <- p + scan (\xs -> f (x:xs)) + <|> return (f []) hunk ./ByteStringParser.hs 163 --- |'pMany1' - take one or more instances of the parser -pMany1 :: Parser st a -> Parser st [a] -pMany1 p = +-- |'many1' - take one or more instances of the parser +many1 :: Parser st a -> Parser st [a] +many1 p = hunk ./ByteStringParser.hs 167 - xs <- pMany p + xs <- many p hunk ./ByteStringParser.hs 170 --- |'pSkipMany' - skip zero or many instances of the parser -pSkipMany :: Parser st a -> Parser st () -pSkipMany p = scan +manyTill :: Parser st a -> Parser st end -> Parser st [a] +manyTill p end = scan + where scan = do end + return [] + <|> + do x <- p + xs <- scan + return (x:xs) + +-- |'skipMany' - skip zero or many instances of the parser +skipMany :: Parser st a -> Parser st () +skipMany p = scan hunk ./ByteStringParser.hs 185 --- |'pSkipMany1' - skip one or many instances of the parser -pSkipMany1 :: Parser st a -> Parser st () -pSkipMany1 p = p >> pSkipMany p +-- |'skipMany1' - skip one or many instances of the parser +skipMany1 :: Parser st a -> Parser st () +skipMany1 p = p >> skipMany p hunk ./ByteStringParser.hs 193 - o@(Right (a, s)) -> + o@(Right (a, s_)) -> hunk ./ByteStringParser.hs 200 -pMatch :: Parser C.ByteString a -> Parser C.ByteString C.ByteString -pMatch p = do start <- getInput - p - end <- getInput - return (C.take (C.length start - C.length end) start) +match :: Parser C.ByteString a -> Parser C.ByteString C.ByteString +match p = do start <- getInput + p + end <- getInput + return (C.take (C.length start - C.length end) start) hunk ./ByteStringParser.hs 206 +lookAhead :: Parser C.ByteString a -> Parser C.ByteString a + +lookAhead (Parser p) + = Parser $ \state -> case p state of + Left (_, msgs) -> Left (state, msgs) + Right (m, _) -> Right (m, state) hunk ./ByteStringParser.hs 225 +parseTest :: Parser st a -> st -> IO () + hunk ./bytestringparser.cabal 10 -ghc-options: -O -Wall -Werror +ghc-options: -O -Wall adddir ./src/Text adddir ./src/Text/ParserCombinators move ./ByteStringParser.hs ./src/Text/ParserCombinators/ByteStringParser.hs hunk ./src/Text/ParserCombinators/ByteStringParser.hs 17 -import Data.Word hunk ./src/Text/ParserCombinators/ByteStringParser.hs 192 - o@(Right (a, s_)) -> + o@(Right (a, _)) -> hunk ./src/Text/ParserCombinators/ByteStringParser.hs 224 -parseTest :: Parser st a -> st -> IO () +parseTest :: (Show st, Show a) => Parser st a -> st -> IO () hunk ./bytestringparser.cabal 10 -ghc-options: -O -Wall +ghc-options: -O2 -Wall hunk ./src/Text/ParserCombinators/ByteStringParser.hs 125 -string = mapM char +string s = mapM char s + show s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 142 -eOF :: Parser C.ByteString () -eOF = +eof :: Parser C.ByteString () +eof = hunk ./bytestringparser.cabal 2 -version: 0.1 +version: 0.2 hunk ./bytestringparser.cabal 5 +category: Text, Parsing hunk ./bytestringparser.cabal 7 +stability: experimental hunk ./bytestringparser.cabal 9 -build-depends: base -exposed-modules: Text.ParserCombinators.ByteStringParser -hs-source-dirs: src -ghc-options: -O2 -Wall +cabal-version: >= 1.2 + +flag bytestring-in-base + +library + if flag(bytestring-in-base) + -- bytestring was in base-2.0 and 2.1.1 + build-depends: base >= 2.0 && < 2.2 + else + -- in base 1.0 and 3.0 bytestring is a separate package + build-depends: base < 2.0 || >= 3, bytestring >= 0.9 + exposed-modules: Text.ParserCombinators.ByteStringParser + hs-source-dirs: src + ghc-options: -O2 -Wall -Werror hunk ./bytestringparser.cabal 12 +flag applicative-in-base hunk ./bytestringparser.cabal 21 + + if flag(applicative-in-base) + build-depends: base >= 2.0 + cpp-options: -DAPPLICATIVE_IN_BASE + else + build-depends: base < 2.0 + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 1 +{-# LANGUAGE CPP #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 15 -module Text.ParserCombinators.ByteStringParser where +module Text.ParserCombinators.ByteStringParser + ( + -- * Parser + ParseError + , Parser hunk ./src/Text/ParserCombinators/ByteStringParser.hs 21 -import Data.Char -import Control.Monad -import qualified Data.ByteString.Lazy.Char8 as C + -- * Running parsers + , parse + , parseTest + + -- * Combinators + , succeed + , (<|>) + , () + + -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@) + , try + , many + , many1 + , manyTill + , eof + , skipMany + , skipMany1 + + -- * Things like in @Parsec.Char@ + , satisfy + , letter + , digit + , anyChar + , space + , char + , string + + -- * Miscellaneous functions. + , getInput + , getConsumed + , takeWhile + , skipWhile + ) where + +#ifdef APPLICATIVE_IN_BASE +import Control.Applicative (Applicative(..)) +#endif hunk ./src/Text/ParserCombinators/ByteStringParser.hs 59 --- * Parser +import Control.Monad (MonadPlus(..), ap) +import qualified Data.ByteString.Lazy.Char8 as C +import Data.Char (isDigit, isLetter, isSpace) +import Data.Int (Int64) +import Prelude hiding (takeWhile) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 65 -type ParserError state = (state, String) +type ParseError = (C.ByteString, String) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 69 -newtype Parser state a = Parser { unParser :: (state -> Either (state,[String]) (a, state)) } +data S = S C.ByteString + {-# UNPACK #-} !Int64 hunk ./src/Text/ParserCombinators/ByteStringParser.hs 72 -type CharParser = Parser C.ByteString Char +newtype Parser a = Parser { + unParser :: S -> Either (C.ByteString, [String]) (a, S) + } hunk ./src/Text/ParserCombinators/ByteStringParser.hs 76 -instance Functor (Parser state) where - fmap f (Parser p) = - Parser $ \st -> - case p st of - Right (a, st') -> Right (f a, st') +instance Functor Parser where + fmap f p = + Parser $ \s -> + case unParser p s of + Right (a, s') -> Right (f a, s') hunk ./src/Text/ParserCombinators/ByteStringParser.hs 83 -instance Monad (Parser state) where - return a = Parser (\s -> Right (a,s)) - m >>= f = - Parser $ \state -> - let r = (unParser m) state in - case r of - Right (a,state') -> unParser (f a) $ state' - (Left (st, msgs)) -> (Left (st, msgs)) +instance Monad Parser where + return a = Parser $ \s -> Right (a, s) + m >>= f = Parser $ \s -> + case unParser m s of + Right (a, s') -> unParser (f a) s' + Left (s', msgs) -> Left (s', msgs) + fail err = Parser $ \(S bs _) -> Left (bs, [err]) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 91 -instance MonadPlus (Parser state) where - mzero = Parser (\st -> (Left (st, []))) - mplus (Parser p1) (Parser p2) = - Parser (\s -> case p1 s of - (Left (_, msgs1)) -> +instance MonadPlus Parser where + mzero = Parser $ \(S bs _) -> Left (bs, []) + Parser p1 `mplus` Parser p2 = + Parser $ \s@(S bs _) -> case p1 s of + Left (_, msgs1) -> hunk ./src/Text/ParserCombinators/ByteStringParser.hs 97 - Left (_, msgs2) -> Left (s, (msgs1 ++ msgs2)) - o -> o - o -> o - ) + Left (_, msgs2) -> Left (bs, (msgs1 ++ msgs2)) + ok -> ok + ok -> ok hunk ./src/Text/ParserCombinators/ByteStringParser.hs 101 --- |Always succeed -succeed :: a -> Parser state a -succeed = return +#ifdef APPLICATIVE_IN_BASE +instance Applicative Parser where + pure = return + (<*>) = ap +#endif hunk ./src/Text/ParserCombinators/ByteStringParser.hs 107 --- |Always fail -fail :: Parser state a -fail = Parser (\st -> Left (st, [])) +-- | Always succeed. +succeed :: a -> Parser a +succeed = return hunk ./src/Text/ParserCombinators/ByteStringParser.hs 114 --- |choice -(<|>) :: Parser state a -> Parser state a -> Parser state a +-- | Choice. +(<|>) :: Parser a -> Parser a -> Parser a hunk ./src/Text/ParserCombinators/ByteStringParser.hs 118 --- |name the parser -() :: Parser state a -> String -> Parser state a +-- | Name the parser. +() :: Parser a -> String -> Parser a hunk ./src/Text/ParserCombinators/ByteStringParser.hs 121 - Parser $ \st -> - case (unParser p) st of - (Left _) -> Left (st, [msg]) + Parser $ \s@(S bs _) -> + case unParser p s of + (Left _) -> Left (bs, [msg]) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 126 --- |get remaining input -getInput :: Parser C.ByteString C.ByteString -getInput = Parser (\st -> Right (st,st)) +-- | Get remaining input. +getInput :: Parser C.ByteString +getInput = Parser $ \s@(S bs _) -> Right (bs, s) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 130 +-- | Get remaining input. +getConsumed :: Parser Int64 +getConsumed = Parser $ \s@(S _ n) -> Right (n, s) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 134 --- * Things like in @Parsec.Char@ - -{-# INLINE satisfy #-} - --- |character parser -satisfy :: (Char -> Bool) -> CharParser hunk ./src/Text/ParserCombinators/ByteStringParser.hs 135 +-- | Character parser. +satisfy :: (Char -> Bool) -> Parser Char hunk ./src/Text/ParserCombinators/ByteStringParser.hs 138 - Parser $ \bs -> + Parser $ \(S bs n) -> hunk ./src/Text/ParserCombinators/ByteStringParser.hs 141 - else let (s,ss) = (C.head bs, C.tail bs) in - if (f s) - then Right (s,ss) + else let Just (s, bs') = C.uncons bs in + if f s + then Right (s, S bs' (n + 1)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 145 +{-# INLINE satisfy #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 147 -letter :: CharParser hunk ./src/Text/ParserCombinators/ByteStringParser.hs 148 +letter :: Parser Char hunk ./src/Text/ParserCombinators/ByteStringParser.hs 150 - -digit :: CharParser +{-# INLINE letter #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 152 +digit :: Parser Char hunk ./src/Text/ParserCombinators/ByteStringParser.hs 154 - -anyChar :: CharParser +{-# INLINE digit #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 156 +anyChar :: Parser Char hunk ./src/Text/ParserCombinators/ByteStringParser.hs 158 - -space :: CharParser +{-# INLINE anyChar #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 160 +space :: Parser Char hunk ./src/Text/ParserCombinators/ByteStringParser.hs 162 +{-# INLINE space #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 164 --- |satisfy a specific character - -char :: Char -> CharParser +-- | Satisfy a specific character. hunk ./src/Text/ParserCombinators/ByteStringParser.hs 166 +char :: Char -> Parser Char hunk ./src/Text/ParserCombinators/ByteStringParser.hs 168 +{-# INLINE char #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 170 -string :: String -> Parser C.ByteString String - -string s = mapM char s - show s - -count :: Int -> Parser st a -> Parser st [a] +string :: String -> Parser String +string s = mapM char s show s +{-# INLINE string #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 174 +count :: Int -> Parser a -> Parser [a] hunk ./src/Text/ParserCombinators/ByteStringParser.hs 176 +{-# INLINE count #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 178 --- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@) - -try :: Parser st a -> Parser st a - -try (Parser p) - = Parser $ \state -> case p state of - (Left (_, msgs)) -> Left (state, msgs) - ok -> ok +try :: Parser a -> Parser a +try p = Parser $ \s@(S bs _) -> + case unParser p s of + Left (_, msgs) -> Left (bs, msgs) + ok -> ok hunk ./src/Text/ParserCombinators/ByteStringParser.hs 184 --- |detect 'end of file' -eof :: Parser C.ByteString () -eof = - Parser $ \bs -> if C.null bs then Right ((),bs) else (Left (bs, ["EOF"])) +-- | Detect 'end of file'. +eof :: Parser () +eof = Parser $ \s@(S bs _) -> if C.null bs + then Right ((), s) + else Left (bs, ["EOF"]) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 190 --- |takeWhile take characters while the predicate is true -takeWhile :: (Char -> Bool) -> Parser C.ByteString C.ByteString -takeWhile f = - Parser $ \bs -> Right (C.span f bs) +-- | Consume characters while the predicate is true. +takeWhile :: (Char -> Bool) -> Parser C.ByteString +takeWhile f = Parser $ \(S bs n) -> + let (h, bs') = C.span f bs + in Right (h, S bs' (n + C.length h)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 196 --- |skipWhile skip over characters while the predicate is true -skipWhile :: (Char -> Bool) -> Parser C.ByteString () -skipWhile p = - Parser $ \bs -> Right ((), C.dropWhile p bs) +-- | Skip over characters while the predicate is true. +skipWhile :: (Char -> Bool) -> Parser () +skipWhile p = takeWhile p >> return () hunk ./src/Text/ParserCombinators/ByteStringParser.hs 200 --- |'many' - take zero or more instances of the parser -many :: Parser st a -> Parser st [a] +-- | Take zero or more instances of the parser. +many :: Parser a -> Parser [a] hunk ./src/Text/ParserCombinators/ByteStringParser.hs 204 - scan (\xs -> f (x:xs)) + scan (f . (x:)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 207 --- |'many1' - take one or more instances of the parser -many1 :: Parser st a -> Parser st [a] +-- | Take one or more instances of the parser. +many1 :: Parser a -> Parser [a] hunk ./src/Text/ParserCombinators/ByteStringParser.hs 214 -manyTill :: Parser st a -> Parser st end -> Parser st [a] +manyTill :: Parser a -> Parser b -> Parser [a] hunk ./src/Text/ParserCombinators/ByteStringParser.hs 216 - where scan = do end - return [] + where scan = do end; return [] hunk ./src/Text/ParserCombinators/ByteStringParser.hs 223 -skipMany :: Parser st a -> Parser st () +skipMany :: Parser a -> Parser () hunk ./src/Text/ParserCombinators/ByteStringParser.hs 229 -skipMany1 :: Parser st a -> Parser st () +skipMany1 :: Parser a -> Parser () hunk ./src/Text/ParserCombinators/ByteStringParser.hs 232 --- |'notEmpty' - tests that a parser returned a non-null ByteString -notEmpty :: Parser C.ByteString C.ByteString -> Parser C.ByteString C.ByteString -notEmpty (Parser p) = - Parser $ \s -> case p s of - o@(Right (a, _)) -> - if C.null a - then Left (a, ["notEmpty"]) - else o - x -> x +-- | Test that a parser returned a non-null ByteString. +notEmpty :: Parser C.ByteString -> Parser C.ByteString +notEmpty p = Parser $ \s -> + case unParser p s of + o@(Right (a, _)) -> + if C.null a + then Left (a, ["notEmpty"]) + else o + x -> x hunk ./src/Text/ParserCombinators/ByteStringParser.hs 242 --- | parse some input with the given parser and return that input without copying it -match :: Parser C.ByteString a -> Parser C.ByteString C.ByteString -match p = do start <- getInput +-- | Parse some input with the given parser and return that input +-- without copying it. +match :: Parser a -> Parser C.ByteString +match p = do bs <- getInput + start <- getConsumed hunk ./src/Text/ParserCombinators/ByteStringParser.hs 248 - end <- getInput - return (C.take (C.length start - C.length end) start) - -lookAhead :: Parser C.ByteString a -> Parser C.ByteString a + end <- getConsumed + return (C.take (end - start) bs) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 251 -lookAhead (Parser p) - = Parser $ \state -> case p state of - Left (_, msgs) -> Left (state, msgs) - Right (m, _) -> Right (m, state) +lookAhead :: Parser a -> Parser a hunk ./src/Text/ParserCombinators/ByteStringParser.hs 253 --- * Running parsers +lookAhead p = Parser $ \s@(S bs _) -> + case unParser p s of + Left (_, msgs) -> Left (bs, msgs) + Right (m, _) -> Right (m, s) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 258 --- |'parse' - run a parser -parse :: Parser state a -> state -> Either (ParserError state) (a, state) -parse p s = - case ((unParser p) s) of - Left (st, msg) -> Left (st, showError msg) - (Right r) -> (Right r) +-- | Run a parser. +parse :: Parser a -> C.ByteString + -> Either ParseError (a, C.ByteString) +parse p bs = + case unParser p (S bs 0) of + Left (bs', msg) -> Left (bs', showError msg) + Right (a, S bs' _) -> Right (a, bs') hunk ./src/Text/ParserCombinators/ByteStringParser.hs 269 -parseTest :: (Show st, Show a) => Parser st a -> st -> IO () - +parseTest :: (Show a) => Parser a -> C.ByteString -> IO () addfile ./LICENSE hunk ./LICENSE 1 +Copyright (c) Lennart Kolmodin + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. hunk ./bytestringparser.cabal 11 -flag bytestring-in-base +flag split-base hunk ./bytestringparser.cabal 15 - if flag(bytestring-in-base) + if flag(split-base) hunk ./bytestringparser.cabal 20 - build-depends: base < 2.0 || >= 3, bytestring >= 0.9 + build-depends: base < 2.0 || >= 3, bytestring >= 0.9, containers >= 0.1.0.1 hunk ./bytestringparser.cabal 28 + extensions: CPP hunk ./src/Text/ParserCombinators/ByteStringParser.hs 1 -{-# LANGUAGE CPP #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 11 --- Primitive parser combinators for ByteStrings loosely based on Parsec. +-- Simple, efficient parser combinators for lazy 'C.ByteString' +-- values, loosely based on 'Text.ParserCombinators.Parsec'. hunk ./src/Text/ParserCombinators/ByteStringParser.hs 23 + , parseAt hunk ./src/Text/ParserCombinators/ByteStringParser.hs 39 + , count + , lookAhead + , sepBy + , sepBy1 hunk ./src/Text/ParserCombinators/ByteStringParser.hs 52 + , stringCI + , byteString + , byteStringCI hunk ./src/Text/ParserCombinators/ByteStringParser.hs 60 + , takeWhile1 + , takeAll hunk ./src/Text/ParserCombinators/ByteStringParser.hs 63 + , skipSpace + , notEmpty + , match + , inClass + , notInClass hunk ./src/Text/ParserCombinators/ByteStringParser.hs 70 -#ifdef APPLICATIVE_IN_BASE hunk ./src/Text/ParserCombinators/ByteStringParser.hs 71 -#endif - -import Control.Monad (MonadPlus(..), ap) +import Control.Monad (MonadPlus(..), ap, liftM2) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 73 -import Data.Char (isDigit, isLetter, isSpace) +import Data.Char (isDigit, isLetter, isSpace, toLower) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 75 +import qualified Data.Set as S hunk ./src/Text/ParserCombinators/ByteStringParser.hs 130 +{-# INLINE (<|>) #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 139 +{-# INLINE () #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 145 --- | Get remaining input. +-- | Get number of bytes consumed so far. hunk ./src/Text/ParserCombinators/ByteStringParser.hs 149 - hunk ./src/Text/ParserCombinators/ByteStringParser.hs 153 - if C.null bs - then Left (bs, []) - else let Just (s, bs') = C.uncons bs in - if f s - then Right (s, S bs' (n + 1)) - else Left (bs, []) + case C.uncons bs of + Just (s, bs') | f s -> Right (s, S bs' (n + 1)) + _ -> Left (bs, []) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 158 - hunk ./src/Text/ParserCombinators/ByteStringParser.hs 159 -letter = satisfy isLetter +letter = satisfy isLetter "letter" hunk ./src/Text/ParserCombinators/ByteStringParser.hs 163 -digit = satisfy isDigit +digit = satisfy isDigit "digit" hunk ./src/Text/ParserCombinators/ByteStringParser.hs 171 -space = satisfy isSpace +space = satisfy isSpace "space" hunk ./src/Text/ParserCombinators/ByteStringParser.hs 175 - hunk ./src/Text/ParserCombinators/ByteStringParser.hs 179 +charClass :: String -> S.Set Char +charClass s = S.fromList (go s) + where go (a:'-':b:xs) = [a..b] ++ go xs + go (x:xs) = x : go xs + go _ = "" + +inClass :: String -> Char -> Bool +inClass s = (`S.member` set) + where set = charClass s + +notInClass :: String -> Char -> Bool +notInClass s = (`S.notMember` set) + where set = charClass s + +sepBy :: Parser a -> Parser s -> Parser [a] +sepBy p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) <|> return [] + +sepBy1 :: Parser a -> Parser s -> Parser [a] +sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) + +-- | Satisfy a literal string. +byteString :: C.ByteString -> Parser C.ByteString +byteString s = Parser $ \(S bs n) -> + let l = C.length s + (h, t) = C.splitAt l bs + in if s == h + then Right (s, S t (n + l)) + else Left (bs, []) +{-# INLINE byteString #-} + +-- | Satisfy a literal string. +byteStringCI :: C.ByteString -> Parser C.ByteString +byteStringCI s = Parser $ \(S bs n) -> + let l = C.length s + (h, t) = C.splitAt l bs + in if ls == C.map toLower h + then Right (s, S t (n + l)) + else Left (bs, []) + where ls = C.map toLower s +{-# INLINE byteStringCI #-} + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 221 -string s = mapM char s show s +string s = byteString (C.pack s) >> return s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 224 +stringCI :: String -> Parser String +stringCI s = byteStringCI (C.pack s) >> return s +{-# INLINE stringCI #-} + +-- | Apply the given parser repeatedly, returning every parse result. hunk ./src/Text/ParserCombinators/ByteStringParser.hs 245 +takeAll :: Parser C.ByteString +takeAll = Parser $ \(S bs n) -> Right (bs, S C.empty (n + C.length bs)) + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 253 +{-# INLINE takeWhile #-} + +takeWhile1 :: (Char -> Bool) -> Parser C.ByteString +takeWhile1 f = Parser $ \(S bs n) -> + let (h, bs') = C.span f bs + in if C.null h + then Left (bs, []) + else Right (h, S bs' (n + C.length h)) +{-# INLINE takeWhile1 #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 266 +{-# INLINE skipWhile #-} + +-- | Skip over white space. +skipSpace :: Parser () +skipSpace = takeWhile isSpace >> return () +{-# INLINE skipSpace #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 282 -many1 p = - do x <- p - xs <- many p - return (x:xs) +many1 p = liftM2 (:) p (many p) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 286 - where scan = do end; return [] - <|> - do x <- p - xs <- scan - return (x:xs) + where scan = (end >> return []) <|> liftM2 (:) p scan hunk ./src/Text/ParserCombinators/ByteStringParser.hs 291 - where - scan = (p >> scan) <|> return () + where scan = (p >> scan) <|> return () hunk ./src/Text/ParserCombinators/ByteStringParser.hs 316 -lookAhead :: Parser a -> Parser a - -lookAhead p = Parser $ \s@(S bs _) -> +lookAhead :: Parser a -> Parser (Maybe a) +lookAhead p = Parser $ \s -> hunk ./src/Text/ParserCombinators/ByteStringParser.hs 319 - Left (_, msgs) -> Left (bs, msgs) - Right (m, _) -> Right (m, s) + Right (m, _) -> Right (Just m, s) + _ -> Right (Nothing, s) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 322 --- | Run a parser. -parse :: Parser a -> C.ByteString - -> Either ParseError (a, C.ByteString) -parse p bs = - case unParser p (S bs 0) of +parseAt :: Parser a -> C.ByteString -> Int64 + -> Either ParseError (a, C.ByteString) +parseAt p bs n = + case unParser p (S bs n) of hunk ./src/Text/ParserCombinators/ByteStringParser.hs 332 +-- | Run a parser. +parse :: Parser a -> C.ByteString + -> Either ParseError (a, C.ByteString) +parse p bs = parseAt p bs 0 + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 41 + , peek hunk ./src/Text/ParserCombinators/ByteStringParser.hs 52 + , notChar hunk ./src/Text/ParserCombinators/ByteStringParser.hs 58 + -- * Parser converters. + , maybeP + , eitherP + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 67 + , takeTill hunk ./src/Text/ParserCombinators/ByteStringParser.hs 77 -import Control.Applicative (Applicative(..)) +import Control.Applicative (Applicative(..), (<$>), (<*)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 85 -type ParseError = (C.ByteString, String) +type ParseError = String hunk ./src/Text/ParserCombinators/ByteStringParser.hs 186 +-- | Satisfy a specific character. +notChar :: Char -> Parser Char +notChar c = satisfy (/= c) "not " ++ [c] +{-# INLINE notChar #-} + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 267 +takeTill :: (Char -> Bool) -> Parser C.ByteString +takeTill p = takeWhile (not . p) <* satisfy p +{-# INLINE takeTill #-} + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 332 -lookAhead :: Parser a -> Parser (Maybe a) +maybeP :: Parser a -> Parser (Maybe a) +maybeP p = (Just <$> p) <|> pure Nothing +{-# INLINE maybeP #-} + +eitherP :: Parser a -> Parser b -> Parser (Either a b) +eitherP a b = (Left <$> a) <|> (Right <$> b) +{-# INLINE eitherP #-} + +peek :: Parser a -> Parser (Maybe a) +peek p = Parser $ \s -> + case unParser p s of + Right (m, _) -> Right (Just m, s) + _ -> Right (Nothing, s) + +lookAhead :: Parser a -> Parser a hunk ./src/Text/ParserCombinators/ByteStringParser.hs 348 - case unParser p s of - Right (m, _) -> Right (Just m, s) - _ -> Right (Nothing, s) + case unParser p s of + Right (m, _) -> Right (m, s) + Left (e, bs) -> Left (e, bs) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 353 - -> Either ParseError (a, C.ByteString) + -> (C.ByteString, Either ParseError a) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 356 - Left (bs', msg) -> Left (bs', showError msg) - Right (a, S bs' _) -> Right (a, bs') + Left (bs', msg) -> (bs', Left $ showError msg) + Right (a, S bs' _) -> (bs', Right a) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 364 - -> Either ParseError (a, C.ByteString) + -> (C.ByteString, Either ParseError a) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 370 - Left (st, msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st - Right (r,_) -> print r + (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st + (_, Right r) -> print r hunk ./src/Text/ParserCombinators/ByteStringParser.hs 79 -import qualified Data.ByteString.Lazy.Char8 as C +import qualified Data.ByteString.Lazy.Char8 as LB hunk ./src/Text/ParserCombinators/ByteStringParser.hs 89 -data S = S C.ByteString +data S = S LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 93 - unParser :: S -> Either (C.ByteString, [String]) (a, S) + unParser :: S -> Either (LB.ByteString, [String]) (a, S) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 149 -getInput :: Parser C.ByteString +getInput :: Parser LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 160 - case C.uncons bs of + case LB.uncons bs of hunk ./src/Text/ParserCombinators/ByteStringParser.hs 212 -byteString :: C.ByteString -> Parser C.ByteString +byteString :: LB.ByteString -> Parser LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 214 - let l = C.length s - (h, t) = C.splitAt l bs + let l = LB.length s + (h, t) = LB.splitAt l bs hunk ./src/Text/ParserCombinators/ByteStringParser.hs 222 -byteStringCI :: C.ByteString -> Parser C.ByteString +byteStringCI :: LB.ByteString -> Parser LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 224 - let l = C.length s - (h, t) = C.splitAt l bs - in if ls == C.map toLower h + let l = LB.length s + (h, t) = LB.splitAt l bs + in if ls == LB.map toLower h hunk ./src/Text/ParserCombinators/ByteStringParser.hs 229 - where ls = C.map toLower s + where ls = LB.map toLower s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 233 -string s = byteString (C.pack s) >> return s +string s = byteString (LB.pack s) >> return s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 237 -stringCI s = byteStringCI (C.pack s) >> return s +stringCI s = byteStringCI (LB.pack s) >> return s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 253 -eof = Parser $ \s@(S bs _) -> if C.null bs +eof = Parser $ \s@(S bs _) -> if LB.null bs hunk ./src/Text/ParserCombinators/ByteStringParser.hs 257 -takeAll :: Parser C.ByteString -takeAll = Parser $ \(S bs n) -> Right (bs, S C.empty (n + C.length bs)) +takeAll :: Parser LB.ByteString +takeAll = Parser $ \(S bs n) -> Right (bs, S LB.empty (n + LB.length bs)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 261 -takeWhile :: (Char -> Bool) -> Parser C.ByteString +takeWhile :: (Char -> Bool) -> Parser LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 263 - let (h, bs') = C.span f bs - in Right (h, S bs' (n + C.length h)) + let (h, bs') = LB.span f bs + in Right (h, S bs' (n + LB.length h)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 267 -takeTill :: (Char -> Bool) -> Parser C.ByteString +takeTill :: (Char -> Bool) -> Parser LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 271 -takeWhile1 :: (Char -> Bool) -> Parser C.ByteString +takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 273 - let (h, bs') = C.span f bs - in if C.null h + let (h, bs') = LB.span f bs + in if LB.null h hunk ./src/Text/ParserCombinators/ByteStringParser.hs 276 - else Right (h, S bs' (n + C.length h)) + else Right (h, S bs' (n + LB.length h)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 314 -notEmpty :: Parser C.ByteString -> Parser C.ByteString +notEmpty :: Parser LB.ByteString -> Parser LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 318 - if C.null a + if LB.null a hunk ./src/Text/ParserCombinators/ByteStringParser.hs 325 -match :: Parser a -> Parser C.ByteString +match :: Parser a -> Parser LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 330 - return (C.take (end - start) bs) + return (LB.take (end - start) bs) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 352 -parseAt :: Parser a -> C.ByteString -> Int64 - -> (C.ByteString, Either ParseError a) +parseAt :: Parser a -> LB.ByteString -> Int64 + -> (LB.ByteString, Either ParseError a) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 363 -parse :: Parser a -> C.ByteString - -> (C.ByteString, Either ParseError a) +parse :: Parser a -> LB.ByteString + -> (LB.ByteString, Either ParseError a) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 367 -parseTest :: (Show a) => Parser a -> C.ByteString -> IO () +parseTest :: (Show a) => Parser a -> LB.ByteString -> IO () hunk ./src/Text/ParserCombinators/ByteStringParser.hs 11 --- Simple, efficient parser combinators for lazy 'C.ByteString' --- values, loosely based on 'Text.ParserCombinators.Parsec'. +-- Simple, efficient parser combinators for lazy 'LB.ByteString' +-- strings, loosely based on 'Text.ParserCombinators.Parsec'. hunk ./src/Text/ParserCombinators/ByteStringParser.hs 28 - , (<|>) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 32 - , many - , many1 hunk ./src/Text/ParserCombinators/ByteStringParser.hs 52 - , byteString - , byteStringCI hunk ./src/Text/ParserCombinators/ByteStringParser.hs 54 - , maybeP hunk ./src/Text/ParserCombinators/ByteStringParser.hs 71 -import Control.Applicative (Applicative(..), (<$>), (<*)) +import Control.Applicative (Alternative(..), Applicative(..), (<$>), (<*), (*>)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 73 +import qualified Data.ByteString.Char8 as SB hunk ./src/Text/ParserCombinators/ByteStringParser.hs 75 +import qualified Data.ByteString.Lazy.Internal as LB hunk ./src/Text/ParserCombinators/ByteStringParser.hs 85 -data S = S LB.ByteString +data S = S {-# UNPACK #-} !SB.ByteString + LB.ByteString hunk ./src/Text/ParserCombinators/ByteStringParser.hs 89 +mkState :: LB.ByteString -> Int64 -> S +mkState s = case s of + LB.Empty -> S SB.empty s + LB.Chunk x xs -> S x xs + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 105 +(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString +sb +: lb | SB.null sb = lb + | otherwise = LB.Chunk sb lb +{-# INLINE (+:) #-} + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 116 - fail err = Parser $ \(S bs _) -> Left (bs, [err]) + fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err]) + +zero :: Parser a +zero = Parser $ \(S sb lb _) -> Left (sb +: lb, []) +{-# INLINE zero #-} + +plus :: Parser a -> Parser a -> Parser a +plus p1 p2 = + Parser $ \s@(S sb lb _) -> + case unParser p1 s of + Left (_, msgs1) -> + case unParser p2 s of + Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2)) + ok -> ok + ok -> ok +{-# INLINE plus #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 134 - mzero = Parser $ \(S bs _) -> Left (bs, []) - Parser p1 `mplus` Parser p2 = - Parser $ \s@(S bs _) -> case p1 s of - Left (_, msgs1) -> - case p2 s of - Left (_, msgs2) -> Left (bs, (msgs1 ++ msgs2)) - ok -> ok - ok -> ok + mzero = zero + mplus = plus hunk ./src/Text/ParserCombinators/ByteStringParser.hs 141 + +instance Alternative Parser where + empty = zero + (<|>) = plus hunk ./src/Text/ParserCombinators/ByteStringParser.hs 152 -infixr 1 <|> - --- | Choice. -(<|>) :: Parser a -> Parser a -> Parser a -(<|>) = mplus -{-# INLINE (<|>) #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 156 - Parser $ \s@(S bs _) -> + Parser $ \s@(S sb lb _) -> hunk ./src/Text/ParserCombinators/ByteStringParser.hs 158 - (Left _) -> Left (bs, [msg]) + (Left _) -> Left (sb +: lb, [msg]) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 162 +nextChunk :: Parser () +nextChunk = Parser $ \(S _ lb n) -> + case lb of + LB.Chunk sb' lb' -> Right ((), S sb' lb' n) + LB.Empty -> Left (lb, []) + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 170 -getInput = Parser $ \s@(S bs _) -> Right (bs, s) +getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 174 -getConsumed = Parser $ \s@(S _ n) -> Right (n, s) +getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 178 -satisfy f = - Parser $ \(S bs n) -> - case LB.uncons bs of - Just (s, bs') | f s -> Right (s, S bs' (n + 1)) - _ -> Left (bs, []) +satisfy p = + Parser $ \s@(S sb lb n) -> + case SB.uncons sb of + Just (c, sb') | p c -> Right (c, S sb' lb (n + 1)) + | otherwise -> Left (sb +: lb, []) + Nothing -> unParser (nextChunk >> satisfy p) s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 233 -byteString :: LB.ByteString -> Parser LB.ByteString -byteString s = Parser $ \(S bs n) -> - let l = LB.length s +string :: LB.ByteString -> Parser LB.ByteString +string s = Parser $ \(S sb lb n) -> + let bs = sb +: lb + l = LB.length s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 239 - then Right (s, S t (n + l)) + then Right (s, mkState t (n + l)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 241 -{-# INLINE byteString #-} +{-# INLINE string #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 243 --- | Satisfy a literal string. -byteStringCI :: LB.ByteString -> Parser LB.ByteString -byteStringCI s = Parser $ \(S bs n) -> - let l = LB.length s +-- | Satisfy a literal string, ignoring case. +stringCI :: LB.ByteString -> Parser LB.ByteString +stringCI s = Parser $ \(S sb lb n) -> + let bs = sb +: lb + l = LB.length s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 250 - then Right (s, S t (n + l)) + then Right (s, mkState t (n + l)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 253 -{-# INLINE byteStringCI #-} - -string :: String -> Parser String -string s = byteString (LB.pack s) >> return s -{-# INLINE string #-} - -stringCI :: String -> Parser String -stringCI s = byteStringCI (LB.pack s) >> return s hunk ./src/Text/ParserCombinators/ByteStringParser.hs 261 -try p = Parser $ \s@(S bs _) -> +try p = Parser $ \s@(S sb lb _) -> hunk ./src/Text/ParserCombinators/ByteStringParser.hs 263 - Left (_, msgs) -> Left (bs, msgs) + Left (_, msgs) -> Left (sb +: lb, msgs) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 268 -eof = Parser $ \s@(S bs _) -> if LB.null bs - then Right ((), s) - else Left (bs, ["EOF"]) +eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb + then Right ((), s) + else Left (sb +: lb, ["EOF"]) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 273 -takeAll = Parser $ \(S bs n) -> Right (bs, S LB.empty (n + LB.length bs)) +takeAll = Parser $ \(S sb lb n) -> + let bs = sb +: lb + in Right (bs, mkState LB.empty (n + LB.length bs)) + +oneChunk :: SB.ByteString -> LB.ByteString +oneChunk s = LB.Chunk s LB.Empty + +length64 :: SB.ByteString -> Int64 +length64 = fromIntegral . SB.length hunk ./src/Text/ParserCombinators/ByteStringParser.hs 285 -takeWhile f = Parser $ \(S bs n) -> - let (h, bs') = LB.span f bs - in Right (h, S bs' (n + LB.length h)) +takeWhile p = Parser $ \s@(S sb lb n) -> + let (h, t) = SB.span p sb + in if SB.null t + then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s + else Right (oneChunk h, S t lb (n + length64 h)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 297 -takeWhile1 f = Parser $ \(S bs n) -> - let (h, bs') = LB.span f bs - in if LB.null h - then Left (bs, []) - else Right (h, S bs' (n + LB.length h)) +takeWhile1 p = Parser $ \s@(S sb lb n) -> + let (h, t) = SB.span p sb + in if SB.null t + then case unParser (nextChunk >> takeWhile p) s of + Left err -> Left err + Right (xs, s') -> + let bs = h +: xs + in if LB.null bs + then Left (sb +: lb, []) + else Right (bs, s') + else Right (oneChunk h, S t lb (n + length64 h)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 320 --- | Take zero or more instances of the parser. -many :: Parser a -> Parser [a] -many p = scan id - where scan f = do x <- p - scan (f . (x:)) - <|> return (f []) - --- | Take one or more instances of the parser. -many1 :: Parser a -> Parser [a] -many1 p = liftM2 (:) p (many p) - hunk ./src/Text/ParserCombinators/ByteStringParser.hs 352 -maybeP :: Parser a -> Parser (Maybe a) -maybeP p = (Just <$> p) <|> pure Nothing -{-# INLINE maybeP #-} - hunk ./src/Text/ParserCombinators/ByteStringParser.hs 371 - case unParser p (S bs n) of + case unParser p (mkState bs n) of hunk ./src/Text/ParserCombinators/ByteStringParser.hs 373 - Right (a, S bs' _) -> (bs', Right a) + Right (a, S sb lb _) -> (sb +: lb, Right a) hunk ./bytestringparser.cabal 2 -version: 0.2 +version: 0.2.1 hunk ./bytestringparser.cabal 30 + Text.ParserCombinators.ByteStringParser.FastSet hunk ./src/Text/ParserCombinators/ByteStringParser.hs 4 --- Copyright : (c) Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007 --- License : BSD-style (see the file libraries/parsec/LICENSE) +-- Copyright : Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008 +-- License : BSD3 hunk ./src/Text/ParserCombinators/ByteStringParser.hs 78 -import qualified Data.Set as S +import Text.ParserCombinators.ByteStringParser.FastSet (FastSet, member, set) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 212 -charClass :: String -> S.Set Char -charClass s = S.fromList (go s) +charClass :: String -> FastSet +charClass = set . SB.pack . go hunk ./src/Text/ParserCombinators/ByteStringParser.hs 219 -inClass s = (`S.member` set) - where set = charClass s +inClass s = (`member` myset) + where myset = charClass s +{-# INLINE inClass #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 224 -notInClass s = (`S.notMember` set) - where set = charClass s +notInClass s = not . inClass s +{-# INLINE notInClass #-} adddir ./src/Text/ParserCombinators/ByteStringParser addfile ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 1 +----------------------------------------------------------------------------- +-- | +-- Module : Text.ParserCombinators.ByteStringParser.FastSet +-- Copyright : Bryan O'Sullivan 2008 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Fast 8-bit character set membership. +-- +----------------------------------------------------------------------------- +module Text.ParserCombinators.ByteStringParser.FastSet + ( + FastSet + , set + , member + ) where + +import Data.ByteString.Char8 as SB +import Data.ByteString.Internal as SB +import Data.ByteString.Unsafe as SB + +newtype FastSet = FastSet SB.ByteString + deriving (Eq, Ord, Show) + +set :: SB.ByteString -> FastSet +set = FastSet . SB.sort + +member :: Char -> FastSet -> Bool +member c (FastSet s) = search 0 (SB.length s - 1) + where w = SB.c2w c + search lo hi | hi < lo = False + | otherwise = + let mid = (lo + hi) `div` 2 + cur = SB.unsafeIndex s mid + in case compare cur w of + GT -> search lo (mid - 1) + LT -> search (mid + 1) hi + _ -> True hunk ./src/Text/ParserCombinators/ByteStringParser.hs 73 +import Control.Monad.Fix (MonadFix(..)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 119 +instance MonadFix Parser where + mfix f = Parser $ \s -> + let r = case r of + Right (a, _) -> unParser (f a) s + err -> err + in r + hunk ./src/Text/ParserCombinators/ByteStringParser.hs 27 - , succeed hunk ./src/Text/ParserCombinators/ByteStringParser.hs 83 --- * Parser Monad - hunk ./src/Text/ParserCombinators/ByteStringParser.hs 87 -mkState :: LB.ByteString -> Int64 -> S -mkState s = case s of - LB.Empty -> S SB.empty s - LB.Chunk x xs -> S x xs - hunk ./src/Text/ParserCombinators/ByteStringParser.hs 98 -(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString -sb +: lb | SB.null sb = lb - | otherwise = LB.Chunk sb lb -{-# INLINE (+:) #-} - hunk ./src/Text/ParserCombinators/ByteStringParser.hs 132 -#ifdef APPLICATIVE_IN_BASE hunk ./src/Text/ParserCombinators/ByteStringParser.hs 139 -#endif hunk ./src/Text/ParserCombinators/ByteStringParser.hs 140 --- | Always succeed. -succeed :: a -> Parser a -succeed = return +mkState :: LB.ByteString -> Int64 -> S +mkState s = case s of + LB.Empty -> S SB.empty s + LB.Chunk x xs -> S x xs + +-- | Turn our chunked representation back into a normal lazy +-- ByteString. +(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString +sb +: lb | SB.null sb = lb + | otherwise = LB.Chunk sb lb +{-# INLINE (+:) #-} hunk ./src/Text/ParserCombinators/ByteStringParser.hs 237 - let bs = sb +: lb - l = LB.length s - (h, t) = LB.splitAt l bs - in if s == h - then Right (s, mkState t (n + l)) - else Left (bs, []) + let bs = sb +: lb + l = LB.length s + (h, t) = LB.splitAt l bs + in if s == h + then Right (s, mkState t (n + l)) + else Left (bs, []) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 248 - let bs = sb +: lb - l = LB.length s - (h, t) = LB.splitAt l bs - in if ls == LB.map toLower h - then Right (s, mkState t (n + l)) - else Left (bs, []) + let bs = sb +: lb + l = LB.length s + (h, t) = LB.splitAt l bs + in if ls == LB.map toLower h + then Right (s, mkState t (n + l)) + else Left (bs, []) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 302 - then case unParser (nextChunk >> takeWhile p) s of + then case unParser (nextChunk *> takeWhile p) s of hunk ./src/Text/ParserCombinators/ByteStringParser.hs 371 - -> (LB.ByteString, Either ParseError a) + -> (LB.ByteString, Either ParseError (a, Int64)) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 375 - Right (a, S sb lb _) -> (sb +: lb, Right a) + Right (a, S sb lb n') -> (sb +: lb, Right (a, n')) hunk ./src/Text/ParserCombinators/ByteStringParser.hs 383 -parse p bs = parseAt p bs 0 +parse p bs = case parseAt p bs 0 of + (bs', Right (a, _)) -> (bs', Right a) + (bs', Left err) -> (bs', Left err) hunk ./bytestringparser.cabal 32 - ghc-options: -O2 -Wall -Werror + ghc-options: -O2 -Wall -Werror -funbox-strict-fields hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 1 +{-# LANGUAGE BangPatterns #-} + hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 18 + -- * Data type hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 20 + -- * Construction hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 22 + -- * Lookup hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 24 + , member8 + -- * Debugging + , fromSet hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 29 -import Data.ByteString.Char8 as SB -import Data.ByteString.Internal as SB -import Data.ByteString.Unsafe as SB +import qualified Data.ByteString as B +-- import Data.ByteString.Char8 (pack) +import qualified Data.ByteString.Internal as I +import qualified Data.ByteString.Unsafe as U +import Data.Word (Word8) +import Foreign.Storable (peekByteOff, pokeByteOff) + +data FastSet = Sorted { fromSet :: {-# UNPACK #-} !B.ByteString } + | Table { fromSet :: {-# UNPACK #-} !B.ByteString } + deriving (Eq, Ord) hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 40 -newtype FastSet = FastSet SB.ByteString - deriving (Eq, Ord, Show) +instance Show FastSet where + show (Sorted s) = "FastSet " ++ show s + show (Table t) = "FastSet " ++ fromTable t hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 44 -set :: SB.ByteString -> FastSet -set = FastSet . SB.sort +-- | The lower bound on the size of a lookup table. We choose this to +-- balance table density against performance. +tableCutoff :: Int +tableCutoff = 32 hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 49 +-- | Create a character set. +set :: B.ByteString -> FastSet +set s | B.length s < tableCutoff = Sorted . B.sort $ s + | otherwise = Table . mkTable $ s + +-- | Check the table for membership. hunk ./src/Text/ParserCombinators/ByteStringParser/FastSet.hs 56 -member c (FastSet s) = search 0 (SB.length s - 1) - where w = SB.c2w c - search lo hi | hi < lo = False - | otherwise = - let mid = (lo + hi) `div` 2 - cur = SB.unsafeIndex s mid - in case compare cur w of - GT -> search lo (mid - 1) - LT -> search (mid + 1) hi - _ -> True +member c = member8 (I.c2w c) + +-- | Check the table for membership. +member8 :: Word8 -> FastSet -> Bool +member8 w (Table t) = U.unsafeIndex t (fromIntegral w) == entry +member8 w (Sorted s) = search 0 (B.length s - 1) + where search !lo !hi + | hi < lo = False + | otherwise = + let mid = (lo + hi) `div` 2 + in case compare w (U.unsafeIndex s mid) of + GT -> search lo (mid - 1) + LT -> search (mid + 1) hi + _ -> True + +-- | The value in a table that indicates that a character is not +-- present. We avoid NUL to make the table representation printable. +noEntry :: Word8 +noEntry = 0x5f + +-- | The value in a table that indicates that a character is present. +-- We use a printable character for readability. +entry :: Word8 +entry = 0x21 + +mkTable :: B.ByteString -> B.ByteString +mkTable s = I.unsafeCreate 256 $ \t -> do + I.memset t noEntry 256 + U.unsafeUseAsCStringLen s $ \(p, l) -> + let loop n | n == l = return () + | otherwise = do + c <- peekByteOff p n :: IO Word8 + pokeByteOff t (fromIntegral c) entry + loop (n + 1) + in loop 0 + +-- | Turn the table representation into a string, for debugging. +fromTable :: B.ByteString -> String +fromTable = snd . B.foldr go (0xff, []) + where go c (!n, cs) | c == noEntry = (n - 1, cs) + | otherwise = (n - 1, I.w2c n:cs) hunk ./bytestringparser.cabal 2 -version: 0.2.1 +version: 0.2.2 move ./src/Text ./src/Data hunk ./bytestringparser.cabal 29 - exposed-modules: Text.ParserCombinators.ByteStringParser - Text.ParserCombinators.ByteStringParser.FastSet + exposed-modules: Data.ParserCombinators.ByteStringParser + Data.ParserCombinators.ByteStringParser.FastSet hunk ./src/Data/ParserCombinators/ByteStringParser/FastSet.hs 5 --- Module : Text.ParserCombinators.ByteStringParser.FastSet +-- Module : Data.ParserCombinators.ByteStringParser.FastSet hunk ./src/Data/ParserCombinators/ByteStringParser/FastSet.hs 16 -module Text.ParserCombinators.ByteStringParser.FastSet +module Data.ParserCombinators.ByteStringParser.FastSet hunk ./src/Data/ParserCombinators/ByteStringParser/FastSet.hs 23 - , member - , member8 + , memberChar + , memberWord8 hunk ./src/Data/ParserCombinators/ByteStringParser/FastSet.hs 55 -member :: Char -> FastSet -> Bool -member c = member8 (I.c2w c) +memberChar :: Char -> FastSet -> Bool +memberChar c = memberWord8 (I.c2w c) hunk ./src/Data/ParserCombinators/ByteStringParser/FastSet.hs 59 -member8 :: Word8 -> FastSet -> Bool -member8 w (Table t) = U.unsafeIndex t (fromIntegral w) == entry -member8 w (Sorted s) = search 0 (B.length s - 1) +memberWord8 :: Word8 -> FastSet -> Bool +memberWord8 w (Table t) = U.unsafeIndex t (fromIntegral w) == entry +memberWord8 w (Sorted s) = search 0 (B.length s - 1) hunk ./src/Data/ParserCombinators/ByteStringParser.hs 3 --- Module : Text.ParserCombinators.ByteStringParser +-- Module : Data.ParserCombinators.ByteStringParser hunk ./src/Data/ParserCombinators/ByteStringParser.hs 15 -module Text.ParserCombinators.ByteStringParser +module Data.ParserCombinators.ByteStringParser hunk ./src/Data/ParserCombinators/ByteStringParser.hs 70 -import Control.Applicative (Alternative(..), Applicative(..), (<$>), (<*), (*>)) +import Control.Applicative + (Alternative(..), Applicative(..), (<$>), (<*), (*>)) hunk ./src/Data/ParserCombinators/ByteStringParser.hs 79 -import Text.ParserCombinators.ByteStringParser.FastSet (FastSet, member, set) +import Data.ParserCombinators.ByteStringParser.FastSet + (FastSet, memberChar, set) hunk ./src/Data/ParserCombinators/ByteStringParser.hs 222 -inClass s = (`member` myset) +inClass s = (`memberChar` myset) addfile ./src/Data/ParserCombinators/ByteStringParser/Internal.hs hunk ./bytestringparser.cabal 31 + Data.ParserCombinators.ByteStringParser.Internal hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 1 +----------------------------------------------------------------------------- +-- | +-- Module : Data.ParserCombinators.ByteStringParser.Internal +-- Copyright : Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient parser combinators for lazy 'LB.ByteString' +-- strings, loosely based on 'Text.ParserCombinators.Parsec'. +-- +----------------------------------------------------------------------------- +module Data.ParserCombinators.ByteStringParser.Internal + ( + -- * Parser + ParseError + , Parser + + -- * Running parsers + , parse + , parseAt + , parseTest + + -- * Combinators + , () + + -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@) + , try + , manyTill + , eof + , skipMany + , skipMany1 + , count + , lookAhead + , peek + , sepBy + , sepBy1 + + -- * Things like in @Parsec.Char@ + , satisfy + , letter + , digit + , anyChar + , space + , char + , notChar + , string + , stringCI + + -- * Parser converters. + , eitherP + + -- * Miscellaneous functions. + , getInput + , getConsumed + , takeWhile + , takeWhile1 + , takeTill + , takeAll + , skipWhile + , skipSpace + , notEmpty + , match + , inClass + , notInClass + ) where + +import Control.Applicative + (Alternative(..), Applicative(..), (<$>), (<*), (*>)) +import Control.Monad (MonadPlus(..), ap, liftM2) +import Control.Monad.Fix (MonadFix(..)) +import qualified Data.ByteString.Char8 as SB +import qualified Data.ByteString.Lazy.Char8 as LB +import qualified Data.ByteString.Lazy.Internal as LB +import Data.Char (isDigit, isLetter, isSpace, toLower) +import Data.Int (Int64) +import Data.ParserCombinators.ByteStringParser.FastSet + (FastSet, memberChar, set) +import Prelude hiding (takeWhile) + +type ParseError = String + +data S = S {-# UNPACK #-} !SB.ByteString + LB.ByteString + {-# UNPACK #-} !Int64 + +newtype Parser a = Parser { + unParser :: S -> Either (LB.ByteString, [String]) (a, S) + } + +instance Functor Parser where + fmap f p = + Parser $ \s -> + case unParser p s of + Right (a, s') -> Right (f a, s') + Left err -> Left err + +instance Monad Parser where + return a = Parser $ \s -> Right (a, s) + m >>= f = Parser $ \s -> + case unParser m s of + Right (a, s') -> unParser (f a) s' + Left (s', msgs) -> Left (s', msgs) + fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err]) + +instance MonadFix Parser where + mfix f = Parser $ \s -> + let r = case r of + Right (a, _) -> unParser (f a) s + err -> err + in r + +zero :: Parser a +zero = Parser $ \(S sb lb _) -> Left (sb +: lb, []) +{-# INLINE zero #-} + +plus :: Parser a -> Parser a -> Parser a +plus p1 p2 = + Parser $ \s@(S sb lb _) -> + case unParser p1 s of + Left (_, msgs1) -> + case unParser p2 s of + Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2)) + ok -> ok + ok -> ok +{-# INLINE plus #-} + +instance MonadPlus Parser where + mzero = zero + mplus = plus + +instance Applicative Parser where + pure = return + (<*>) = ap + +instance Alternative Parser where + empty = zero + (<|>) = plus + +mkState :: LB.ByteString -> Int64 -> S +mkState s = case s of + LB.Empty -> S SB.empty s + LB.Chunk x xs -> S x xs + +-- | Turn our chunked representation back into a normal lazy +-- ByteString. +(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString +sb +: lb | SB.null sb = lb + | otherwise = LB.Chunk sb lb +{-# INLINE (+:) #-} + +infix 0 + +-- | Name the parser. +() :: Parser a -> String -> Parser a +p msg = + Parser $ \s@(S sb lb _) -> + case unParser p s of + (Left _) -> Left (sb +: lb, [msg]) + ok -> ok +{-# INLINE () #-} + +nextChunk :: Parser () +nextChunk = Parser $ \(S _ lb n) -> + case lb of + LB.Chunk sb' lb' -> Right ((), S sb' lb' n) + LB.Empty -> Left (lb, []) + +-- | Get remaining input. +getInput :: Parser LB.ByteString +getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s) + +-- | Get number of bytes consumed so far. +getConsumed :: Parser Int64 +getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s) + +-- | Character parser. +satisfy :: (Char -> Bool) -> Parser Char +satisfy p = + Parser $ \s@(S sb lb n) -> + case SB.uncons sb of + Just (c, sb') | p c -> Right (c, S sb' lb (n + 1)) + | otherwise -> Left (sb +: lb, []) + Nothing -> unParser (nextChunk >> satisfy p) s +{-# INLINE satisfy #-} + +letter :: Parser Char +letter = satisfy isLetter "letter" +{-# INLINE letter #-} + +digit :: Parser Char +digit = satisfy isDigit "digit" +{-# INLINE digit #-} + +anyChar :: Parser Char +anyChar = satisfy $ const True +{-# INLINE anyChar #-} + +space :: Parser Char +space = satisfy isSpace "space" +{-# INLINE space #-} + +-- | Satisfy a specific character. +char :: Char -> Parser Char +char c = satisfy (== c) [c] +{-# INLINE char #-} + +-- | Satisfy a specific character. +notChar :: Char -> Parser Char +notChar c = satisfy (/= c) "not " ++ [c] +{-# INLINE notChar #-} + +charClass :: String -> FastSet +charClass = set . SB.pack . go + where go (a:'-':b:xs) = [a..b] ++ go xs + go (x:xs) = x : go xs + go _ = "" + +inClass :: String -> Char -> Bool +inClass s = (`memberChar` myset) + where myset = charClass s +{-# INLINE inClass #-} + +notInClass :: String -> Char -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +sepBy :: Parser a -> Parser s -> Parser [a] +sepBy p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) <|> return [] + +sepBy1 :: Parser a -> Parser s -> Parser [a] +sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) + +-- | Satisfy a literal string. +string :: LB.ByteString -> Parser LB.ByteString +string s = Parser $ \(S sb lb n) -> + let bs = sb +: lb + l = LB.length s + (h, t) = LB.splitAt l bs + in if s == h + then Right (s, mkState t (n + l)) + else Left (bs, []) +{-# INLINE string #-} + +-- | Satisfy a literal string, ignoring case. +stringCI :: LB.ByteString -> Parser LB.ByteString +stringCI s = Parser $ \(S sb lb n) -> + let bs = sb +: lb + l = LB.length s + (h, t) = LB.splitAt l bs + in if ls == LB.map toLower h + then Right (s, mkState t (n + l)) + else Left (bs, []) + where ls = LB.map toLower s +{-# INLINE stringCI #-} + +-- | Apply the given parser repeatedly, returning every parse result. +count :: Int -> Parser a -> Parser [a] +count n p = sequence (replicate n p) +{-# INLINE count #-} + +try :: Parser a -> Parser a +try p = Parser $ \s@(S sb lb _) -> + case unParser p s of + Left (_, msgs) -> Left (sb +: lb, msgs) + ok -> ok + +-- | Detect 'end of file'. +eof :: Parser () +eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb + then Right ((), s) + else Left (sb +: lb, ["EOF"]) + +takeAll :: Parser LB.ByteString +takeAll = Parser $ \(S sb lb n) -> + let bs = sb +: lb + in Right (bs, mkState LB.empty (n + LB.length bs)) + +oneChunk :: SB.ByteString -> LB.ByteString +oneChunk s = LB.Chunk s LB.Empty + +length64 :: SB.ByteString -> Int64 +length64 = fromIntegral . SB.length + +-- | Consume characters while the predicate is true. +takeWhile :: (Char -> Bool) -> Parser LB.ByteString +takeWhile p = Parser $ \s@(S sb lb n) -> + let (h, t) = SB.span p sb + in if SB.null t + then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s + else Right (oneChunk h, S t lb (n + length64 h)) +{-# INLINE takeWhile #-} + +takeTill :: (Char -> Bool) -> Parser LB.ByteString +takeTill p = takeWhile (not . p) <* satisfy p +{-# INLINE takeTill #-} + +takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString +takeWhile1 p = Parser $ \s@(S sb lb n) -> + let (h, t) = SB.span p sb + in if SB.null t + then case unParser (nextChunk *> takeWhile p) s of + Left err -> Left err + Right (xs, s') -> + let bs = h +: xs + in if LB.null bs + then Left (sb +: lb, []) + else Right (bs, s') + else Right (oneChunk h, S t lb (n + length64 h)) +{-# INLINE takeWhile1 #-} + +-- | Skip over characters while the predicate is true. +skipWhile :: (Char -> Bool) -> Parser () +skipWhile p = takeWhile p >> return () +{-# INLINE skipWhile #-} + +-- | Skip over white space. +skipSpace :: Parser () +skipSpace = takeWhile isSpace >> return () +{-# INLINE skipSpace #-} + +manyTill :: Parser a -> Parser b -> Parser [a] +manyTill p end = scan + where scan = (end >> return []) <|> liftM2 (:) p scan + +-- |'skipMany' - skip zero or many instances of the parser +skipMany :: Parser a -> Parser () +skipMany p = scan + where scan = (p >> scan) <|> return () + +-- |'skipMany1' - skip one or many instances of the parser +skipMany1 :: Parser a -> Parser () +skipMany1 p = p >> skipMany p + +-- | Test that a parser returned a non-null ByteString. +notEmpty :: Parser LB.ByteString -> Parser LB.ByteString +notEmpty p = Parser $ \s -> + case unParser p s of + o@(Right (a, _)) -> + if LB.null a + then Left (a, ["notEmpty"]) + else o + x -> x + +-- | Parse some input with the given parser and return that input +-- without copying it. +match :: Parser a -> Parser LB.ByteString +match p = do bs <- getInput + start <- getConsumed + p + end <- getConsumed + return (LB.take (end - start) bs) + +eitherP :: Parser a -> Parser b -> Parser (Either a b) +eitherP a b = (Left <$> a) <|> (Right <$> b) +{-# INLINE eitherP #-} + +peek :: Parser a -> Parser (Maybe a) +peek p = Parser $ \s -> + case unParser p s of + Right (m, _) -> Right (Just m, s) + _ -> Right (Nothing, s) + +lookAhead :: Parser a -> Parser a +lookAhead p = Parser $ \s -> + case unParser p s of + Right (m, _) -> Right (m, s) + Left (e, bs) -> Left (e, bs) + +parseAt :: Parser a -> LB.ByteString -> Int64 + -> (LB.ByteString, Either ParseError (a, Int64)) +parseAt p bs n = + case unParser p (mkState bs n) of + Left (bs', msg) -> (bs', Left $ showError msg) + Right (a, S sb lb n') -> (sb +: lb, Right (a, n')) + where + showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n" + showError msgs = "Parser error, expected one of:\n" ++ unlines msgs + +-- | Run a parser. +parse :: Parser a -> LB.ByteString + -> (LB.ByteString, Either ParseError a) +parse p bs = case parseAt p bs 0 of + (bs', Right (a, _)) -> (bs', Right a) + (bs', Left err) -> (bs', Left err) + +parseTest :: (Show a) => Parser a -> LB.ByteString -> IO () +parseTest p s = + case parse p s of + (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st + (_, Right r) -> print r hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 43 - , letter - , digit hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 44 - , space hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 47 - , stringCI + , stringTransform hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 60 - , skipSpace hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 62 - , inClass - , notInClass hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 68 -import qualified Data.ByteString.Char8 as SB -import qualified Data.ByteString.Lazy.Char8 as LB +import qualified Data.ByteString as SB +import qualified Data.ByteString.Lazy as LB hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 71 -import Data.Char (isDigit, isLetter, isSpace, toLower) hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 72 -import Data.ParserCombinators.ByteStringParser.FastSet - (FastSet, memberChar, set) +import Data.Word (Word8) hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 172 -satisfy :: (Char -> Bool) -> Parser Char +satisfy :: (Word8 -> Bool) -> Parser Word8 hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 181 -letter :: Parser Char -letter = satisfy isLetter "letter" -{-# INLINE letter #-} - -digit :: Parser Char -digit = satisfy isDigit "digit" -{-# INLINE digit #-} - -anyChar :: Parser Char +anyChar :: Parser Word8 hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 185 -space :: Parser Char -space = satisfy isSpace "space" -{-# INLINE space #-} - hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 186 -char :: Char -> Parser Char -char c = satisfy (== c) [c] +char :: Word8 -> Parser Word8 +char c = satisfy (== c) show c hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 191 -notChar :: Char -> Parser Char -notChar c = satisfy (/= c) "not " ++ [c] +notChar :: Word8 -> Parser Word8 +notChar c = satisfy (/= c) "not " ++ show c hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 195 -charClass :: String -> FastSet -charClass = set . SB.pack . go - where go (a:'-':b:xs) = [a..b] ++ go xs - go (x:xs) = x : go xs - go _ = "" - -inClass :: String -> Char -> Bool -inClass s = (`memberChar` myset) - where myset = charClass s -{-# INLINE inClass #-} - -notInClass :: String -> Char -> Bool -notInClass s = not . inClass s -{-# INLINE notInClass #-} - hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 212 --- | Satisfy a literal string, ignoring case. -stringCI :: LB.ByteString -> Parser LB.ByteString -stringCI s = Parser $ \(S sb lb n) -> +-- | Satisfy a literal string, after applying a transformation to both +-- it and the matching text. +stringTransform :: (LB.ByteString -> LB.ByteString) -> LB.ByteString + -> Parser LB.ByteString +stringTransform f s = Parser $ \(S sb lb n) -> hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 220 - in if ls == LB.map toLower h + in if fs == f h hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 223 - where ls = LB.map toLower s -{-# INLINE stringCI #-} + where fs = f s +{-# INLINE stringTransform #-} hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 255 -takeWhile :: (Char -> Bool) -> Parser LB.ByteString +takeWhile :: (Word8 -> Bool) -> Parser LB.ByteString hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 263 -takeTill :: (Char -> Bool) -> Parser LB.ByteString +takeTill :: (Word8 -> Bool) -> Parser LB.ByteString hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 267 -takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString +takeWhile1 :: (Word8 -> Bool) -> Parser LB.ByteString hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 282 -skipWhile :: (Char -> Bool) -> Parser () +skipWhile :: (Word8 -> Bool) -> Parser () hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 286 --- | Skip over white space. -skipSpace :: Parser () -skipSpace = takeWhile isSpace >> return () -{-# INLINE skipSpace #-} - addfile ./src/Data/ParserCombinators/ByteStringParser/Char8.hs hunk ./bytestringparser.cabal 30 + Data.ParserCombinators.ByteStringParser.Char8 hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 1 +----------------------------------------------------------------------------- +-- | +-- Module : Data.ParserCombinators.ByteStringParser.Char8 +-- Copyright : Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient parser combinators for lazy 'LB.ByteString' +-- strings, loosely based on 'Text.ParserCombinators.Parsec'. +-- +----------------------------------------------------------------------------- +module Data.ParserCombinators.ByteStringParser.Char8 + ( + -- * Parser + ParseError + , Parser + + -- * Running parsers + , parse + , parseAt + , parseTest + + -- * Combinators + , () + + -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@) + , try + , manyTill + , eof + , skipMany + , skipMany1 + , count + , lookAhead + , peek + , sepBy + , sepBy1 + + -- * Things like in @Parsec.Char@ + , satisfy + , letter + , digit + , anyChar + , space + , char + , notChar + , string + , stringCI + + -- * Parser converters. + , eitherP + + -- * Miscellaneous functions. + , getInput + , getConsumed + , takeWhile + , takeWhile1 + , takeTill + , takeAll + , skipWhile + , skipSpace + , notEmpty + , match + , inClass + , notInClass + ) where + +import Control.Applicative + (Alternative(..), Applicative(..), (<$>), (<*), (*>)) +import Control.Monad (MonadPlus(..), ap, liftM2) +import Control.Monad.Fix (MonadFix(..)) +import qualified Data.ByteString.Char8 as SB +import qualified Data.ByteString.Lazy.Char8 as LB +import qualified Data.ByteString.Lazy.Internal as LB +import Data.Char (isDigit, isLetter, isSpace, toLower) +import Data.Int (Int64) +import Data.ParserCombinators.ByteStringParser.FastSet + (FastSet, memberChar, set) +import Prelude hiding (takeWhile) + +type ParseError = String + +data S = S {-# UNPACK #-} !SB.ByteString + LB.ByteString + {-# UNPACK #-} !Int64 + +newtype Parser a = Parser { + unParser :: S -> Either (LB.ByteString, [String]) (a, S) + } + +instance Functor Parser where + fmap f p = + Parser $ \s -> + case unParser p s of + Right (a, s') -> Right (f a, s') + Left err -> Left err + +instance Monad Parser where + return a = Parser $ \s -> Right (a, s) + m >>= f = Parser $ \s -> + case unParser m s of + Right (a, s') -> unParser (f a) s' + Left (s', msgs) -> Left (s', msgs) + fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err]) + +instance MonadFix Parser where + mfix f = Parser $ \s -> + let r = case r of + Right (a, _) -> unParser (f a) s + err -> err + in r + +zero :: Parser a +zero = Parser $ \(S sb lb _) -> Left (sb +: lb, []) +{-# INLINE zero #-} + +plus :: Parser a -> Parser a -> Parser a +plus p1 p2 = + Parser $ \s@(S sb lb _) -> + case unParser p1 s of + Left (_, msgs1) -> + case unParser p2 s of + Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2)) + ok -> ok + ok -> ok +{-# INLINE plus #-} + +instance MonadPlus Parser where + mzero = zero + mplus = plus + +instance Applicative Parser where + pure = return + (<*>) = ap + +instance Alternative Parser where + empty = zero + (<|>) = plus + +mkState :: LB.ByteString -> Int64 -> S +mkState s = case s of + LB.Empty -> S SB.empty s + LB.Chunk x xs -> S x xs + +-- | Turn our chunked representation back into a normal lazy +-- ByteString. +(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString +sb +: lb | SB.null sb = lb + | otherwise = LB.Chunk sb lb +{-# INLINE (+:) #-} + +infix 0 + +-- | Name the parser. +() :: Parser a -> String -> Parser a +p msg = + Parser $ \s@(S sb lb _) -> + case unParser p s of + (Left _) -> Left (sb +: lb, [msg]) + ok -> ok +{-# INLINE () #-} + +nextChunk :: Parser () +nextChunk = Parser $ \(S _ lb n) -> + case lb of + LB.Chunk sb' lb' -> Right ((), S sb' lb' n) + LB.Empty -> Left (lb, []) + +-- | Get remaining input. +getInput :: Parser LB.ByteString +getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s) + +-- | Get number of bytes consumed so far. +getConsumed :: Parser Int64 +getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s) + +-- | Character parser. +satisfy :: (Char -> Bool) -> Parser Char +satisfy p = + Parser $ \s@(S sb lb n) -> + case SB.uncons sb of + Just (c, sb') | p c -> Right (c, S sb' lb (n + 1)) + | otherwise -> Left (sb +: lb, []) + Nothing -> unParser (nextChunk >> satisfy p) s +{-# INLINE satisfy #-} + +letter :: Parser Char +letter = satisfy isLetter "letter" +{-# INLINE letter #-} + +digit :: Parser Char +digit = satisfy isDigit "digit" +{-# INLINE digit #-} + +anyChar :: Parser Char +anyChar = satisfy $ const True +{-# INLINE anyChar #-} + +space :: Parser Char +space = satisfy isSpace "space" +{-# INLINE space #-} + +-- | Satisfy a specific character. +char :: Char -> Parser Char +char c = satisfy (== c) [c] +{-# INLINE char #-} + +-- | Satisfy a specific character. +notChar :: Char -> Parser Char +notChar c = satisfy (/= c) "not " ++ [c] +{-# INLINE notChar #-} + +charClass :: String -> FastSet +charClass = set . SB.pack . go + where go (a:'-':b:xs) = [a..b] ++ go xs + go (x:xs) = x : go xs + go _ = "" + +inClass :: String -> Char -> Bool +inClass s = (`memberChar` myset) + where myset = charClass s +{-# INLINE inClass #-} + +notInClass :: String -> Char -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +sepBy :: Parser a -> Parser s -> Parser [a] +sepBy p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) <|> return [] + +sepBy1 :: Parser a -> Parser s -> Parser [a] +sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) + +-- | Satisfy a literal string. +string :: LB.ByteString -> Parser LB.ByteString +string s = Parser $ \(S sb lb n) -> + let bs = sb +: lb + l = LB.length s + (h, t) = LB.splitAt l bs + in if s == h + then Right (s, mkState t (n + l)) + else Left (bs, []) +{-# INLINE string #-} + +-- | Satisfy a literal string, ignoring case. +stringCI :: LB.ByteString -> Parser LB.ByteString +stringCI s = Parser $ \(S sb lb n) -> + let bs = sb +: lb + l = LB.length s + (h, t) = LB.splitAt l bs + in if ls == LB.map toLower h + then Right (s, mkState t (n + l)) + else Left (bs, []) + where ls = LB.map toLower s +{-# INLINE stringCI #-} + +-- | Apply the given parser repeatedly, returning every parse result. +count :: Int -> Parser a -> Parser [a] +count n p = sequence (replicate n p) +{-# INLINE count #-} + +try :: Parser a -> Parser a +try p = Parser $ \s@(S sb lb _) -> + case unParser p s of + Left (_, msgs) -> Left (sb +: lb, msgs) + ok -> ok + +-- | Detect 'end of file'. +eof :: Parser () +eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb + then Right ((), s) + else Left (sb +: lb, ["EOF"]) + +takeAll :: Parser LB.ByteString +takeAll = Parser $ \(S sb lb n) -> + let bs = sb +: lb + in Right (bs, mkState LB.empty (n + LB.length bs)) + +oneChunk :: SB.ByteString -> LB.ByteString +oneChunk s = LB.Chunk s LB.Empty + +length64 :: SB.ByteString -> Int64 +length64 = fromIntegral . SB.length + +-- | Consume characters while the predicate is true. +takeWhile :: (Char -> Bool) -> Parser LB.ByteString +takeWhile p = Parser $ \s@(S sb lb n) -> + let (h, t) = SB.span p sb + in if SB.null t + then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s + else Right (oneChunk h, S t lb (n + length64 h)) +{-# INLINE takeWhile #-} + +takeTill :: (Char -> Bool) -> Parser LB.ByteString +takeTill p = takeWhile (not . p) <* satisfy p +{-# INLINE takeTill #-} + +takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString +takeWhile1 p = Parser $ \s@(S sb lb n) -> + let (h, t) = SB.span p sb + in if SB.null t + then case unParser (nextChunk *> takeWhile p) s of + Left err -> Left err + Right (xs, s') -> + let bs = h +: xs + in if LB.null bs + then Left (sb +: lb, []) + else Right (bs, s') + else Right (oneChunk h, S t lb (n + length64 h)) +{-# INLINE takeWhile1 #-} + +-- | Skip over characters while the predicate is true. +skipWhile :: (Char -> Bool) -> Parser () +skipWhile p = takeWhile p >> return () +{-# INLINE skipWhile #-} + +-- | Skip over white space. +skipSpace :: Parser () +skipSpace = takeWhile isSpace >> return () +{-# INLINE skipSpace #-} + +manyTill :: Parser a -> Parser b -> Parser [a] +manyTill p end = scan + where scan = (end >> return []) <|> liftM2 (:) p scan + +-- |'skipMany' - skip zero or many instances of the parser +skipMany :: Parser a -> Parser () +skipMany p = scan + where scan = (p >> scan) <|> return () + +-- |'skipMany1' - skip one or many instances of the parser +skipMany1 :: Parser a -> Parser () +skipMany1 p = p >> skipMany p + +-- | Test that a parser returned a non-null ByteString. +notEmpty :: Parser LB.ByteString -> Parser LB.ByteString +notEmpty p = Parser $ \s -> + case unParser p s of + o@(Right (a, _)) -> + if LB.null a + then Left (a, ["notEmpty"]) + else o + x -> x + +-- | Parse some input with the given parser and return that input +-- without copying it. +match :: Parser a -> Parser LB.ByteString +match p = do bs <- getInput + start <- getConsumed + p + end <- getConsumed + return (LB.take (end - start) bs) + +eitherP :: Parser a -> Parser b -> Parser (Either a b) +eitherP a b = (Left <$> a) <|> (Right <$> b) +{-# INLINE eitherP #-} + +peek :: Parser a -> Parser (Maybe a) +peek p = Parser $ \s -> + case unParser p s of + Right (m, _) -> Right (Just m, s) + _ -> Right (Nothing, s) + +lookAhead :: Parser a -> Parser a +lookAhead p = Parser $ \s -> + case unParser p s of + Right (m, _) -> Right (m, s) + Left (e, bs) -> Left (e, bs) + +parseAt :: Parser a -> LB.ByteString -> Int64 + -> (LB.ByteString, Either ParseError (a, Int64)) +parseAt p bs n = + case unParser p (mkState bs n) of + Left (bs', msg) -> (bs', Left $ showError msg) + Right (a, S sb lb n') -> (sb +: lb, Right (a, n')) + where + showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n" + showError msgs = "Parser error, expected one of:\n" ++ unlines msgs + +-- | Run a parser. +parse :: Parser a -> LB.ByteString + -> (LB.ByteString, Either ParseError a) +parse p bs = case parseAt p bs 0 of + (bs', Right (a, _)) -> (bs', Right a) + (bs', Left err) -> (bs', Left err) + +parseTest :: (Show a) => Parser a -> LB.ByteString -> IO () +parseTest p s = + case parse p s of + (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st + (_, Right r) -> print r hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 43 - , anyChar - , char - , notChar + , anyWord8 + , word8 + , notWord8 hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 181 -anyChar :: Parser Word8 -anyChar = satisfy $ const True -{-# INLINE anyChar #-} +anyWord8 :: Parser Word8 +anyWord8 = satisfy $ const True +{-# INLINE anyWord8 #-} hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 186 -char :: Word8 -> Parser Word8 -char c = satisfy (== c) show c -{-# INLINE char #-} +word8 :: Word8 -> Parser Word8 +word8 c = satisfy (== c) show c +{-# INLINE word8 #-} hunk ./src/Data/ParserCombinators/ByteStringParser/Internal.hs 191 -notChar :: Word8 -> Parser Word8 -notChar c = satisfy (/= c) "not " ++ show c -{-# INLINE notChar #-} +notWord8 :: Word8 -> Parser Word8 +notWord8 c = satisfy (/= c) "not " ++ show c +{-# INLINE notWord8 #-} hunk ./src/Data/ParserCombinators/ByteStringParser.hs 43 - , letter - , digit - , anyChar - , space - , char - , notChar + , anyWord8 + , word8 + , notWord8 hunk ./src/Data/ParserCombinators/ByteStringParser.hs 47 - , stringCI + , stringTransform hunk ./src/Data/ParserCombinators/ByteStringParser.hs 60 - , skipSpace hunk ./src/Data/ParserCombinators/ByteStringParser.hs 62 - , inClass - , notInClass hunk ./src/Data/ParserCombinators/ByteStringParser.hs 64 -import Control.Applicative - (Alternative(..), Applicative(..), (<$>), (<*), (*>)) -import Control.Monad (MonadPlus(..), ap, liftM2) -import Control.Monad.Fix (MonadFix(..)) -import qualified Data.ByteString.Char8 as SB -import qualified Data.ByteString.Lazy.Char8 as LB -import qualified Data.ByteString.Lazy.Internal as LB -import Data.Char (isDigit, isLetter, isSpace, toLower) -import Data.Int (Int64) -import Data.ParserCombinators.ByteStringParser.FastSet - (FastSet, memberChar, set) +import Data.ParserCombinators.ByteStringParser.Internal hunk ./src/Data/ParserCombinators/ByteStringParser.hs 67 -type ParseError = String - -data S = S {-# UNPACK #-} !SB.ByteString - LB.ByteString - {-# UNPACK #-} !Int64 - -newtype Parser a = Parser { - unParser :: S -> Either (LB.ByteString, [String]) (a, S) - } - -instance Functor Parser where - fmap f p = - Parser $ \s -> - case unParser p s of - Right (a, s') -> Right (f a, s') - Left err -> Left err - -instance Monad Parser where - return a = Parser $ \s -> Right (a, s) - m >>= f = Parser $ \s -> - case unParser m s of - Right (a, s') -> unParser (f a) s' - Left (s', msgs) -> Left (s', msgs) - fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err]) - -instance MonadFix Parser where - mfix f = Parser $ \s -> - let r = case r of - Right (a, _) -> unParser (f a) s - err -> err - in r - -zero :: Parser a -zero = Parser $ \(S sb lb _) -> Left (sb +: lb, []) -{-# INLINE zero #-} - -plus :: Parser a -> Parser a -> Parser a -plus p1 p2 = - Parser $ \s@(S sb lb _) -> - case unParser p1 s of - Left (_, msgs1) -> - case unParser p2 s of - Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2)) - ok -> ok - ok -> ok -{-# INLINE plus #-} - -instance MonadPlus Parser where - mzero = zero - mplus = plus - -instance Applicative Parser where - pure = return - (<*>) = ap - -instance Alternative Parser where - empty = zero - (<|>) = plus - -mkState :: LB.ByteString -> Int64 -> S -mkState s = case s of - LB.Empty -> S SB.empty s - LB.Chunk x xs -> S x xs - --- | Turn our chunked representation back into a normal lazy --- ByteString. -(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString -sb +: lb | SB.null sb = lb - | otherwise = LB.Chunk sb lb -{-# INLINE (+:) #-} - -infix 0 - --- | Name the parser. -() :: Parser a -> String -> Parser a -p msg = - Parser $ \s@(S sb lb _) -> - case unParser p s of - (Left _) -> Left (sb +: lb, [msg]) - ok -> ok -{-# INLINE () #-} - -nextChunk :: Parser () -nextChunk = Parser $ \(S _ lb n) -> - case lb of - LB.Chunk sb' lb' -> Right ((), S sb' lb' n) - LB.Empty -> Left (lb, []) - --- | Get remaining input. -getInput :: Parser LB.ByteString -getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s) - --- | Get number of bytes consumed so far. -getConsumed :: Parser Int64 -getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s) - --- | Character parser. -satisfy :: (Char -> Bool) -> Parser Char -satisfy p = - Parser $ \s@(S sb lb n) -> - case SB.uncons sb of - Just (c, sb') | p c -> Right (c, S sb' lb (n + 1)) - | otherwise -> Left (sb +: lb, []) - Nothing -> unParser (nextChunk >> satisfy p) s -{-# INLINE satisfy #-} - -letter :: Parser Char -letter = satisfy isLetter "letter" -{-# INLINE letter #-} - -digit :: Parser Char -digit = satisfy isDigit "digit" -{-# INLINE digit #-} - -anyChar :: Parser Char -anyChar = satisfy $ const True -{-# INLINE anyChar #-} - -space :: Parser Char -space = satisfy isSpace "space" -{-# INLINE space #-} - --- | Satisfy a specific character. -char :: Char -> Parser Char -char c = satisfy (== c) [c] -{-# INLINE char #-} - --- | Satisfy a specific character. -notChar :: Char -> Parser Char -notChar c = satisfy (/= c) "not " ++ [c] -{-# INLINE notChar #-} - -charClass :: String -> FastSet -charClass = set . SB.pack . go - where go (a:'-':b:xs) = [a..b] ++ go xs - go (x:xs) = x : go xs - go _ = "" - -inClass :: String -> Char -> Bool -inClass s = (`memberChar` myset) - where myset = charClass s -{-# INLINE inClass #-} - -notInClass :: String -> Char -> Bool -notInClass s = not . inClass s -{-# INLINE notInClass #-} - -sepBy :: Parser a -> Parser s -> Parser [a] -sepBy p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) <|> return [] - -sepBy1 :: Parser a -> Parser s -> Parser [a] -sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) - --- | Satisfy a literal string. -string :: LB.ByteString -> Parser LB.ByteString -string s = Parser $ \(S sb lb n) -> - let bs = sb +: lb - l = LB.length s - (h, t) = LB.splitAt l bs - in if s == h - then Right (s, mkState t (n + l)) - else Left (bs, []) -{-# INLINE string #-} - --- | Satisfy a literal string, ignoring case. -stringCI :: LB.ByteString -> Parser LB.ByteString -stringCI s = Parser $ \(S sb lb n) -> - let bs = sb +: lb - l = LB.length s - (h, t) = LB.splitAt l bs - in if ls == LB.map toLower h - then Right (s, mkState t (n + l)) - else Left (bs, []) - where ls = LB.map toLower s -{-# INLINE stringCI #-} - --- | Apply the given parser repeatedly, returning every parse result. -count :: Int -> Parser a -> Parser [a] -count n p = sequence (replicate n p) -{-# INLINE count #-} - -try :: Parser a -> Parser a -try p = Parser $ \s@(S sb lb _) -> - case unParser p s of - Left (_, msgs) -> Left (sb +: lb, msgs) - ok -> ok - --- | Detect 'end of file'. -eof :: Parser () -eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb - then Right ((), s) - else Left (sb +: lb, ["EOF"]) - -takeAll :: Parser LB.ByteString -takeAll = Parser $ \(S sb lb n) -> - let bs = sb +: lb - in Right (bs, mkState LB.empty (n + LB.length bs)) - -oneChunk :: SB.ByteString -> LB.ByteString -oneChunk s = LB.Chunk s LB.Empty - -length64 :: SB.ByteString -> Int64 -length64 = fromIntegral . SB.length - --- | Consume characters while the predicate is true. -takeWhile :: (Char -> Bool) -> Parser LB.ByteString -takeWhile p = Parser $ \s@(S sb lb n) -> - let (h, t) = SB.span p sb - in if SB.null t - then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s - else Right (oneChunk h, S t lb (n + length64 h)) -{-# INLINE takeWhile #-} - -takeTill :: (Char -> Bool) -> Parser LB.ByteString -takeTill p = takeWhile (not . p) <* satisfy p -{-# INLINE takeTill #-} - -takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString -takeWhile1 p = Parser $ \s@(S sb lb n) -> - let (h, t) = SB.span p sb - in if SB.null t - then case unParser (nextChunk *> takeWhile p) s of - Left err -> Left err - Right (xs, s') -> - let bs = h +: xs - in if LB.null bs - then Left (sb +: lb, []) - else Right (bs, s') - else Right (oneChunk h, S t lb (n + length64 h)) -{-# INLINE takeWhile1 #-} - --- | Skip over characters while the predicate is true. -skipWhile :: (Char -> Bool) -> Parser () -skipWhile p = takeWhile p >> return () -{-# INLINE skipWhile #-} - --- | Skip over white space. -skipSpace :: Parser () -skipSpace = takeWhile isSpace >> return () -{-# INLINE skipSpace #-} - -manyTill :: Parser a -> Parser b -> Parser [a] -manyTill p end = scan - where scan = (end >> return []) <|> liftM2 (:) p scan - --- |'skipMany' - skip zero or many instances of the parser -skipMany :: Parser a -> Parser () -skipMany p = scan - where scan = (p >> scan) <|> return () - --- |'skipMany1' - skip one or many instances of the parser -skipMany1 :: Parser a -> Parser () -skipMany1 p = p >> skipMany p - --- | Test that a parser returned a non-null ByteString. -notEmpty :: Parser LB.ByteString -> Parser LB.ByteString -notEmpty p = Parser $ \s -> - case unParser p s of - o@(Right (a, _)) -> - if LB.null a - then Left (a, ["notEmpty"]) - else o - x -> x - --- | Parse some input with the given parser and return that input --- without copying it. -match :: Parser a -> Parser LB.ByteString -match p = do bs <- getInput - start <- getConsumed - p - end <- getConsumed - return (LB.take (end - start) bs) - -eitherP :: Parser a -> Parser b -> Parser (Either a b) -eitherP a b = (Left <$> a) <|> (Right <$> b) -{-# INLINE eitherP #-} - -peek :: Parser a -> Parser (Maybe a) -peek p = Parser $ \s -> - case unParser p s of - Right (m, _) -> Right (Just m, s) - _ -> Right (Nothing, s) - -lookAhead :: Parser a -> Parser a -lookAhead p = Parser $ \s -> - case unParser p s of - Right (m, _) -> Right (m, s) - Left (e, bs) -> Left (e, bs) - -parseAt :: Parser a -> LB.ByteString -> Int64 - -> (LB.ByteString, Either ParseError (a, Int64)) -parseAt p bs n = - case unParser p (mkState bs n) of - Left (bs', msg) -> (bs', Left $ showError msg) - Right (a, S sb lb n') -> (sb +: lb, Right (a, n')) - where - showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n" - showError msgs = "Parser error, expected one of:\n" ++ unlines msgs - --- | Run a parser. -parse :: Parser a -> LB.ByteString - -> (LB.ByteString, Either ParseError a) -parse p bs = case parseAt p bs 0 of - (bs', Right (a, _)) -> (bs', Right a) - (bs', Left err) -> (bs', Left err) - -parseTest :: (Show a) => Parser a -> LB.ByteString -> IO () -parseTest p s = - case parse p s of - (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st - (_, Right r) -> print r - hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 70 -import Control.Applicative - (Alternative(..), Applicative(..), (<$>), (<*), (*>)) -import Control.Monad (MonadPlus(..), ap, liftM2) -import Control.Monad.Fix (MonadFix(..)) +import Control.Applicative ((<$>)) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 73 -import qualified Data.ByteString.Lazy.Internal as LB +import Data.ByteString.Internal (w2c) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 75 -import Data.Int (Int64) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 77 +import qualified Data.ParserCombinators.ByteStringParser.Internal as I +import Data.ParserCombinators.ByteStringParser.Internal + (Parser, ParseError, (), parse, parseAt, parseTest, try, manyTill, eof, + skipMany, skipMany1, count, lookAhead, peek, sepBy, sepBy1, string, + eitherP, getInput, getConsumed, takeAll, notEmpty, match) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 84 -type ParseError = String - -data S = S {-# UNPACK #-} !SB.ByteString - LB.ByteString - {-# UNPACK #-} !Int64 - -newtype Parser a = Parser { - unParser :: S -> Either (LB.ByteString, [String]) (a, S) - } - -instance Functor Parser where - fmap f p = - Parser $ \s -> - case unParser p s of - Right (a, s') -> Right (f a, s') - Left err -> Left err - -instance Monad Parser where - return a = Parser $ \s -> Right (a, s) - m >>= f = Parser $ \s -> - case unParser m s of - Right (a, s') -> unParser (f a) s' - Left (s', msgs) -> Left (s', msgs) - fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err]) - -instance MonadFix Parser where - mfix f = Parser $ \s -> - let r = case r of - Right (a, _) -> unParser (f a) s - err -> err - in r - -zero :: Parser a -zero = Parser $ \(S sb lb _) -> Left (sb +: lb, []) -{-# INLINE zero #-} - -plus :: Parser a -> Parser a -> Parser a -plus p1 p2 = - Parser $ \s@(S sb lb _) -> - case unParser p1 s of - Left (_, msgs1) -> - case unParser p2 s of - Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2)) - ok -> ok - ok -> ok -{-# INLINE plus #-} - -instance MonadPlus Parser where - mzero = zero - mplus = plus - -instance Applicative Parser where - pure = return - (<*>) = ap - -instance Alternative Parser where - empty = zero - (<|>) = plus - -mkState :: LB.ByteString -> Int64 -> S -mkState s = case s of - LB.Empty -> S SB.empty s - LB.Chunk x xs -> S x xs - --- | Turn our chunked representation back into a normal lazy --- ByteString. -(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString -sb +: lb | SB.null sb = lb - | otherwise = LB.Chunk sb lb -{-# INLINE (+:) #-} - -infix 0 - --- | Name the parser. -() :: Parser a -> String -> Parser a -p msg = - Parser $ \s@(S sb lb _) -> - case unParser p s of - (Left _) -> Left (sb +: lb, [msg]) - ok -> ok -{-# INLINE () #-} - -nextChunk :: Parser () -nextChunk = Parser $ \(S _ lb n) -> - case lb of - LB.Chunk sb' lb' -> Right ((), S sb' lb' n) - LB.Empty -> Left (lb, []) - --- | Get remaining input. -getInput :: Parser LB.ByteString -getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s) - --- | Get number of bytes consumed so far. -getConsumed :: Parser Int64 -getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s) - hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 86 -satisfy p = - Parser $ \s@(S sb lb n) -> - case SB.uncons sb of - Just (c, sb') | p c -> Right (c, S sb' lb (n + 1)) - | otherwise -> Left (sb +: lb, []) - Nothing -> unParser (nextChunk >> satisfy p) s +satisfy p = w2c <$> I.satisfy (p . w2c) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 130 -sepBy :: Parser a -> Parser s -> Parser [a] -sepBy p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) <|> return [] - -sepBy1 :: Parser a -> Parser s -> Parser [a] -sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) - --- | Satisfy a literal string. -string :: LB.ByteString -> Parser LB.ByteString -string s = Parser $ \(S sb lb n) -> - let bs = sb +: lb - l = LB.length s - (h, t) = LB.splitAt l bs - in if s == h - then Right (s, mkState t (n + l)) - else Left (bs, []) -{-# INLINE string #-} - hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 132 -stringCI s = Parser $ \(S sb lb n) -> - let bs = sb +: lb - l = LB.length s - (h, t) = LB.splitAt l bs - in if ls == LB.map toLower h - then Right (s, mkState t (n + l)) - else Left (bs, []) - where ls = LB.map toLower s +stringCI = I.stringTransform (LB.map toLower) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 135 --- | Apply the given parser repeatedly, returning every parse result. -count :: Int -> Parser a -> Parser [a] -count n p = sequence (replicate n p) -{-# INLINE count #-} - -try :: Parser a -> Parser a -try p = Parser $ \s@(S sb lb _) -> - case unParser p s of - Left (_, msgs) -> Left (sb +: lb, msgs) - ok -> ok - --- | Detect 'end of file'. -eof :: Parser () -eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb - then Right ((), s) - else Left (sb +: lb, ["EOF"]) - -takeAll :: Parser LB.ByteString -takeAll = Parser $ \(S sb lb n) -> - let bs = sb +: lb - in Right (bs, mkState LB.empty (n + LB.length bs)) - -oneChunk :: SB.ByteString -> LB.ByteString -oneChunk s = LB.Chunk s LB.Empty - -length64 :: SB.ByteString -> Int64 -length64 = fromIntegral . SB.length - hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 137 -takeWhile p = Parser $ \s@(S sb lb n) -> - let (h, t) = SB.span p sb - in if SB.null t - then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s - else Right (oneChunk h, S t lb (n + length64 h)) +takeWhile p = I.takeWhile (p . w2c) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 141 -takeTill p = takeWhile (not . p) <* satisfy p +takeTill p = I.takeTill (p . w2c) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 145 -takeWhile1 p = Parser $ \s@(S sb lb n) -> - let (h, t) = SB.span p sb - in if SB.null t - then case unParser (nextChunk *> takeWhile p) s of - Left err -> Left err - Right (xs, s') -> - let bs = h +: xs - in if LB.null bs - then Left (sb +: lb, []) - else Right (bs, s') - else Right (oneChunk h, S t lb (n + length64 h)) +takeWhile1 p = I.takeWhile1 (p . w2c) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 150 -skipWhile p = takeWhile p >> return () +skipWhile p = I.skipWhile (p . w2c) hunk ./src/Data/ParserCombinators/ByteStringParser/Char8.hs 158 -manyTill :: Parser a -> Parser b -> Parser [a] -manyTill p end = scan - where scan = (end >> return []) <|> liftM2 (:) p scan - --- |'skipMany' - skip zero or many instances of the parser -skipMany :: Parser a -> Parser () -skipMany p = scan - where scan = (p >> scan) <|> return () - --- |'skipMany1' - skip one or many instances of the parser -skipMany1 :: Parser a -> Parser () -skipMany1 p = p >> skipMany p - --- | Test that a parser returned a non-null ByteString. -notEmpty :: Parser LB.ByteString -> Parser LB.ByteString -notEmpty p = Parser $ \s -> - case unParser p s of - o@(Right (a, _)) -> - if LB.null a - then Left (a, ["notEmpty"]) - else o - x -> x - --- | Parse some input with the given parser and return that input --- without copying it. -match :: Parser a -> Parser LB.ByteString -match p = do bs <- getInput - start <- getConsumed - p - end <- getConsumed - return (LB.take (end - start) bs) - -eitherP :: Parser a -> Parser b -> Parser (Either a b) -eitherP a b = (Left <$> a) <|> (Right <$> b) -{-# INLINE eitherP #-} - -peek :: Parser a -> Parser (Maybe a) -peek p = Parser $ \s -> - case unParser p s of - Right (m, _) -> Right (Just m, s) - _ -> Right (Nothing, s) - -lookAhead :: Parser a -> Parser a -lookAhead p = Parser $ \s -> - case unParser p s of - Right (m, _) -> Right (m, s) - Left (e, bs) -> Left (e, bs) - -parseAt :: Parser a -> LB.ByteString -> Int64 - -> (LB.ByteString, Either ParseError (a, Int64)) -parseAt p bs n = - case unParser p (mkState bs n) of - Left (bs', msg) -> (bs', Left $ showError msg) - Right (a, S sb lb n') -> (sb +: lb, Right (a, n')) - where - showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n" - showError msgs = "Parser error, expected one of:\n" ++ unlines msgs - --- | Run a parser. -parse :: Parser a -> LB.ByteString - -> (LB.ByteString, Either ParseError a) -parse p bs = case parseAt p bs 0 of - (bs', Right (a, _)) -> (bs', Right a) - (bs', Left err) -> (bs', Left err) - -parseTest :: (Show a) => Parser a -> LB.ByteString -> IO () -parseTest p s = - case parse p s of - (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st - (_, Right r) -> print r - move ./src/Data/ParserCombinators/ByteStringParser ./src/Data/ParserCombinators/Attoparsec move ./src/Data/ParserCombinators/ByteStringParser.hs ./src/Data/ParserCombinators/Attoparsec.hs hunk ./bytestringparser.cabal 29 - exposed-modules: Data.ParserCombinators.ByteStringParser - Data.ParserCombinators.ByteStringParser.Char8 - Data.ParserCombinators.ByteStringParser.FastSet - Data.ParserCombinators.ByteStringParser.Internal + exposed-modules: Data.ParserCombinators.Attoparsec + Data.ParserCombinators.Attoparsec.Char8 + Data.ParserCombinators.Attoparsec.FastSet + Data.ParserCombinators.Attoparsec.Internal hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 3 --- Module : Data.ParserCombinators.ByteStringParser.Char8 +-- Module : Data.ParserCombinators.Attoparsec.Char8 hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 15 -module Data.ParserCombinators.ByteStringParser.Char8 +module Data.ParserCombinators.Attoparsec.Char8 hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 75 -import Data.ParserCombinators.ByteStringParser.FastSet +import Data.ParserCombinators.Attoparsec.FastSet hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 77 -import qualified Data.ParserCombinators.ByteStringParser.Internal as I -import Data.ParserCombinators.ByteStringParser.Internal +import qualified Data.ParserCombinators.Attoparsec.Internal as I +import Data.ParserCombinators.Attoparsec.Internal hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 5 --- Module : Data.ParserCombinators.ByteStringParser.FastSet +-- Module : Data.ParserCombinators.Attoparsec.FastSet hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 16 -module Data.ParserCombinators.ByteStringParser.FastSet +module Data.ParserCombinators.Attoparsec.FastSet hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 62 - where search !lo !hi + where search lo hi hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 95 - where go c (!n, cs) | c == noEntry = (n - 1, cs) - | otherwise = (n - 1, I.w2c n:cs) + where go c (n, cs) | c == noEntry = flip (,) cs $! n - 1 + | otherwise = flip (,) (I.w2c n:cs) $! n - 1 hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 3 --- Module : Data.ParserCombinators.ByteStringParser.Internal +-- Module : Data.ParserCombinators.Attoparsec.Internal hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 15 -module Data.ParserCombinators.ByteStringParser.Internal +module Data.ParserCombinators.Attoparsec.Internal hunk ./src/Data/ParserCombinators/Attoparsec.hs 3 --- Module : Data.ParserCombinators.ByteStringParser +-- Module : Data.ParserCombinators.Attoparsec hunk ./src/Data/ParserCombinators/Attoparsec.hs 15 -module Data.ParserCombinators.ByteStringParser +module Data.ParserCombinators.Attoparsec hunk ./src/Data/ParserCombinators/Attoparsec.hs 64 -import Data.ParserCombinators.ByteStringParser.Internal +import Data.ParserCombinators.Attoparsec.Internal hunk ./bytestringparser.cabal 2 -version: 0.2.2 +version: 0.3 hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 13 --- Fast 8-bit character set membership. +-- Fast set membership tests for 'Word8' and 8-bit 'Char' values. The +-- set representation is unboxed for efficiency. For sets of fewer +-- than 32 elements, we test for membership using a binary search. +-- For larger sets, we use a lookup table. hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 52 --- | Create a character set. +-- | Create a set. hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 57 --- | Check the table for membership. -memberChar :: Char -> FastSet -> Bool -memberChar c = memberWord8 (I.c2w c) - --- | Check the table for membership. +-- | Check the set for membership. hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 70 +-- | Check the set for membership. Only works with 8-bit characters: +-- characters above code point 255 will give wrong answers. +memberChar :: Char -> FastSet -> Bool +memberChar c = memberWord8 (I.c2w c) + hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 32 +import Data.Bits ((.&.), (.|.), shiftR) hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 34 --- import Data.ByteString.Char8 (pack) hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 44 - show (Sorted s) = "FastSet " ++ show s - show (Table t) = "FastSet " ++ fromTable t + show _ = "FastSet" hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 49 -tableCutoff = 32 +tableCutoff = 8 hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 58 -memberWord8 w (Table t) = U.unsafeIndex t (fromIntegral w) == entry +memberWord8 w (Table t) = + let i = fromIntegral w + in U.unsafeIndex t (i `shiftR` 3) .&. fromIntegral (i .&. 7) == 1 hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 76 --- | The value in a table that indicates that a character is not --- present. We avoid NUL to make the table representation printable. -noEntry :: Word8 -noEntry = 0x5f - --- | The value in a table that indicates that a character is present. --- We use a printable character for readability. -entry :: Word8 -entry = 0x21 - hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 77 -mkTable s = I.unsafeCreate 256 $ \t -> do - I.memset t noEntry 256 +mkTable s = I.unsafeCreate 32 $ \t -> do + I.memset t 0 32 hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 83 - pokeByteOff t (fromIntegral c) entry + let i = fromIntegral c + o = i `shiftR` 3 + prev <- peekByteOff t o + pokeByteOff t o (prev .|. (i .&. 7)) hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 90 --- | Turn the table representation into a string, for debugging. -fromTable :: B.ByteString -> String -fromTable = snd . B.foldr go (0xff, []) - where go c (n, cs) | c == noEntry = flip (,) cs $! n - 1 - | otherwise = flip (,) (I.w2c n:cs) $! n - 1 - hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 64 -import Control.Applicative - (Alternative(..), Applicative(..), (<$>), (<*), (*>)) +import Control.Applicative (Alternative(..), Applicative(..), (<$>)) hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 69 -import qualified Data.ByteString.Lazy.Internal as LB +import qualified Data.ByteString.Lazy.Internal as I hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 135 - LB.Empty -> S SB.empty s - LB.Chunk x xs -> S x xs + I.Empty -> S SB.empty s + I.Chunk x xs -> S x xs hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 142 - | otherwise = LB.Chunk sb lb + | otherwise = I.Chunk sb lb hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 159 - LB.Chunk sb' lb' -> Right ((), S sb' lb' n) - LB.Empty -> Left (lb, []) + I.Chunk sb' lb' -> Right ((), S sb' lb' n) + I.Empty -> Left (lb, []) hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 247 -oneChunk :: SB.ByteString -> LB.ByteString -oneChunk s = LB.Chunk s LB.Empty - -length64 :: SB.ByteString -> Int64 -length64 = fromIntegral . SB.length - hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 249 -takeWhile p = Parser $ \s@(S sb lb n) -> - let (h, t) = SB.span p sb - in if SB.null t - then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s - else Right (oneChunk h, S t lb (n + length64 h)) +takeWhile p = + Parser $ \(S sb lb n) -> + case LB.span p (sb +: lb) of + (h,t) -> Right (h, mkState t (n + LB.length h)) hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 256 -takeTill p = takeWhile (not . p) <* satisfy p +takeTill p = + Parser $ \(S sb lb n) -> + case LB.span (not . p) (sb +: lb) of + (h,t) | LB.null t -> Left (h, []) + | otherwise -> Right (h, mkState t (n + LB.length h)) hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 264 -takeWhile1 p = Parser $ \s@(S sb lb n) -> - let (h, t) = SB.span p sb - in if SB.null t - then case unParser (nextChunk *> takeWhile p) s of - Left err -> Left err - Right (xs, s') -> - let bs = h +: xs - in if LB.null bs - then Left (sb +: lb, []) - else Right (bs, s') - else Right (oneChunk h, S t lb (n + length64 h)) +takeWhile1 p = + Parser $ \(S sb lb n) -> + case LB.span p (sb +: lb) of + (h,t) | LB.null h -> Left (t, []) + | otherwise -> Right (h, mkState t (n + LB.length h)) hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 331 + showError [""] = "Parser error\n" hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 333 + showError [] = "Parser error\n" move ./bytestringparser.cabal ./attoparsec.cabal hunk ./attoparsec.cabal 1 -name: bytestringparser -version: 0.3 +name: attoparsec +version: 0.4 hunk ./attoparsec.cabal 34 - ghc-options: -O2 -Wall -Werror -funbox-strict-fields + ghc-options: -O2 -Wall -funbox-strict-fields + -fliberate-case-threshold=1000 hunk ./attoparsec.cabal 10 +build-type: Simple hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 33 +import qualified Data.ByteString.Char8 as B8 hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 45 - show _ = "FastSet" + show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s) + show (Table _) = "FastSet Table" hunk ./src/Data/ParserCombinators/Attoparsec/FastSet.hs 69 - GT -> search lo (mid - 1) - LT -> search (mid + 1) hi + GT -> search (mid + 1) hi + LT -> search lo (mid - 1) hunk ./attoparsec.cabal 29 + build-depends: bytestring-lexing >= 0.2 + hunk ./src/Data/ParserCombinators/Attoparsec.hs 31 + , many + , many1 hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 31 + , many + , many1 hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 57 + -- * Numeric parsers. + , int + , integer + , double + hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 75 + , endOfLine hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 89 - eitherP, getInput, getConsumed, takeAll, notEmpty, match) + eitherP, getInput, getConsumed, takeAll, notEmpty, match, endOfLine, + setInput, many, many1) +import Data.ByteString.Lex.Lazy.Double (readDouble) hunk ./src/Data/ParserCombinators/Attoparsec/Char8.hs 167 + +numeric :: (LB.ByteString -> Maybe (a,LB.ByteString)) -> Parser a +numeric f = do + s <- getInput + case f s of + Nothing -> fail "integer" + Just (i,s') -> setInput s' >> return i + +-- | Parse an integer. The position counter is not updated. +int :: Parser Int +int = numeric LB.readInt + +-- | Parse an integer. The position counter is not updated. +integer :: Parser Integer +integer = numeric LB.readInteger + +-- | Parse a Double. The position counter is not updated. +double :: Parser Double +double = numeric readDouble hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 31 + , many + , many1 hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 57 + , setInput hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 65 + , endOfLine hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 73 -import qualified Data.ByteString.Lazy.Internal as I +import qualified Data.ByteString.Lazy.Char8 as L8 +import qualified Data.ByteString.Unsafe as U +import qualified Data.ByteString.Internal as I +import qualified Data.ByteString.Lazy.Internal as LB hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 83 +-- State invariants: +-- * If the strict bytestring is empty, the entire input is considered +-- to be empty. +-- * Otherwise, the strict bytestring must not be empty. hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 146 - I.Empty -> S SB.empty s - I.Chunk x xs -> S x xs + LB.Empty -> S SB.empty s + LB.Chunk x xs -> S x xs hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 153 - | otherwise = I.Chunk sb lb + | otherwise = LB.Chunk sb lb hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 170 - I.Chunk sb' lb' -> Right ((), S sb' lb' n) - I.Empty -> Left (lb, []) + LB.Chunk sb' lb' -> Right ((), S sb' lb' n) + LB.Empty -> Left (lb, []) hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 177 +-- | Set the remaining input. +setInput :: LB.ByteString -> Parser () +setInput bs = Parser $ \(S _ _ n) -> Right ((), mkState bs n) + hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 220 - (h, t) = LB.splitAt l bs + (h,t) = LB.splitAt l bs hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 226 +endOfLine :: Parser () +endOfLine = Parser $ \(S sb lb n) -> + let bs = sb +: lb + in case I.w2c (U.unsafeHead sb) of + '\n' -> Right ((), mkState (LB.tail bs) (n + 1)) + '\r' -> let (h,t) = LB.splitAt 2 bs + rn = L8.pack "\r\n" + in if h == rn + then Right ((), mkState t (n + 2)) + else Right ((), mkState (LB.tail bs) (n + 1)) + _ -> Left (bs, ["EOL"]) + hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 285 - case LB.span (not . p) (sb +: lb) of + case LB.break p (sb +: lb) of hunk ./src/Data/ParserCombinators/Attoparsec/Internal.hs 307 +many :: Parser a -> Parser [a] +many p = ((:) <$> p <*> many p) <|> return [] + +many1 :: Parser a -> Parser [a] +many1 p = (:) <$> p <*> many p + hunk ./attoparsec.cabal 2 -version: 0.4 +version: 0.5 hunk ./attoparsec.cabal 7 +maintainer: Bryan O'Sullivan hunk ./attoparsec.cabal 9 -synopsis: Combinator parsing with Data.ByteString.Lazy +synopsis: Fast combinator parsing with Data.ByteString.Lazy +description: Fast combinator parsing with Data.ByteString.Lazy