{-|
Core functionality for processing new line delimited data

In the context of creating cohorts from event lines,
the functionality herein may be useful when prefiltering events
in order to reduce the size of the input data.
For example, if the cohort is consists only of females,
one could run a prefilter to remove any groups (subjects in this case)
of event lines for males.
-}
{-# LANGUAGE DeriveGeneric     #-}
{-# LANGUAGE OverloadedStrings #-}
module Hasklepias.LineFilterApp.ProcessLines.Logic
  ( -- * Processing newline delimited data
    --
    -- $processAppLines
    processAppLinesStrict
  , processAppLinesLazy
  , LineProcessor(..)
  , LineStatus(..)
  , lineAppErrorMessage
  ) where

import           Blammo.Logging
import qualified Data.ByteString            as BS
import           Data.ByteString.Builder
import qualified Data.ByteString.Char8      as BSC
import qualified Data.ByteString.Lazy       as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import           Data.Int
import           GHC.Generics

{-
INTERNAL
The type used in the processAppLines* functions to handle errors
-}
type LineAppMonad = Either LineAppError

{-
INTERNAL
The type of errors in a line processing application
-}
data LineAppError =
    LineParseErrorA Int -- ^ indicates a failure of the @t -> Maybe a@ function
  | LineParseErrorID Int -- ^ indicates a failure of the @t -> Maybe id@ function
  deriving LineAppError -> LineAppError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: LineAppError -> LineAppError -> Bool
$c/= :: LineAppError -> LineAppError -> Bool
== :: LineAppError -> LineAppError -> Bool
$c== :: LineAppError -> LineAppError -> Bool
Eq

instance Show LineAppError where
  show :: LineAppError -> String
show (LineParseErrorA Int
i) = String
"Line " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
i forall a. Semigroup a => a -> a -> a
<> String
": failed to decode line"
  show (LineParseErrorID Int
i) =
    String
"Line " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
i forall a. Semigroup a => a -> a -> a
<> String
": failed to decode identifier"

-- | Create a @'Blammo.Logging.Message'@ from a @'LineAppError'@.
lineAppErrorMessage :: LineAppError -> Message
lineAppErrorMessage :: LineAppError -> Message
lineAppErrorMessage (LineParseErrorA Int
n) =
  Text
"Failed to decode line" Text -> [SeriesElem] -> Message
:# [Key
"line-number" forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Int
n]
lineAppErrorMessage (LineParseErrorID Int
n) =
  Text
"Failed to decode identifier" Text -> [SeriesElem] -> Message
:# [Key
"line-number" forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Int
n]

{-
INTERNAL
Run a parser then a predicate,
returning `Nothing` if the parsing failed.
-}
parseThenPredicate :: (t -> Maybe a) -> (a -> Bool) -> t -> Maybe (a, Bool)
parseThenPredicate :: forall t a. (t -> Maybe a) -> (a -> Bool) -> t -> Maybe (a, Bool)
parseThenPredicate t -> Maybe a
psl a -> Bool
prd t
x = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> Bool
prd) forall a b. (a -> b) -> a -> b
$ (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> t -> Maybe a
psl t
x forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> t -> Maybe a
psl t
x

{-
INTERNAL
Function that lifts a @Maybe (a, Bool)@ to the App monad.
The `Int` input is the line number passed in the case of an error.
-}
handleLineParse :: Int -> Maybe (a, Bool) -> LineAppMonad (a, Bool)
handleLineParse :: forall a. Int -> Maybe (a, Bool) -> LineAppMonad (a, Bool)
handleLineParse Int
i Maybe (a, Bool)
x = case Maybe (a, Bool)
x of
  Maybe (a, Bool)
Nothing     -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Int -> LineAppError
LineParseErrorA Int
i
  Just (a
x, Bool
b) -> forall a b. b -> Either a b
Right (a
x, Bool
b)


{-
INTERNAL
A data type containing functions used in the line processing functions.
The purpose of this type is be able to create versions
of the process* functions for both (e.g.)
strict and lazy bytestrings
without creating multiple versions of the same logic.
The index type for strict ByteString is Int,
but for lazy ByteString is Int64.
Hence we run into the issue of needing
functions specific to each string type.
I (BS) could not find a typeclass
that provides the functionality needed.
-}
data LineFunctions t i = MkLineFunctions
  { forall t i. LineFunctions t i -> t -> Bool
isEmpty     :: t -> Bool
  , forall t i. LineFunctions t i -> t -> Maybe i
findNewLine :: t -> Maybe i
  , forall t i. LineFunctions t i -> t -> i -> Maybe i -> t
takeSubset  :: t -> i -> Maybe i -> t
  , forall t i. LineFunctions t i -> t -> Builder
build       :: t -> Builder
  , forall t i. LineFunctions t i -> Builder -> t
runBuilder  :: Builder -> t
  }

lineFunctionsStrict :: LineFunctions BS.ByteString Int
lineFunctionsStrict :: LineFunctions ByteString Int
lineFunctionsStrict = MkLineFunctions
  { isEmpty :: ByteString -> Bool
isEmpty     = ByteString -> Bool
BS.null
  , takeSubset :: ByteString -> Int -> Maybe Int -> ByteString
takeSubset  = \ByteString
x Int
i Maybe Int
n -> case Maybe Int
n of
                    Maybe Int
Nothing -> Int -> ByteString -> ByteString
BS.drop Int
i ByteString
x
                    Just Int
j  -> Int -> ByteString -> ByteString
BS.take Int
j (Int -> ByteString -> ByteString
BS.drop Int
i ByteString
x)
  , findNewLine :: ByteString -> Maybe Int
findNewLine = Char -> ByteString -> Maybe Int
BSC.elemIndex Char
'\n'
  , build :: ByteString -> Builder
build       = ByteString -> Builder
byteString
  , runBuilder :: Builder -> ByteString
runBuilder  = ByteString -> ByteString
BL.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString
  }

lineFunctionsLazy :: LineFunctions BL.ByteString Int64
lineFunctionsLazy :: LineFunctions ByteString Int64
lineFunctionsLazy = MkLineFunctions
  { isEmpty :: ByteString -> Bool
isEmpty     = ByteString -> Bool
BL.null
  , takeSubset :: ByteString -> Int64 -> Maybe Int64 -> ByteString
takeSubset  = \ByteString
x Int64
i Maybe Int64
n -> case Maybe Int64
n of
                    Maybe Int64
Nothing -> Int64 -> ByteString -> ByteString
BL.drop Int64
i ByteString
x
                    Just Int64
j  -> Int64 -> ByteString -> ByteString
BL.take Int64
j (Int64 -> ByteString -> ByteString
BL.drop Int64
i ByteString
x)
  , findNewLine :: ByteString -> Maybe Int64
findNewLine = Char -> ByteString -> Maybe Int64
BLC.elemIndex Char
'\n'
  , build :: ByteString -> Builder
build       = ByteString -> Builder
lazyByteString
  , runBuilder :: Builder -> ByteString
runBuilder  = Builder -> ByteString
toLazyByteString
  }

{-------------------------------------------------------------------------------
   Across-group fold ("application") logic

This application processes streams of newline delimited text at two levels:

(1) Groups of lines defined by a parser.
For example, if the input is event lines data,
the application groups the input events by subject ID.

(2) Processing each group

-------------------------------------------------------------------------------}

{-
INTERNAL
Data tracking the state of the application.

NOTE:
The `Maybe` of the `lastLineID` is used in two senses of `Nothing`:
* as the the "start" of processing
* as an error in the parsing of a group identifier from a line
-}
data AppLines id i = MkAppLines
  { forall id i. AppLines id i -> Maybe id
lastLineID  :: Maybe id -- ^ the group ID at the previous line
  , forall id i. AppLines id i -> i
grpStart    :: i -- ^  the index at which the current group started
  , forall id i. AppLines id i -> Bool
grpStatus   :: Bool -- ^ the predicate status of a group
  , forall id i. AppLines id i -> Maybe Builder
grpAcc      :: Maybe Builder -- ^ group-level accumulator
  , forall id i. AppLines id i -> Maybe i
lastNewLine :: Maybe i -- ^ the index of the last new line
  , forall id i. AppLines id i -> Builder
builderAcc  :: Builder -- ^ the accumulated results as a ByteString Builder
  , forall id i. AppLines id i -> AppLinesLog
lineLog     :: AppLinesLog -- ^ a logger
  }

{-|
Log information about a line processing application.
-}
data AppLinesLog = MkAppLinesLog
  { AppLinesLog -> Int
groupsProcessed :: !Int -- ^ count of groups processed
  , AppLinesLog -> Int
groupsKept      :: !Int -- ^ count of groups dropped
  , AppLinesLog -> Int
linesProcessed  :: !Int -- ^ count of lines processed
  }
  deriving (AppLinesLog -> AppLinesLog -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: AppLinesLog -> AppLinesLog -> Bool
$c/= :: AppLinesLog -> AppLinesLog -> Bool
== :: AppLinesLog -> AppLinesLog -> Bool
$c== :: AppLinesLog -> AppLinesLog -> Bool
Eq, Int -> AppLinesLog -> ShowS
[AppLinesLog] -> ShowS
AppLinesLog -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AppLinesLog] -> ShowS
$cshowList :: [AppLinesLog] -> ShowS
show :: AppLinesLog -> String
$cshow :: AppLinesLog -> String
showsPrec :: Int -> AppLinesLog -> ShowS
$cshowsPrec :: Int -> AppLinesLog -> ShowS
Show, forall x. Rep AppLinesLog x -> AppLinesLog
forall x. AppLinesLog -> Rep AppLinesLog x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep AppLinesLog x -> AppLinesLog
$cfrom :: forall x. AppLinesLog -> Rep AppLinesLog x
Generic)

-- INTERNAL
incrementGroupCount :: AppLinesLog -> AppLinesLog
incrementGroupCount :: AppLinesLog -> AppLinesLog
incrementGroupCount (MkAppLinesLog Int
a Int
b Int
c) = Int -> Int -> Int -> AppLinesLog
MkAppLinesLog (Int
a forall a. Num a => a -> a -> a
+ Int
1) Int
b Int
c

-- INTERNAL
incrementGroupKept :: AppLinesLog -> AppLinesLog
incrementGroupKept :: AppLinesLog -> AppLinesLog
incrementGroupKept (MkAppLinesLog Int
a Int
b Int
c) = Int -> Int -> Int -> AppLinesLog
MkAppLinesLog Int
a (Int
b forall a. Num a => a -> a -> a
+ Int
1) Int
c

{-
INTERNAL
A type to avoid boolean blindness in processAppLinesInternal
-}
data GroupChange = SameGroup | NewGroup

checkGroupChange :: (Eq id) => id -> id -> GroupChange
checkGroupChange :: forall id. Eq id => id -> id -> GroupChange
checkGroupChange id
x id
y = if id
x forall a. Eq a => a -> a -> Bool
== id
y then GroupChange
SameGroup else GroupChange
NewGroup

{-|
This type's two functions are used in the @processAppLines*@ functions
to allow a developer to specify the logic of how to tranform lines.
The first function is the "transformer" which defines how to tranform the
@a@ into the output @b@ type,
allowing to specify whether to drop or keep a line using the @`LineStatus`@ type.
In the case the line is to be kept,
the second function, the "builder",
creates the line to be output
from the line identifer and the value of type @b@.
-}
data LineProcessor a b id =
    NoTransformation -- ^ Do not transform lines
  | TransformWith (a -> LineStatus b) -- ^ the transformer
                    ((id, b) -> Builder) -- ^ the builder

{-|
A type used to indicate whether to drop or keep a line
in the transformer function of a @'LineProcessor'@.
-}
data LineStatus b =
      DropLine -- ^ indicates a line should be dropped from the output
    | KeepLine b  -- ^ includes a line should be kept in the output
  deriving (LineStatus b -> LineStatus b -> Bool
forall b. Eq b => LineStatus b -> LineStatus b -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: LineStatus b -> LineStatus b -> Bool
$c/= :: forall b. Eq b => LineStatus b -> LineStatus b -> Bool
== :: LineStatus b -> LineStatus b -> Bool
$c== :: forall b. Eq b => LineStatus b -> LineStatus b -> Bool
Eq, Int -> LineStatus b -> ShowS
forall b. Show b => Int -> LineStatus b -> ShowS
forall b. Show b => [LineStatus b] -> ShowS
forall b. Show b => LineStatus b -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [LineStatus b] -> ShowS
$cshowList :: forall b. Show b => [LineStatus b] -> ShowS
show :: LineStatus b -> String
$cshow :: forall b. Show b => LineStatus b -> String
showsPrec :: Int -> LineStatus b -> ShowS
$cshowsPrec :: forall b. Show b => Int -> LineStatus b -> ShowS
Show)

instance Functor LineStatus where
  fmap :: forall a b. (a -> b) -> LineStatus a -> LineStatus b
fmap a -> b
f LineStatus a
DropLine     = forall b. LineStatus b
DropLine
  fmap a -> b
f (KeepLine a
x) = forall b. b -> LineStatus b
KeepLine (a -> b
f a
x)

{-
INTERNAL
The core recursive logic of processing the lines across groups.
The function "rolls" over the *single string* of new line delimited data
updating an `AppLines` value as it encounters new line characters
within that string.

IMPORTANT
All the lines for each group are assumed to be contiguous.

See @'processAppLinesStrict'@ for a description of arguments.

-}
processAppLinesInternal
  :: (Eq id, Show id, Num i, Monoid t, Show t)
  => LineFunctions t i
  -> (t -> Maybe id)
  -> (t -> Maybe a)
  -> (a -> Bool)
  -> LineProcessor a b id
  -> AppLines id i
  -> t
  -> LineAppMonad (AppLines id i)
processAppLinesInternal :: forall id i t a b.
(Eq id, Show id, Num i, Monoid t, Show t) =>
LineFunctions t i
-> (t -> Maybe id)
-> (t -> Maybe a)
-> (a -> Bool)
-> LineProcessor a b id
-> AppLines id i
-> t
-> LineAppMonad (AppLines id i)
processAppLinesInternal LineFunctions t i
fs t -> Maybe id
pri t -> Maybe a
psl a -> Bool
prd LineProcessor a b id
pro AppLines id i
status t
x =
  case forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a. Num a => a -> a -> a
+ i
1) (forall id i. AppLines id i -> Maybe i
lastNewLine AppLines id i
status) of
    -- If no new line then we're done!
    -- Simply update the accumulator for the last group.
    Maybe i
Nothing -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ AppLines id i
status
      { builderAcc :: Builder
builderAcc = if forall id i. AppLines id i -> Bool
grpStatus AppLines id i
status
                       then forall {id}. AppLines id i -> Maybe i -> Builder
updateAcc AppLines id i
status forall a. Maybe a
Nothing
                       else forall id i. AppLines id i -> Builder
builderAcc AppLines id i
status
      , lineLog :: AppLinesLog
lineLog    = if forall id i. AppLines id i -> Bool
grpStatus AppLines id i
status
                       then AppLinesLog -> AppLinesLog
incrementGroupKept (forall id i. AppLines id i -> AppLinesLog
lineLog AppLines id i
status)
                       else forall id i. AppLines id i -> AppLinesLog
lineLog AppLines id i
status
      }
    -- Otherwise take the index immediately after the newline character as `i`
    Just i
i -> do

      let getTail :: Maybe i -> t
getTail   = forall t i. LineFunctions t i -> t -> i -> Maybe i -> t
takeSubset LineFunctions t i
fs t
x i
i
      -- Is there another newline character after `i`?
          nl :: Maybe i
nl        = forall t i. LineFunctions t i -> t -> Maybe i
findNewLine LineFunctions t i
fs (Maybe i -> t
getTail forall a. Maybe a
Nothing)
          thisLine :: t
thisLine  = Maybe i -> t
getTail Maybe i
nl
          lineCount :: Int
lineCount = AppLinesLog -> Int
linesProcessed (forall id i. AppLines id i -> AppLinesLog
lineLog AppLines id i
status) forall a. Num a => a -> a -> a
+ Int
1
      -- always update the lastNewLine and count
          newStatus :: AppLines id i
newStatus = AppLines id i
status
            { lastNewLine :: Maybe i
lastNewLine = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (i
i forall a. Num a => a -> a -> a
+) Maybe i
nl
            , lineLog :: AppLinesLog
lineLog     = (forall id i. AppLines id i -> AppLinesLog
lineLog AppLines id i
status) { linesProcessed :: Int
linesProcessed = Int
lineCount }
            }

      if forall t i. LineFunctions t i -> t -> Bool
isEmpty LineFunctions t i
fs t
thisLine
        then AppLines id i -> LineAppMonad (AppLines id i)
go AppLines id i
newStatus

        -- When the ID has not changed,
        -- just update the index of the last newline.
        -- When the ID does change,
        -- then process the group for the last ID
        -- and update the ID in the accumulator
        else case t -> Maybe id
pri t
thisLine of
          Maybe id
Nothing -> forall a b. a -> Either a b
Left (Int -> LineAppError
LineParseErrorID Int
lineCount)
          Just id
thisLineID ->
            case
                ( forall id. Eq id => id -> id -> GroupChange
checkGroupChange (forall a. a -> Maybe a
Just id
thisLineID) (forall id i. AppLines id i -> Maybe id
lastLineID AppLines id i
status)
                , forall id i. AppLines id i -> Bool
grpStatus AppLines id i
status
                )
              of
              -- ID hasn't changed, predicate already satisfied ==> keep going
                (GroupChange
SameGroup, Bool
True) ->
                  Int -> t -> LineAppMonad (a, Bool)
checkLine Int
lineCount t
thisLine
                    forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (\(a
x, Bool
_) -> AppLines id i -> LineAppMonad (AppLines id i)
go AppLines id i
newStatus
                          { grpAcc :: Maybe Builder
grpAcc = forall {id} {i}. AppLines id i -> a -> id -> Maybe Builder
updateGrp AppLines id i
status a
x id
thisLineID
                          }
                        )
                -- ID hasn't changed, predicate not satisfied ==>
                -- update status with this line
                (GroupChange
SameGroup, Bool
False) ->
                  Int -> t -> LineAppMonad (a, Bool)
checkLine Int
lineCount t
thisLine
                    forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (\(a
x, Bool
b) -> AppLines id i -> LineAppMonad (AppLines id i)
go AppLines id i
newStatus
                          { grpStatus :: Bool
grpStatus = Bool
b
                          , grpAcc :: Maybe Builder
grpAcc    = forall {id} {i}. AppLines id i -> a -> id -> Maybe Builder
updateGrp AppLines id i
status a
x id
thisLineID
                          }
                        )
                -- ID has changed, predicate satisfied ==>
                -- add last group to builder and update
                (GroupChange
NewGroup, Bool
True) ->
                  Int -> t -> LineAppMonad (a, Bool)
checkLine Int
lineCount t
thisLine
                    forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (\(a
x, Bool
b) -> AppLines id i -> LineAppMonad (AppLines id i)
go AppLines id i
newStatus
                          { lastLineID :: Maybe id
lastLineID = forall a. a -> Maybe a
Just id
thisLineID
                          , grpStatus :: Bool
grpStatus  = Bool
b
                          , grpStart :: i
grpStart   = i
i
                          , grpAcc :: Maybe Builder
grpAcc     = forall a. LineStatus a -> Maybe a
toAcc forall a b. (a -> b) -> a -> b
$ a -> id -> LineStatus Builder
processLine a
x id
thisLineID
                          , builderAcc :: Builder
builderAcc = forall {id}. AppLines id i -> Maybe i -> Builder
updateAcc AppLines id i
status (forall a. a -> Maybe a
Just i
i)
                          , lineLog :: AppLinesLog
lineLog = (AppLinesLog -> AppLinesLog
incrementGroupKept forall b c a. (b -> c) -> (a -> b) -> a -> c
. AppLinesLog -> AppLinesLog
incrementGroupCount)
                                        (forall id i. AppLines id i -> AppLinesLog
lineLog AppLines id i
newStatus)
                          }
                        )

                -- ID has changed, predicate not satisfied ==>
                -- reset group accumulator and update
                (GroupChange
NewGroup, Bool
False) ->
                  Int -> t -> LineAppMonad (a, Bool)
checkLine Int
lineCount t
thisLine
                    forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (\(a
x, Bool
b) -> AppLines id i -> LineAppMonad (AppLines id i)
go AppLines id i
newStatus
                          { lastLineID :: Maybe id
lastLineID = forall a. a -> Maybe a
Just id
thisLineID
                          , grpStart :: i
grpStart   = i
i
                          , grpStatus :: Bool
grpStatus  = Bool
b
                          , grpAcc :: Maybe Builder
grpAcc     = forall a. LineStatus a -> Maybe a
toAcc forall a b. (a -> b) -> a -> b
$ a -> id -> LineStatus Builder
processLine a
x id
thisLineID
                          , lineLog :: AppLinesLog
lineLog    = AppLinesLog -> AppLinesLog
incrementGroupCount (forall id i. AppLines id i -> AppLinesLog
lineLog AppLines id i
newStatus)
                          }
                        )
 where
  go :: AppLines id i -> LineAppMonad (AppLines id i)
go = forall a b c. (a -> b -> c) -> b -> a -> c
flip (forall id i t a b.
(Eq id, Show id, Num i, Monoid t, Show t) =>
LineFunctions t i
-> (t -> Maybe id)
-> (t -> Maybe a)
-> (a -> Bool)
-> LineProcessor a b id
-> AppLines id i
-> t
-> LineAppMonad (AppLines id i)
processAppLinesInternal LineFunctions t i
fs t -> Maybe id
pri t -> Maybe a
psl a -> Bool
prd LineProcessor a b id
pro) t
x
  checkLine :: Int -> t -> LineAppMonad (a, Bool)
checkLine Int
i t
x = forall a. Int -> Maybe (a, Bool) -> LineAppMonad (a, Bool)
handleLineParse Int
i forall a b. (a -> b) -> a -> b
$ forall t a. (t -> Maybe a) -> (a -> Bool) -> t -> Maybe (a, Bool)
parseThenPredicate t -> Maybe a
psl a -> Bool
prd t
x

  -- The function that processes a line depends on whether the user
  -- provided a line processor argument.
  -- processLine :: a -> id -> LineStatus Builder
  processLine :: a -> id -> LineStatus Builder
processLine a
x id
y = case LineProcessor a b id
pro of
    TransformWith a -> LineStatus b
transformLine (id, b) -> Builder
buildLine ->
      forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\b
v -> (id, b) -> Builder
buildLine (id
y, b
v)) (a -> LineStatus b
transformLine a
x)
    LineProcessor a b id
NoTransformation -> forall b. LineStatus b
DropLine

  -- Cast a @LineStatus@ to a @Maybe@
  toAcc :: LineStatus a -> Maybe a
  toAcc :: forall a. LineStatus a -> Maybe a
toAcc LineStatus a
DropLine     = forall a. Maybe a
Nothing
  toAcc (KeepLine a
x) = forall a. a -> Maybe a
Just a
x

  -- A helper function to update the group accumulator,
  -- whose logic depends on whether a LineProcessor is provided.
  -- updateGrp :: AppLines id a -> a -> id1 ->  Maybe Builder
  updateGrp :: AppLines id i -> a -> id -> Maybe Builder
updateGrp AppLines id i
status a
line id
grpId = case LineProcessor a b id
pro of
    TransformWith a -> LineStatus b
_ (id, b) -> Builder
_ -> case a -> id -> LineStatus Builder
processLine a
line id
grpId of
      -- keep going if line is to be dropped
      LineStatus Builder
DropLine    -> forall id i. AppLines id i -> Maybe Builder
grpAcc AppLines id i
status
      -- handle the case that the accumulator may be @Nothing@
      -- and needs to be initialized
      KeepLine Builder
bu -> case forall id i. AppLines id i -> Maybe Builder
grpAcc AppLines id i
status of
        Maybe Builder
Nothing  -> forall a. a -> Maybe a
Just Builder
bu
        Just Builder
acc -> forall a. a -> Maybe a
Just (Builder
acc forall a. Semigroup a => a -> a -> a
<> Char -> Builder
char8 Char
'\n' forall a. Semigroup a => a -> a -> a
<> Builder
bu)
    LineProcessor a b id
NoTransformation -> forall a. Maybe a
Nothing

  -- A helper function to update the main accumulator,
  -- whose logic depends on whether a LineProcessor is provided.
  -- updateAcc :: AppLines id i -> Maybe i -> Builder
  updateAcc :: AppLines id i -> Maybe i -> Builder
updateAcc AppLines id i
status Maybe i
i = case LineProcessor a b id
pro of
    TransformWith a -> LineStatus b
_ (id, b) -> Builder
_ -> case forall id i. AppLines id i -> Maybe Builder
grpAcc AppLines id i
status of
      Maybe Builder
Nothing  -> forall id i. AppLines id i -> Builder
builderAcc AppLines id i
status
      Just Builder
grp -> forall id i. AppLines id i -> Builder
builderAcc AppLines id i
status forall a. Semigroup a => a -> a -> a
<> Builder
grp forall a. Semigroup a => a -> a -> a
<> Char -> Builder
char8 Char
'\n'
    LineProcessor a b id
NoTransformation -> forall id i. AppLines id i -> Builder
builderAcc AppLines id i
status forall a. Semigroup a => a -> a -> a
<> forall t i. LineFunctions t i -> t -> Builder
build
      LineFunctions t i
fs
      (forall t i. LineFunctions t i -> t -> i -> Maybe i -> t
takeSubset LineFunctions t i
fs t
x (forall id i. AppLines id i -> i
grpStart AppLines id i
status) (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\i
z -> i
z forall a. Num a => a -> a -> a
- forall id i. AppLines id i -> i
grpStart AppLines id i
status) Maybe i
i))
{-# INLINE processAppLinesInternal #-}

{-
INTERNAL
The function used to create a processAppLines* function
targeted for a specific type.

See @'processAppLinesStrict'@ for a description of arguments.
-}
processAppLines
  :: (Eq id, Show id, Num i, Monoid t, Show i, Show t)
  => LineFunctions t i
  -> (t -> Maybe id)
  -> (t -> Maybe a)
  -> (a -> Bool)
  -> LineProcessor a b id
  -> t
  -> LineAppMonad t
processAppLines :: forall id i t a b.
(Eq id, Show id, Num i, Monoid t, Show i, Show t) =>
LineFunctions t i
-> (t -> Maybe id)
-> (t -> Maybe a)
-> (a -> Bool)
-> LineProcessor a b id
-> t
-> LineAppMonad t
processAppLines LineFunctions t i
fs t -> Maybe id
pri t -> Maybe a
psl a -> Bool
prd LineProcessor a b id
pro t
x =
  let result :: LineAppMonad (AppLines id i)
result = forall id i t a b.
(Eq id, Show id, Num i, Monoid t, Show t) =>
LineFunctions t i
-> (t -> Maybe id)
-> (t -> Maybe a)
-> (a -> Bool)
-> LineProcessor a b id
-> AppLines id i
-> t
-> LineAppMonad (AppLines id i)
processAppLinesInternal
        LineFunctions t i
fs
        t -> Maybe id
pri
        t -> Maybe a
psl
        a -> Bool
prd
        LineProcessor a b id
pro
        (forall id i.
Maybe id
-> i
-> Bool
-> Maybe Builder
-> Maybe i
-> Builder
-> AppLinesLog
-> AppLines id i
MkAppLines forall a. Maybe a
Nothing
                    i
0
                    Bool
False
                    forall a. Monoid a => a
mempty
                    (forall a. a -> Maybe a
Just (-i
1))
                    forall a. Monoid a => a
mempty
                    (Int -> Int -> Int -> AppLinesLog
MkAppLinesLog Int
0 Int
0 Int
0)
        )
        t
x
  in  forall t i. LineFunctions t i -> Builder -> t
runBuilder LineFunctions t i
fs forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall id i. AppLines id i -> Builder
builderAcc forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> LineAppMonad (AppLines id i)
result
{-# INLINE processAppLines #-}

{- $processAppLines

The @processAppLines*@ functions conceptually splits a string
into groups identified by an indentifier parsed from each line,
and then check whether any line
within a group satisfies a predicate condition
and (optionally) transforms and possibly drops each line.
When no @'LineProcessor'@ logic is supplied,
a single string is returned
containing the lines from all groups
for which the predicate is satisfied.
If @'LineProcessor'@ logic is supplied,
then a single string is returned
containing the lines from all groups
for which the predicate is satisfied
AND those lines set to be kept by the @'LineProcessor'@ logic.

IMPORTANT:
All the lines for each group should be contiguous in the input.

Two variants of @processAppLines*@ are available:

* 'processAppLinesStrict' works on strict 'BS.ByteString'.
This version is generally faster,
but requires that the entire input be loaded into memory.
* 'processAppLinesLazy' works on lazy 'BL.ByteString'.

These functions are difficult to demostrate succinctly.
For a complete example,
see the source code in @Hasklepias.AppBuilder.ProcessLines.Tests@.

The logic of the @'LineProcessor'@ works in conjunction with the filtering logic
supplied as an argument.
For example, a group that has no line satisfying the given predicate
will have no output,
even if the status of all the groups lines is @KeepLine@.
On the other hand,
the logic of the tranformer function could specify to drop
lines that satisfy the predicate,
In short, developers can flexibly specify the logic of their application.

Related functions include:

* @'Hasklepias.AppBuilder.LineFilterApp.makeLineFilterApp'@:
exposes the filtering logic of @'processAppLinesStrict'@ as an @IO ()@.

-}

-- | Process a strict 'BS.ByteString'.
processAppLinesStrict
  :: (Eq id, Show id)
  => (BS.ByteString -> Maybe id) -- ^ parser of a group identifier from a line
  -> (BS.ByteString -> Maybe a) -- ^ parser of an @a@ from a line
  -> (a -> Bool) -- ^ predicate to apply to each line
  -> LineProcessor a b id -- ^ an optional @'LineProcessor'@
  -> BS.ByteString -- ^ input string to be split into lines
  -> LineAppMonad BS.ByteString
processAppLinesStrict :: forall id a b.
(Eq id, Show id) =>
(ByteString -> Maybe id)
-> (ByteString -> Maybe a)
-> (a -> Bool)
-> LineProcessor a b id
-> ByteString
-> LineAppMonad ByteString
processAppLinesStrict = forall id i t a b.
(Eq id, Show id, Num i, Monoid t, Show i, Show t) =>
LineFunctions t i
-> (t -> Maybe id)
-> (t -> Maybe a)
-> (a -> Bool)
-> LineProcessor a b id
-> t
-> LineAppMonad t
processAppLines LineFunctions ByteString Int
lineFunctionsStrict

-- | Process a lazy 'BL.ByteString'.
-- See @'processAppLinesStrict'@ for a description of arguments.
processAppLinesLazy
  :: (Eq id, Show id)
  => (BL.ByteString -> Maybe id)
  -> (BL.ByteString -> Maybe a)
  -> (a -> Bool)
  -> LineProcessor a b id
  -> BL.ByteString
  -> LineAppMonad BL.ByteString
processAppLinesLazy :: forall id a b.
(Eq id, Show id) =>
(ByteString -> Maybe id)
-> (ByteString -> Maybe a)
-> (a -> Bool)
-> LineProcessor a b id
-> ByteString
-> LineAppMonad ByteString
processAppLinesLazy = forall id i t a b.
(Eq id, Show id, Num i, Monoid t, Show i, Show t) =>
LineFunctions t i
-> (t -> Maybe id)
-> (t -> Maybe a)
-> (a -> Bool)
-> LineProcessor a b id
-> t
-> LineAppMonad t
processAppLines LineFunctions ByteString Int64
lineFunctionsLazy