{-|
Module      : AppUtilities for LineFilterApp
Description : Misc types and functions useful in creating Hasklepias applications.
-}
-- TODO This module is now needed only for the LineFilterApp. Some
-- consolidation of utilities might be possible after the LineFilterApp is
-- refactored as the CohortApp was.
{-# LANGUAGE DataKinds             #-}
{-# LANGUAGE GADTs                 #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE QuasiQuotes           #-}
{-# LANGUAGE TypeApplications      #-}

module Hasklepias.LineFilterApp.AppUtilities
  (
  -- * Types and functions for handling I/O
    Location(..)
  , showLocation
  , Input
  , Output
  , readData
  , readDataStrict
  , writeData
  , writeDataStrict
  , getS3Object
  , parseIOSpec
  , outputToLocation
  , IOSpec
  , outputLocation
  , inputLocation

  -- ** Compression handling
  , InputDecompression(..)
  , OutputCompression(..)

  -- ** CLI option parsers
  , inputParser
  , stdInputParser
  , fileInputParser
  , s3InputParser
  , outputParser
  , stdOutputParser
  , fileOutputParser
  , s3OutputParser
  , inputDecompressionParser
  , outputCompressionParser

  -- ** Environmental variable parsers and utilities
  , logSettingsParser
  , parseLogSettings
  , logSettingsHelpDoc

  -- * Processing indexed files
  , updateLocationWithPartitionIndex
  , partitionIndexParser
  , partitionIndexDoc
  , PartitionIndex
  ) where

import           Amazonka                   (LogLevel (Debug, Error), Region,
                                             ToBody (..), discover, newEnv,
                                             newLogger, runResourceT, send,
                                             sinkBody)
import           Amazonka.S3                (BucketName (..),
                                             ObjectCannedACL (ObjectCannedACL_Bucket_owner_full_control),
                                             ObjectKey (..), newGetObject,
                                             newPutObject)
import           Blammo.Logging.LogSettings
import           Codec.Compression.GZip     (CompressionLevel, compress,
                                             decompress)
import           Control.Arrow              ((&&&))
import           Control.Monad
import           Control.Monad.IO.Class
import           Data.Bifunctor             (bimap, first)
import qualified Data.ByteString            as BS
import qualified Data.ByteString.Char8      as BSC
import qualified Data.ByteString.Lazy       as BL hiding (putStrLn)
import           Data.Conduit.Binary        (sinkLbs)
import           Data.Generics.Product      (HasField (field))
import           Data.List.Split            (splitOn)
import           Data.Semigroup             (Endo (..))
import           Data.String                (IsString (fromString))
import           Data.String.Interpolate    (i)
import qualified Data.Text                  as T (Text, pack, unpack)
import qualified Data.Text.IO               as T (putStrLn)
import qualified Env
import           Formatting                 (formatToString, left)
import           Lens.Micro                 (set, (<&>), (^.))
import           Lens.Micro.Extras          (view)
import           Options.Applicative
import           Options.Applicative.Help
import           Safe                       (atMay, headMay)
import           System.IO                  (stderr)
import           Text.Read                  (readMaybe)

-- TODO REFACTOR could use a reorg. note duplication with CohortAppCLI module. this one is pretty messy, but there is value in having a shared module with the LineFilter app

-- | Type representing locations that data can be read from
data Location where
  Std   ::Location
  Local ::FilePath -> Location
  S3    ::Region -> BucketName -> ObjectKey -> Location
  deriving (Int -> Location -> ShowS
[Location] -> ShowS
Location -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Location] -> ShowS
$cshowList :: [Location] -> ShowS
show :: Location -> String
$cshow :: Location -> String
showsPrec :: Int -> Location -> ShowS
$cshowsPrec :: Int -> Location -> ShowS
Show)

-- | Use to print where data is from (or to)
showLocation :: Location -> T.Text
showLocation :: Location -> Text
showLocation Location
Std                                 = Text
"stdin/stdout"
showLocation (Local String
f)                           = String -> Text
T.pack String
f
showLocation (S3 Region
r (BucketName Text
b) (ObjectKey Text
o)) = Text
b forall a. Semigroup a => a -> a -> a
<> Text
"/" forall a. Semigroup a => a -> a -> a
<> Text
o

-- | Type to hold input information. Either from file or from S3.
data Input =
     StdInput
   | FileInput (Maybe FilePath) FilePath
   | S3Input  Region BucketName ObjectKey
   deriving (Int -> Input -> ShowS
[Input] -> ShowS
Input -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Input] -> ShowS
$cshowList :: [Input] -> ShowS
show :: Input -> String
$cshow :: Input -> String
showsPrec :: Int -> Input -> ShowS
$cshowsPrec :: Int -> Input -> ShowS
Show)

-- | Type to hold input information. Either from file or from S3.
data Output =
     StdOutput
   | FileOutput (Maybe FilePath) FilePath
   | S3Output  Region BucketName ObjectKey

-- | Flag for whether to decompress input
data InputDecompression = NoDecompress | Decompress deriving (Int -> InputDecompression -> ShowS
[InputDecompression] -> ShowS
InputDecompression -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [InputDecompression] -> ShowS
$cshowList :: [InputDecompression] -> ShowS
show :: InputDecompression -> String
$cshow :: InputDecompression -> String
showsPrec :: Int -> InputDecompression -> ShowS
$cshowsPrec :: Int -> InputDecompression -> ShowS
Show)

-- | Flag for whether to compress output
data OutputCompression = NoCompress | Compress deriving (Int -> OutputCompression -> ShowS
[OutputCompression] -> ShowS
OutputCompression -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [OutputCompression] -> ShowS
$cshowList :: [OutputCompression] -> ShowS
show :: OutputCompression -> String
$cshow :: OutputCompression -> String
showsPrec :: Int -> OutputCompression -> ShowS
$cshowsPrec :: Int -> OutputCompression -> ShowS
Show)

{-
An internal helper function to handle @InputDecompression@
for lazy Bytestrings.
-}
handleInputDecompression
  :: InputDecompression -> BL.ByteString -> BL.ByteString
handleInputDecompression :: InputDecompression -> ByteString -> ByteString
handleInputDecompression InputDecompression
d = case InputDecompression
d of
  InputDecompression
Decompress   -> ByteString -> ByteString
decompress
  InputDecompression
NoDecompress -> forall a. a -> a
id

{-
An internal helper function to handle @InputDecompression@
for lazy Bytestrings.
-}
handleOutputCompression :: OutputCompression -> BL.ByteString -> BL.ByteString
handleOutputCompression :: OutputCompression -> ByteString -> ByteString
handleOutputCompression OutputCompression
d = case OutputCompression
d of
  OutputCompression
Compress   -> ByteString -> ByteString
compress
  OutputCompression
NoCompress -> forall a. a -> a
id

{-
An internal helper function to handle @InputDecompression@
for strict Bytestrings.

NOTE: zlib operates on Lazy Bytestrings so this function goes
from Strict to Lazy and back to Strict.
This is likely to be inefficient.

TODO:
This could be made more efficient using the tip from the `DecompressParams` here:
https://hackage.haskell.org/package/zlib-0.6.3.0/docs/Codec-Compression-Zlib-Internal.html#g:5

    "One particular use case for setting the decompressBufferSize is
    if you know the exact size of the decompressed data
    and want to produce a strict ByteString.
    The compression and decompression functions use lazy ByteStrings
    but if you set the decompressBufferSize correctly
    then you can generate a lazy ByteString with exactly one chunk,
    which can be converted to a strict ByteString
    in O(1) time using concat . toChunks."

-}
handleInputDecompressionStrict
  :: InputDecompression -> BS.ByteString -> BS.ByteString
handleInputDecompressionStrict :: InputDecompression -> ByteString -> ByteString
handleInputDecompressionStrict InputDecompression
d = case InputDecompression
d of
  InputDecompression
Decompress   -> ByteString -> ByteString
BL.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
decompress forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BL.fromStrict
  InputDecompression
NoDecompress -> forall a. a -> a
id

{-
An internal helper function to handle @InputDecompression@
for lazy Bytestrings.
-}
handleOutputCompressionStrict
  :: OutputCompression -> BS.ByteString -> BS.ByteString
handleOutputCompressionStrict :: OutputCompression -> ByteString -> ByteString
handleOutputCompressionStrict OutputCompression
d = case OutputCompression
d of
  OutputCompression
Compress   -> ByteString -> ByteString
BL.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
compress forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BL.fromStrict
  OutputCompression
NoCompress -> forall a. a -> a
id

-- | Read data from a @Location@ to lazy @ByteString@
readData :: Location -> InputDecompression -> IO BL.ByteString
readData :: Location -> InputDecompression -> IO ByteString
readData Location
Std        InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompression InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO ByteString
BL.getContents
readData (Local String
x ) InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompression InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
BL.readFile String
x
readData (S3 Region
r BucketName
b ObjectKey
k) InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompression InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Region -> BucketName -> ObjectKey -> IO ByteString
getS3Object Region
r BucketName
b ObjectKey
k

-- | Write data from a @Location@ to lazy @ByteString@
writeData :: Location -> OutputCompression -> BL.ByteString -> IO ()
writeData :: Location -> OutputCompression -> ByteString -> IO ()
writeData Location
Std        OutputCompression
z ByteString
x = ByteString -> IO ()
BL.putStr (OutputCompression -> ByteString -> ByteString
handleOutputCompression OutputCompression
z ByteString
x)
writeData (Local String
f ) OutputCompression
z ByteString
x = String -> ByteString -> IO ()
BL.writeFile String
f (OutputCompression -> ByteString -> ByteString
handleOutputCompression OutputCompression
z ByteString
x)
writeData (S3 Region
r BucketName
b ObjectKey
k) OutputCompression
z ByteString
x = forall a.
PutS3 a =>
Region -> BucketName -> ObjectKey -> a -> IO ()
putS3Object Region
r BucketName
b ObjectKey
k (OutputCompression -> ByteString -> ByteString
handleOutputCompression OutputCompression
z ByteString
x)

-- | Read data from a @Location@ to strict @ByteString@.
readDataStrict :: Location -> InputDecompression -> IO BS.ByteString
readDataStrict :: Location -> InputDecompression -> IO ByteString
readDataStrict Location
Std       InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompressionStrict InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO ByteString
BS.getContents
readDataStrict (Local String
x) InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompressionStrict InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
BS.readFile String
x
readDataStrict (S3 Region
r BucketName
b ObjectKey
k) InputDecompression
d =
  InputDecompression -> ByteString -> ByteString
handleInputDecompressionStrict InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> ByteString
BL.toStrict (Region -> BucketName -> ObjectKey -> IO ByteString
getS3Object Region
r BucketName
b ObjectKey
k)

-- | Write data from a @Location@ to strict @ByteString@.
writeDataStrict :: Location -> OutputCompression -> BS.ByteString -> IO ()
writeDataStrict :: Location -> OutputCompression -> ByteString -> IO ()
writeDataStrict Location
Std OutputCompression
z ByteString
x = ByteString -> IO ()
BSC.putStrLn (OutputCompression -> ByteString -> ByteString
handleOutputCompressionStrict OutputCompression
z ByteString
x)
writeDataStrict (Local String
f) OutputCompression
z ByteString
x =
  String -> ByteString -> IO ()
BS.writeFile String
f (OutputCompression -> ByteString -> ByteString
handleOutputCompressionStrict OutputCompression
z ByteString
x)
writeDataStrict (S3 Region
r BucketName
b ObjectKey
k) OutputCompression
z ByteString
x =
  forall a.
PutS3 a =>
Region -> BucketName -> ObjectKey -> a -> IO ()
putS3Object Region
r BucketName
b ObjectKey
k (OutputCompression -> ByteString -> ByteString
handleOutputCompressionStrict OutputCompression
z ByteString
x)

-- | Get an object from S3.
getS3Object :: Region -> BucketName -> ObjectKey -> IO BL.ByteString
getS3Object :: Region -> BucketName -> ObjectKey -> IO ByteString
getS3Object Region
r BucketName
b ObjectKey
k = do
  LogLevel -> ByteStringBuilder -> IO ()
lgr <- forall (m :: * -> *).
MonadIO m =>
LogLevel -> Handle -> m (LogLevel -> ByteStringBuilder -> IO ())
newLogger LogLevel
Debug Handle
stderr
  Env' Identity
env <-
    forall (m :: * -> *).
MonadIO m =>
(EnvNoAuth -> m (Env' Identity)) -> m (Env' Identity)
newEnv forall (m :: * -> *) (withAuth :: * -> *).
(MonadCatch m, MonadIO m, Foldable withAuth) =>
Env' withAuth -> m (Env' Identity)
discover
    forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"envLogger") LogLevel -> ByteStringBuilder -> IO ()
lgr
    forall b c a. (b -> c) -> (a -> b) -> a -> c
.   forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"envRegion") Region
r
  forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT forall a b. (a -> b) -> a -> b
$ do
    GetObjectResponse
result <- forall (m :: * -> *) a.
(MonadResource m, AWSRequest a) =>
Env' Identity -> a -> m (AWSResponse a)
send Env' Identity
env (BucketName -> ObjectKey -> GetObject
newGetObject BucketName
b ObjectKey
k)
    forall a s. Getting a s a -> s -> a
view (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"body") GetObjectResponse
result forall (m :: * -> *) a.
MonadIO m =>
ResponseBody -> ConduitM ByteString Void (ResourceT IO) a -> m a
`sinkBody` forall (m :: * -> *) o.
Monad m =>
ConduitT ByteString o m ByteString
sinkLbs

-- | Put an object on S3.
--
-- NOTE: the put request uses the bucket-owner-full-control
-- <https://docs.aws.amazon.com/AmazonS3/latest/userguide/about-object-ownership.html canned ACL>.
--
class (ToBody a) => PutS3 a where
  putS3Object :: Region -> BucketName -> ObjectKey -> a -> IO ()
  putS3Object Region
r BucketName
b ObjectKey
k a
o = do
    LogLevel -> ByteStringBuilder -> IO ()
lgr <- forall (m :: * -> *).
MonadIO m =>
LogLevel -> Handle -> m (LogLevel -> ByteStringBuilder -> IO ())
newLogger LogLevel
Error Handle
stderr
    Env' Identity
env <-
      forall (m :: * -> *).
MonadIO m =>
(EnvNoAuth -> m (Env' Identity)) -> m (Env' Identity)
newEnv forall (m :: * -> *) (withAuth :: * -> *).
(MonadCatch m, MonadIO m, Foldable withAuth) =>
Env' withAuth -> m (Env' Identity)
discover
      forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"envLogger") LogLevel -> ByteStringBuilder -> IO ()
lgr
      forall b c a. (b -> c) -> (a -> b) -> a -> c
.   forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"envRegion") Region
r
    let obj :: PutObject
obj = forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"acl") (forall a. a -> Maybe a
Just ObjectCannedACL
ObjectCannedACL_Bucket_owner_full_control)
              (BucketName -> ObjectKey -> RequestBody -> PutObject
newPutObject BucketName
b ObjectKey
k (forall a. ToBody a => a -> RequestBody
toBody a
o))
    forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT forall a b. (a -> b) -> a -> b
$ do
      forall (f :: * -> *) a. Functor f => f a -> f ()
void forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) a.
(MonadResource m, AWSRequest a) =>
Env' Identity -> a -> m (AWSResponse a)
send Env' Identity
env forall a b. (a -> b) -> a -> b
$ PutObject
obj
      forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IO ()
T.putStrLn forall a b. (a -> b) -> a -> b
$
          Text
"Successfully Uploaded contents to "
        forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (forall a. Show a => a -> String
show BucketName
b)
        forall a. Semigroup a => a -> a -> a
<> Text
" - "
        forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (forall a. Show a => a -> String
show ObjectKey
k)

instance PutS3 BL.ByteString
instance PutS3 BS.ByteString

-- | Maps an @Input@ to a @Location@.
inputToLocation :: Input -> Location
inputToLocation :: Input -> Location
inputToLocation Input
StdInput        = Location
Std
inputToLocation (FileInput Maybe String
d String
f) = String -> Location
Local (ShowS
pre String
f)
 where
  pre :: ShowS
pre = case Maybe String
d of
    Maybe String
Nothing -> forall a. Semigroup a => a -> a -> a
(<>) String
""
    Just String
s  -> forall a. Semigroup a => a -> a -> a
(<>) (String
s forall a. Semigroup a => a -> a -> a
<> String
"/")
inputToLocation (S3Input Region
r BucketName
b ObjectKey
k) = Region -> BucketName -> ObjectKey -> Location
S3 Region
r BucketName
b ObjectKey
k

-- | Maps an @Input@ to a @Location@.
outputToLocation :: Output -> Location
outputToLocation :: Output -> Location
outputToLocation Output
StdOutput        = Location
Std
outputToLocation (FileOutput Maybe String
d String
f) = String -> Location
Local (ShowS
pre String
f)
 where
  pre :: ShowS
pre = case Maybe String
d of
    Maybe String
Nothing -> forall a. Semigroup a => a -> a -> a
(<>) String
""
    Just String
s  -> forall a. Semigroup a => a -> a -> a
(<>) (String
s forall a. Semigroup a => a -> a -> a
<> String
"/")
outputToLocation (S3Output Region
r BucketName
b ObjectKey
k) = Region -> BucketName -> ObjectKey -> Location
S3 Region
r BucketName
b ObjectKey
k

{-
    CLI option parsers
-}

-- | Parser @StdInput@.
stdInputParser :: Parser Input
stdInputParser :: Parser Input
stdInputParser = forall (f :: * -> *) a. Applicative f => a -> f a
pure Input
StdInput

-- | Parser @StdOutput@.
stdOutputParser :: Parser Output
stdOutputParser :: Parser Output
stdOutputParser = forall (f :: * -> *) a. Applicative f => a -> f a
pure Output
StdOutput

-- | Parser for @FileInput@.
fileInputParser :: Parser Input
fileInputParser :: Parser Input
fileInputParser =
  Maybe String -> String -> Input
FileInput
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional
          (forall s. IsString s => Mod OptionFields s -> Parser s
strOption forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"dir" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'd' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"DIRECTORY" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help
            String
"optional directory"
          )
    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
          (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"file" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'f' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"INPUT" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"Input file")

-- | Parser for @FileInput@.
fileOutputParser :: Parser Output
fileOutputParser :: Parser Output
fileOutputParser =
  Maybe String -> String -> Output
FileOutput
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional
          (forall s. IsString s => Mod OptionFields s -> Parser s
strOption forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"outdir" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"DIRECTORY" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help
            String
"optional output directory"
          )
    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
          (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"output" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'o' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"OUTPUT" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"Output file")

-- | Parser for @Input@.
inputParser :: Parser Input
inputParser :: Parser Input
inputParser = Parser Input
fileInputParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Input
s3InputParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Input
stdInputParser

-- | Parser for @S3Input@.
s3InputParser :: Parser Input
s3InputParser :: Parser Input
s3InputParser =
  Region -> BucketName -> ObjectKey -> Input
S3Input
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
          (  forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"region"
          forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'r'
          forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"REGION"
          forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasValue f => a -> Mod f a
value Region
"us-east-1"
          forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"AWS Region"
          )
    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
          (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"bucket" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'b' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"BUCKET" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"S3 bucket")
    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
          (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"key" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'k' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"KEY" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"S3 location")

-- | Parser for @S3Output@.
s3OutputParser :: Parser Output
s3OutputParser :: Parser Output
s3OutputParser =
  Region -> BucketName -> ObjectKey -> Output
S3Output
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
          (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"outregion" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"OUTREGION" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasValue f => a -> Mod f a
value Region
"us-east-1" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help
            String
"output AWS Region"
          )
    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
          (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"outbucket" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"OUTBUCKET" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"output S3 bucket")
    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
          (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"outkey" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"OUTPUTKEY" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"output S3 location")

-- | Parser for @Output@
outputParser :: Parser Output
outputParser :: Parser Output
outputParser = Parser Output
fileOutputParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Output
s3OutputParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Output
stdOutputParser

-- | Parser for @InputDecompression@
inputDecompressionParser :: Parser InputDecompression
inputDecompressionParser :: Parser InputDecompression
inputDecompressionParser =
  forall a. a -> Mod FlagFields a -> Parser a
flag' InputDecompression
Decompress
        (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"decompress" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'd' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"decompress gzipped input")
    forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure InputDecompression
NoDecompress

-- | Parser for @OutputDecompression@
outputCompressionParser :: Parser OutputCompression
outputCompressionParser :: Parser OutputCompression
outputCompressionParser =
  forall a. a -> Mod FlagFields a -> Parser a
flag' OutputCompression
Compress (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"gzip" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'z' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"compress output using gzip")
    forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure OutputCompression
NoCompress

{-
  IO Location utilities
-}

{- |
A type containing the @'Location'@s for input and output.
See @'parseIOSpec'@ for creating a term of this type.
-}
data IOSpec = MkIOSpec {
     -- | @'Location'@ for input
     IOSpec -> Location
inputLocation  :: Location
     -- | @'Location'@ for output
   , IOSpec -> Location
outputLocation :: Location
   } deriving (Int -> IOSpec -> ShowS
[IOSpec] -> ShowS
IOSpec -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [IOSpec] -> ShowS
$cshowList :: [IOSpec] -> ShowS
show :: IOSpec -> String
$cshow :: IOSpec -> String
showsPrec :: Int -> IOSpec -> ShowS
$cshowsPrec :: Int -> IOSpec -> ShowS
Show)

{- |
Creates an @'IOSpec'@ from an @'Input'@ and @'Output'@.
If a @PartitionIndex@ is provided, then the  @'Input'@ and @'Output'@
are parsed via @'updateLocationWithPartitionIndex'@.

Examples:

>>> parseIOSpec Nothing StdInput StdOutput
Right (MkIOSpec {inputLocation = Std, outputLocation = Std})
>>> parseIOSpec (Just (MkPartitionIndex 1)) StdInput StdOutput
Right (MkIOSpec {inputLocation = Std, outputLocation = Std})
>>> parseIOSpec (Just (MkPartitionIndex 1)) (FileInput Nothing "hasklepias-examples/exampleData/exampleData%%n%%.jsonl") StdOutput
Right (MkIOSpec {inputLocation = Local "hasklepias-examples/exampleData/exampleData1.jsonl", outputLocation = Std})
>>> parseIOSpec (Just (MkPartitionIndex 1)) (FileInput Nothing "hasklepias-examples/exampleData/exampleData%%n%%.jsonl") (FileOutput Nothing "hasklepias-examples/exampleData/exampleCohort%%n%%.json")
Right (MkIOSpec {inputLocation = Local "hasklepias-examples/exampleData/exampleData1.jsonl", outputLocation = Local "hasklepias-examples/exampleData/exampleCohort1.json"})
>>> parseIOSpec (Just (MkPartitionIndex 1)) (FileInput Nothing "hasklepias-examples/exampleData/exampleData%%n%%.jsonl") (FileOutput Nothing "hasklepias-examples/exampleData/exampleCohort.json")
Left FailedToSplitByDelimiter
>>> parseIOSpec (Just (MkPartitionIndex (-1))) (FileInput Nothing "hasklepias-examples/exampleData/exampleData%%n%%.jsonl") StdOutput
Left NOutOfBounds
>>> parseIOSpec (Just (MkPartitionIndex 10)) (FileInput Nothing "hasklepias-examples/exampleData/exampleData%%1%%.jsonl") StdOutput
Left NOutOfBounds

-}
parseIOSpec :: Maybe PartitionIndex
  -> Input
  -> Output
  -> Either PartitionTemplateError IOSpec
parseIOSpec :: Maybe PartitionIndex
-> Input -> Output -> Either PartitionTemplateError IOSpec
parseIOSpec Maybe PartitionIndex
Nothing Input
i Output
o = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Location -> Location -> IOSpec
MkIOSpec (Input -> Location
inputToLocation Input
i) (Output -> Location
outputToLocation Output
o)
parseIOSpec (Just PartitionIndex
pid) Input
i Output
o =
  forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Location -> Location -> IOSpec
MkIOSpec forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall {a} {a} {b}. (Either a a, Either a b) -> Either a (a, b)
involve (Location -> Either PartitionTemplateError Location
update forall a b. (a -> b) -> a -> b
$ Input -> Location
inputToLocation Input
i, Location -> Either PartitionTemplateError Location
update forall a b. (a -> b) -> a -> b
$ Output -> Location
outputToLocation Output
o)
  where update :: Location -> Either PartitionTemplateError Location
update = forall a b c. (a -> b -> c) -> b -> a -> c
flip Location
-> PartitionIndex -> Either PartitionTemplateError Location
updateLocationWithPartitionIndex PartitionIndex
pid
        -- TODO: I failed to think of an obvious (f a, f a) -> f (a, a) function
        involve :: (Either a a, Either a b) -> Either a (a, b)
involve (Either a a
x, Either a b
y) = case Either a a
x of
          Left a
e -> forall a b. a -> Either a b
Left a
e
          Right a
z1 -> case Either a b
y of
            Left a
e2  -> forall a b. a -> Either a b
Left a
e2
            Right b
z2 -> forall a b. b -> Either a b
Right (a
z1, b
z2)

{-
  BLAMMO logging utilities

  The logSettingsParser, parseLogSettings, and endo functions below
  are basically copied from
  https://hackage.haskell.org/package/Blammo-1.0.2.3/docs/src/Blammo.Logging.LogSettings.Env.html#parse.
  I (BS 2022-09-28) couldn't find an easy to change the default settings/parser
  without parsing the LOG_DESTINATION variable again,
  hence I just copied the functions over.
  I opened related issue here:
  https://github.com/freckle/blammo/issues/20
-}

-- | information about using logging
logSettingsHelpDoc :: Doc
logSettingsHelpDoc :: Doc
logSettingsHelpDoc =
  Doc -> Doc
dullblue (Doc -> Doc
bold Doc
"== Log Settings ==") forall a. Semigroup a => a -> a -> a
<> Doc
linebreak forall a. Semigroup a => a -> a -> a
<> [i|
  Users can control the logging behavior by setting environmental variables.
  The default for asclepias apps is to send logs to stderr in terminal format.
  To change the format to JSON (for example) set the format variable
  before calling the application, as in:

  ```
  export LOG_FORMAT=json
  ```

  For more information see,
  https://hackage.haskell.org/package/Blammo/docs/Blammo-Logging-LogSettings-Env.html

  Available environment variables:

  LOG_LEVEL                known log level (case insensitive)
  LOG_DESTINATION          stdout|stderr|@<path>
  LOG_FORMAT               tty|json
  LOG_COLOR                auto|always|never
  |]

-- | Sets the destination for the default Log settings to @stderr@,
-- rather than @stdout@.
ourDefaultLogSettings :: LogSettings
ourDefaultLogSettings :: LogSettings
ourDefaultLogSettings =
  LogDestination -> LogSettings -> LogSettings
setLogSettingsDestination LogDestination
LogDestinationStderr LogSettings
defaultLogSettings

-- | Environmental variable parser
logSettingsParser :: Env.Parser Env.Error LogSettings
logSettingsParser :: Parser Error LogSettings
logSettingsParser = (forall a b. (a -> b) -> a -> b
$ LogSettings
ourDefaultLogSettings) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Endo a -> a -> a
appEndo forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
sequenceA
  [ forall e a.
AsUnset e =>
Reader e a -> String -> Mod Var a -> Parser e a
Env.var (forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String LogLevels
readLogLevels LogLevels -> LogSettings -> LogSettings
setLogSettingsLevels) String
"LOG_LEVEL" (forall a. a -> Mod Var a
Env.def forall a. Monoid a => a
mempty)
  , forall e a.
AsUnset e =>
Reader e a -> String -> Mod Var a -> Parser e a
Env.var (forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String LogDestination
readLogDestination LogDestination -> LogSettings -> LogSettings
setLogSettingsDestination) String
"LOG_DESTINATION" (forall a. a -> Mod Var a
Env.def forall a. Monoid a => a
mempty)
  , forall e a.
AsUnset e =>
Reader e a -> String -> Mod Var a -> Parser e a
Env.var (forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String LogFormat
readLogFormat LogFormat -> LogSettings -> LogSettings
setLogSettingsFormat) String
"LOG_FORMAT" (forall a. a -> Mod Var a
Env.def forall a. Monoid a => a
mempty)
  , forall e a.
AsUnset e =>
Reader e a -> String -> Mod Var a -> Parser e a
Env.var (forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String LogColor
readLogColor LogColor -> LogSettings -> LogSettings
setLogSettingsColor) String
"LOG_COLOR" (forall a. a -> Mod Var a
Env.def forall a. Monoid a => a
mempty)
  ]

-- | Parse @'Blammo.Logging.LogSettings'@.
parseLogSettings :: IO LogSettings
parseLogSettings :: IO LogSettings
parseLogSettings = forall e a.
AsUnset e =>
(Info Error -> Info e) -> Parser e a -> IO a
Env.parse forall a. a -> a
id Parser Error LogSettings
logSettingsParser

endo
  :: Env.AsUnread e
  => (String -> Either String a)
  -- ^ How to parse the value
  -> (a -> b -> b)
  -- ^ How to turn the parsed value into a setter
  -> Env.Reader e (Endo b)
endo :: forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String a
reader a -> b -> b
setter String
x = forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first forall e. AsUnread e => String -> e
Env.unread forall a b. (a -> b) -> a -> b
$ forall a. (a -> a) -> Endo a
Endo forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> b -> b
setter forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Either String a
reader String
x

{-
  Utililties for processing (integer) indexed files, e.g.:
     file1, file2, file3, ...
-}

-- | Enumerates kinds of errors handled by @'parsePartitionFilePath'@.
data PartitionTemplateError =
    NOutOfBounds
  | NonPostiveWidth
  | FailedToParseWidth
  | FailedToSplitByDelimiter
  deriving (PartitionTemplateError -> PartitionTemplateError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PartitionTemplateError -> PartitionTemplateError -> Bool
$c/= :: PartitionTemplateError -> PartitionTemplateError -> Bool
== :: PartitionTemplateError -> PartitionTemplateError -> Bool
$c== :: PartitionTemplateError -> PartitionTemplateError -> Bool
Eq, Int -> PartitionTemplateError -> ShowS
[PartitionTemplateError] -> ShowS
PartitionTemplateError -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PartitionTemplateError] -> ShowS
$cshowList :: [PartitionTemplateError] -> ShowS
show :: PartitionTemplateError -> String
$cshow :: PartitionTemplateError -> String
showsPrec :: Int -> PartitionTemplateError -> ShowS
$cshowsPrec :: Int -> PartitionTemplateError -> ShowS
Show)

-- | The possible pattern for a partition template
data PartitionTemplatePattern =
    -- ^ Partition has fixed width
    FixedWidth Int
    -- ^ Partition does not have fixed width
  | NotFixedWidth deriving (PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
$c/= :: PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
== :: PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
$c== :: PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
Eq, Int -> PartitionTemplatePattern -> ShowS
[PartitionTemplatePattern] -> ShowS
PartitionTemplatePattern -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PartitionTemplatePattern] -> ShowS
$cshowList :: [PartitionTemplatePattern] -> ShowS
show :: PartitionTemplatePattern -> String
$cshow :: PartitionTemplatePattern -> String
showsPrec :: Int -> PartitionTemplatePattern -> ShowS
$cshowsPrec :: Int -> PartitionTemplatePattern -> ShowS
Show)

-- internal function for @parsePartitionFilePath@
parsePartitionTemplate ::
     String
  -> FilePath
  -> Either PartitionTemplateError (String, PartitionTemplatePattern, String)
parsePartitionTemplate :: String
-> String
-> Either
     PartitionTemplateError (String, PartitionTemplatePattern, String)
parsePartitionTemplate String
delim =
    forall {a} {c}.
(Maybe a, Maybe PartitionTemplatePattern, Maybe c)
-> Either PartitionTemplateError (a, PartitionTemplatePattern, c)
checkParse
     forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall {a} {a} {b} {c}.
(a -> a) -> (a -> b) -> (a -> c) -> a -> (a, b, c)
call3 forall a. [a] -> Maybe a
headMay (\[String]
x -> ([String]
x forall a. [a] -> Int -> Maybe a
`atMay` Int
1) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= String -> Maybe PartitionTemplatePattern
readTemplatePattern) (forall a. [a] -> Int -> Maybe a
`atMay` Int
2))
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\[String]
x -> if forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
x forall a. Eq a => a -> a -> Bool
== Int
3 then forall a b. b -> Either a b
Right [String]
x else forall a b. a -> Either a b
Left PartitionTemplateError
FailedToSplitByDelimiter)
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
delim
  where call3 :: (a -> a) -> (a -> b) -> (a -> c) -> a -> (a, b, c)
call3 a -> a
f a -> b
g a -> c
h = (\((a
x, b
y), c
z) -> (a
x, b
y, c
z)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((a -> a
f forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& a -> b
g) forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& a -> c
h)

        readTemplatePattern :: String -> Maybe PartitionTemplatePattern
readTemplatePattern String
"n" = forall a. a -> Maybe a
Just PartitionTemplatePattern
NotFixedWidth
        readTemplatePattern String
x   = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int -> PartitionTemplatePattern
FixedWidth (forall a. Read a => String -> Maybe a
readMaybe String
x)

        checkParse :: (Maybe a, Maybe PartitionTemplatePattern, Maybe c)
-> Either PartitionTemplateError (a, PartitionTemplatePattern, c)
checkParse (Just a
x, Maybe PartitionTemplatePattern
y, Just c
z) = case Maybe PartitionTemplatePattern
y of
          Maybe PartitionTemplatePattern
Nothing -> forall a b. a -> Either a b
Left PartitionTemplateError
FailedToParseWidth
          Just PartitionTemplatePattern
pat -> case PartitionTemplatePattern
pat of
            PartitionTemplatePattern
NotFixedWidth -> forall a b. b -> Either a b
Right (a
x, PartitionTemplatePattern
pat, c
z)
            FixedWidth Int
i  -> if Int
i forall a. Ord a => a -> a -> Bool
<= Int
0 then forall a b. a -> Either a b
Left PartitionTemplateError
NonPostiveWidth else forall a b. b -> Either a b
Right (a
x, PartitionTemplatePattern
pat, c
z)
        checkParse (Maybe a
_, Maybe PartitionTemplatePattern
_, Maybe c
_) = forall a b. a -> Either a b
Left PartitionTemplateError
FailedToSplitByDelimiter

{- |
A utility for templating @'FilePath'@ with an integer value
This is useful for processing batches of numbered files.
The template pattern is @%%n%%@,
where @n@ is either an integer value or the character @n@.
If @n@ is a positive integer,
then the output is left-padded with zeros to a fixed width of @n@.
If the template variable simply @n@,
then the output is not left-padded.

The examples below show usage and possible errors that may occur.

>>> parsePartitionFilePath "file-%%n%%.json" 3
Right "file-3.json"
>>> parsePartitionFilePath "file-%%5%%.json" 3
Right "file-00003.json"
>>> parsePartitionFilePath "file-%%10%%.json" 3
Right "file-0000000003.json"
>>> parsePartitionFilePath "file-%%n%%.json" (-1)
Left NOutOfBounds
>>> parsePartitionFilePath "file-%%2%%.json" 99
Right "file-99.json"
>>> parsePartitionFilePath "file-%%2%%.json" 100
Left NOutOfBounds
>>> parsePartitionFilePath "file-%%ab%%.json" 3
Left FailedToParseWidth
>>> parsePartitionFilePath "file-%%-1%%.json" 3
Left NonPostiveWidth
>>> parsePartitionFilePath "file-%%1%%.json" 10
Left NOutOfBounds
>>> parsePartitionFilePath "file-%%1%%-%%.json" 10
Left FailedToSplitByDelimiter

-}
parsePartitionFilePath ::
     FilePath
  -> Int
  -> Either PartitionTemplateError FilePath
parsePartitionFilePath :: String -> Int -> Either PartitionTemplateError String
parsePartitionFilePath String
x Int
n =
   String
-> String
-> Either
     PartitionTemplateError (String, PartitionTemplatePattern, String)
parsePartitionTemplate String
"%%" String
x forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall {a}.
(Ord a, Num a, Buildable a, Show a) =>
a
-> (String, PartitionTemplatePattern, String)
-> Either PartitionTemplateError String
fillTemplate Int
n
   where
    fillTemplate :: a
-> (String, PartitionTemplatePattern, String)
-> Either PartitionTemplateError String
fillTemplate a
i (String
x, FixedWidth Int
n, String
z) =
      if a
0 forall a. Ord a => a -> a -> Bool
<= a
i Bool -> Bool -> Bool
&& a
i forall a. Ord a => a -> a -> Bool
<= a
10 forall a b. (Num a, Integral b) => a -> b -> a
^ Int
n forall a. Num a => a -> a -> a
- a
1
        then forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ String
x forall a. Semigroup a => a -> a -> a
<> forall a. Format String a -> a
formatToString (forall a r. Buildable a => Int -> Char -> Format r (a -> r)
left Int
n Char
'0') a
i forall a. Semigroup a => a -> a -> a
<> String
z
        else forall a b. a -> Either a b
Left PartitionTemplateError
NOutOfBounds
    fillTemplate a
i (String
x, PartitionTemplatePattern
NotFixedWidth, String
z) =
      if  a
i forall a. Ord a => a -> a -> Bool
< a
0
        then forall a b. a -> Either a b
Left PartitionTemplateError
NOutOfBounds
        else forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ String
x forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show a
i forall a. Semigroup a => a -> a -> a
<> String
z


-- | Newtype wrapper for creating Parser for the @partition-index@ cli option.
newtype PartitionIndex = MkPartitionIndex Int deriving (PartitionIndex -> PartitionIndex -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PartitionIndex -> PartitionIndex -> Bool
$c/= :: PartitionIndex -> PartitionIndex -> Bool
== :: PartitionIndex -> PartitionIndex -> Bool
$c== :: PartitionIndex -> PartitionIndex -> Bool
Eq, Int -> PartitionIndex -> ShowS
[PartitionIndex] -> ShowS
PartitionIndex -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PartitionIndex] -> ShowS
$cshowList :: [PartitionIndex] -> ShowS
show :: PartitionIndex -> String
$cshow :: PartitionIndex -> String
showsPrec :: Int -> PartitionIndex -> ShowS
$cshowsPrec :: Int -> PartitionIndex -> ShowS
Show)

-- | Parser for @PartitionIndex@.
partitionIndexParser :: Parser PartitionIndex
partitionIndexParser :: Parser PartitionIndex
partitionIndexParser =
  forall a. ReadM a -> Mod OptionFields a -> Parser a
option
  (forall a. (String -> Either String a) -> ReadM a
eitherReader
    (\String
s -> case forall a. Read a => String -> Maybe a
readMaybe String
s :: Maybe Int of
      Maybe Int
Nothing -> forall a b. a -> Either a b
Left String
"partition-index value must be an integer"
      Just Int
i ->
        if Int
i forall a. Ord a => a -> a -> Bool
< Int
0
          then forall a b. a -> Either a b
Left String
"partition-index value must be non-negative"
          else forall a b. b -> Either a b
Right (Int -> PartitionIndex
MkPartitionIndex Int
i)
    ))
    (  forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"partition-index"
    forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"INT"
    forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help
    (String
"Non-negative integer value to pass an index to templated input/output paths. " forall a. Semigroup a => a -> a -> a
<>
     String
"See Processing Indexed Files help in help text for more information.")
    )

-- | information on how to use the @PartitionIndex@ option
partitionIndexDoc :: Doc
partitionIndexDoc :: Doc
partitionIndexDoc =
  Doc -> Doc
dullblue (Doc -> Doc
bold Doc
"=== Processing Indexed Files ===")
  forall a. Semigroup a => a -> a -> a
<> Doc
linebreak
  forall a. Semigroup a => a -> a -> a
<> [i|
  The following options have a basic templating functionality for processing
  indexed filenames (such as file1.json, file2.json, file3.json, etc):

  * --file
  * --output
  * --key
  * --outkey

  The template pattern is @%%n%%@, where @n@ is either an integer value
  or the character 'n'. If n is a positive integer, then the output is
  left-padded with zeros to a fixed width of n. If the template variable is
  simply 'n', then the output is not left-padded.

  A filepath may contain at most one template.

  When templated paths are used, the --partition-index option is interpolated
  into the path(s). If both input and output go to one of the options listed,
  then both paths must be templated.

  **EXAMPLES**

   Options
    --file=path/to/in-%%5%%.ext
    --output=path/to/out-%%5%%.ext
    --partition-index=1
   -----------------------
   Interpretation
    input file = path/to/in-00001.ext
    output file = path/to/out-00001.ext

   Options
    --file=path/to/in%%n%%.ext
    --output=path/to/out%%n%%.ext
    --partition-index=1
   -----------------------
   Interpretation
    input file = path/to/in1.ext
    output file = path/to/out1.ext

   Options
    --file=path/to/in-%%5%%.ext
    --output=path/to/out-%%5%%.ext
    --partition-index=1
   -----------------------
   Interpretation
    input file = path/to/in-00001.ext
    output file = path/to/out-00001.ext

   Options
   --file=path/to/in%%n%%.ext
   --outkey=path/to/out%%n%%.ext
   --partition-index=1
   -----------------------
   Interpretation
    input file = path/to/in1.ext
    output S3 key = path/to/out1.ext

  |]

{- |
Modifies a @'Location'@ given a @'PartitionIndex'@ in the following ways:

* @Std@: ignores the @PartitionIndex@ and returns @Std@
* @Local@: modifies the @FilePath@ with @'parsePartitionFilePath'@
* @S3@: modifies the @'Amazonka.S3.ObjectKey'@ with @'parsePartitionFilePath'@

Examples:

>>> updateLocationWithPartitionIndex Std (MkPartitionIndex 1)
Right Std
>>> updateLocationWithPartitionIndex (Local "foo-%%2%%") (MkPartitionIndex 1)
Right (Local "foo-01")
>>> updateLocationWithPartitionIndex (Local "foo-%%%") (MkPartitionIndex 1)
Left FailedToSplitByDelimiter
>>> updateLocationWithPartitionIndex (S3 "us-east-1" "myBucket" "foo-%%5%%") (MkPartitionIndex 1)
Right (S3 (Region' {fromRegion = "us-east-1"}) (BucketName "myBucket") (ObjectKey "foo-00001"))
>>> updateLocationWithPartitionIndex ( Local "hasklepias-examples/exampleData/exampleData%%n%%.jsonl") (MkPartitionIndex 1)
Right (Local "hasklepias-examples/exampleData/exampleData1.jsonl")

-}
updateLocationWithPartitionIndex ::
     Location
  -> PartitionIndex
  -> Either PartitionTemplateError Location
updateLocationWithPartitionIndex :: Location
-> PartitionIndex -> Either PartitionTemplateError Location
updateLocationWithPartitionIndex Location
Std PartitionIndex
_ = forall a b. b -> Either a b
Right Location
Std
updateLocationWithPartitionIndex (Local String
f) (MkPartitionIndex Int
i) =
  case String -> Int -> Either PartitionTemplateError String
parsePartitionFilePath String
f Int
i of
    Left PartitionTemplateError
e  -> forall a b. a -> Either a b
Left PartitionTemplateError
e
    Right String
x -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ String -> Location
Local String
x
updateLocationWithPartitionIndex (S3 Region
r BucketName
b (ObjectKey Text
o)) (MkPartitionIndex Int
i) =
  case String -> Int -> Either PartitionTemplateError String
parsePartitionFilePath (Text -> String
T.unpack Text
o) Int
i of
    Left PartitionTemplateError
e  -> forall a b. a -> Either a b
Left PartitionTemplateError
e
    Right String
x -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Region -> BucketName -> ObjectKey -> Location
S3 Region
r BucketName
b (Text -> ObjectKey
ObjectKey (String -> Text
T.pack String
x))