{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TypeApplications #-}
module Hasklepias.LineFilterApp.AppUtilities
(
Location(..)
, showLocation
, Input
, Output
, readData
, readDataStrict
, writeData
, writeDataStrict
, getS3Object
, parseIOSpec
, outputToLocation
, IOSpec
, outputLocation
, inputLocation
, InputDecompression(..)
, OutputCompression(..)
, inputParser
, stdInputParser
, fileInputParser
, s3InputParser
, outputParser
, stdOutputParser
, fileOutputParser
, s3OutputParser
, inputDecompressionParser
, outputCompressionParser
, logSettingsParser
, parseLogSettings
, logSettingsHelpDoc
, updateLocationWithPartitionIndex
, partitionIndexParser
, partitionIndexDoc
, PartitionIndex
) where
import Amazonka (LogLevel (Debug, Error), Region,
ToBody (..), discover, newEnv,
newLogger, runResourceT, send,
sinkBody)
import Amazonka.S3 (BucketName (..),
ObjectCannedACL (ObjectCannedACL_Bucket_owner_full_control),
ObjectKey (..), newGetObject,
newPutObject)
import Blammo.Logging.LogSettings
import Codec.Compression.GZip (CompressionLevel, compress,
decompress)
import Control.Arrow ((&&&))
import Control.Monad
import Control.Monad.IO.Class
import Data.Bifunctor (bimap, first)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as BL hiding (putStrLn)
import Data.Conduit.Binary (sinkLbs)
import Data.Generics.Product (HasField (field))
import Data.List.Split (splitOn)
import Data.Semigroup (Endo (..))
import Data.String (IsString (fromString))
import Data.String.Interpolate (i)
import qualified Data.Text as T (Text, pack, unpack)
import qualified Data.Text.IO as T (putStrLn)
import qualified Env
import Formatting (formatToString, left)
import Lens.Micro (set, (<&>), (^.))
import Lens.Micro.Extras (view)
import Options.Applicative
import Options.Applicative.Help
import Safe (atMay, headMay)
import System.IO (stderr)
import Text.Read (readMaybe)
data Location where
Std ::Location
Local ::FilePath -> Location
S3 ::Region -> BucketName -> ObjectKey -> Location
deriving (Int -> Location -> ShowS
[Location] -> ShowS
Location -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Location] -> ShowS
$cshowList :: [Location] -> ShowS
show :: Location -> String
$cshow :: Location -> String
showsPrec :: Int -> Location -> ShowS
$cshowsPrec :: Int -> Location -> ShowS
Show)
showLocation :: Location -> T.Text
showLocation :: Location -> Text
showLocation Location
Std = Text
"stdin/stdout"
showLocation (Local String
f) = String -> Text
T.pack String
f
showLocation (S3 Region
r (BucketName Text
b) (ObjectKey Text
o)) = Text
b forall a. Semigroup a => a -> a -> a
<> Text
"/" forall a. Semigroup a => a -> a -> a
<> Text
o
data Input =
StdInput
| FileInput (Maybe FilePath) FilePath
| S3Input Region BucketName ObjectKey
deriving (Int -> Input -> ShowS
[Input] -> ShowS
Input -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Input] -> ShowS
$cshowList :: [Input] -> ShowS
show :: Input -> String
$cshow :: Input -> String
showsPrec :: Int -> Input -> ShowS
$cshowsPrec :: Int -> Input -> ShowS
Show)
data Output =
StdOutput
| FileOutput (Maybe FilePath) FilePath
| S3Output Region BucketName ObjectKey
data InputDecompression = NoDecompress | Decompress deriving (Int -> InputDecompression -> ShowS
[InputDecompression] -> ShowS
InputDecompression -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [InputDecompression] -> ShowS
$cshowList :: [InputDecompression] -> ShowS
show :: InputDecompression -> String
$cshow :: InputDecompression -> String
showsPrec :: Int -> InputDecompression -> ShowS
$cshowsPrec :: Int -> InputDecompression -> ShowS
Show)
data OutputCompression = NoCompress | Compress deriving (Int -> OutputCompression -> ShowS
[OutputCompression] -> ShowS
OutputCompression -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [OutputCompression] -> ShowS
$cshowList :: [OutputCompression] -> ShowS
show :: OutputCompression -> String
$cshow :: OutputCompression -> String
showsPrec :: Int -> OutputCompression -> ShowS
$cshowsPrec :: Int -> OutputCompression -> ShowS
Show)
handleInputDecompression
:: InputDecompression -> BL.ByteString -> BL.ByteString
handleInputDecompression :: InputDecompression -> ByteString -> ByteString
handleInputDecompression InputDecompression
d = case InputDecompression
d of
InputDecompression
Decompress -> ByteString -> ByteString
decompress
InputDecompression
NoDecompress -> forall a. a -> a
id
handleOutputCompression :: OutputCompression -> BL.ByteString -> BL.ByteString
handleOutputCompression :: OutputCompression -> ByteString -> ByteString
handleOutputCompression OutputCompression
d = case OutputCompression
d of
OutputCompression
Compress -> ByteString -> ByteString
compress
OutputCompression
NoCompress -> forall a. a -> a
id
handleInputDecompressionStrict
:: InputDecompression -> BS.ByteString -> BS.ByteString
handleInputDecompressionStrict :: InputDecompression -> ByteString -> ByteString
handleInputDecompressionStrict InputDecompression
d = case InputDecompression
d of
InputDecompression
Decompress -> ByteString -> ByteString
BL.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
decompress forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BL.fromStrict
InputDecompression
NoDecompress -> forall a. a -> a
id
handleOutputCompressionStrict
:: OutputCompression -> BS.ByteString -> BS.ByteString
handleOutputCompressionStrict :: OutputCompression -> ByteString -> ByteString
handleOutputCompressionStrict OutputCompression
d = case OutputCompression
d of
OutputCompression
Compress -> ByteString -> ByteString
BL.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
compress forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BL.fromStrict
OutputCompression
NoCompress -> forall a. a -> a
id
readData :: Location -> InputDecompression -> IO BL.ByteString
readData :: Location -> InputDecompression -> IO ByteString
readData Location
Std InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompression InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO ByteString
BL.getContents
readData (Local String
x ) InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompression InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
BL.readFile String
x
readData (S3 Region
r BucketName
b ObjectKey
k) InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompression InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Region -> BucketName -> ObjectKey -> IO ByteString
getS3Object Region
r BucketName
b ObjectKey
k
writeData :: Location -> OutputCompression -> BL.ByteString -> IO ()
writeData :: Location -> OutputCompression -> ByteString -> IO ()
writeData Location
Std OutputCompression
z ByteString
x = ByteString -> IO ()
BL.putStr (OutputCompression -> ByteString -> ByteString
handleOutputCompression OutputCompression
z ByteString
x)
writeData (Local String
f ) OutputCompression
z ByteString
x = String -> ByteString -> IO ()
BL.writeFile String
f (OutputCompression -> ByteString -> ByteString
handleOutputCompression OutputCompression
z ByteString
x)
writeData (S3 Region
r BucketName
b ObjectKey
k) OutputCompression
z ByteString
x = forall a.
PutS3 a =>
Region -> BucketName -> ObjectKey -> a -> IO ()
putS3Object Region
r BucketName
b ObjectKey
k (OutputCompression -> ByteString -> ByteString
handleOutputCompression OutputCompression
z ByteString
x)
readDataStrict :: Location -> InputDecompression -> IO BS.ByteString
readDataStrict :: Location -> InputDecompression -> IO ByteString
readDataStrict Location
Std InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompressionStrict InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO ByteString
BS.getContents
readDataStrict (Local String
x) InputDecompression
d = InputDecompression -> ByteString -> ByteString
handleInputDecompressionStrict InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
BS.readFile String
x
readDataStrict (S3 Region
r BucketName
b ObjectKey
k) InputDecompression
d =
InputDecompression -> ByteString -> ByteString
handleInputDecompressionStrict InputDecompression
d forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> ByteString
BL.toStrict (Region -> BucketName -> ObjectKey -> IO ByteString
getS3Object Region
r BucketName
b ObjectKey
k)
writeDataStrict :: Location -> OutputCompression -> BS.ByteString -> IO ()
writeDataStrict :: Location -> OutputCompression -> ByteString -> IO ()
writeDataStrict Location
Std OutputCompression
z ByteString
x = ByteString -> IO ()
BSC.putStrLn (OutputCompression -> ByteString -> ByteString
handleOutputCompressionStrict OutputCompression
z ByteString
x)
writeDataStrict (Local String
f) OutputCompression
z ByteString
x =
String -> ByteString -> IO ()
BS.writeFile String
f (OutputCompression -> ByteString -> ByteString
handleOutputCompressionStrict OutputCompression
z ByteString
x)
writeDataStrict (S3 Region
r BucketName
b ObjectKey
k) OutputCompression
z ByteString
x =
forall a.
PutS3 a =>
Region -> BucketName -> ObjectKey -> a -> IO ()
putS3Object Region
r BucketName
b ObjectKey
k (OutputCompression -> ByteString -> ByteString
handleOutputCompressionStrict OutputCompression
z ByteString
x)
getS3Object :: Region -> BucketName -> ObjectKey -> IO BL.ByteString
getS3Object :: Region -> BucketName -> ObjectKey -> IO ByteString
getS3Object Region
r BucketName
b ObjectKey
k = do
LogLevel -> ByteStringBuilder -> IO ()
lgr <- forall (m :: * -> *).
MonadIO m =>
LogLevel -> Handle -> m (LogLevel -> ByteStringBuilder -> IO ())
newLogger LogLevel
Debug Handle
stderr
Env' Identity
env <-
forall (m :: * -> *).
MonadIO m =>
(EnvNoAuth -> m (Env' Identity)) -> m (Env' Identity)
newEnv forall (m :: * -> *) (withAuth :: * -> *).
(MonadCatch m, MonadIO m, Foldable withAuth) =>
Env' withAuth -> m (Env' Identity)
discover
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"envLogger") LogLevel -> ByteStringBuilder -> IO ()
lgr
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"envRegion") Region
r
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT forall a b. (a -> b) -> a -> b
$ do
GetObjectResponse
result <- forall (m :: * -> *) a.
(MonadResource m, AWSRequest a) =>
Env' Identity -> a -> m (AWSResponse a)
send Env' Identity
env (BucketName -> ObjectKey -> GetObject
newGetObject BucketName
b ObjectKey
k)
forall a s. Getting a s a -> s -> a
view (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"body") GetObjectResponse
result forall (m :: * -> *) a.
MonadIO m =>
ResponseBody -> ConduitM ByteString Void (ResourceT IO) a -> m a
`sinkBody` forall (m :: * -> *) o.
Monad m =>
ConduitT ByteString o m ByteString
sinkLbs
class (ToBody a) => PutS3 a where
putS3Object :: Region -> BucketName -> ObjectKey -> a -> IO ()
putS3Object Region
r BucketName
b ObjectKey
k a
o = do
LogLevel -> ByteStringBuilder -> IO ()
lgr <- forall (m :: * -> *).
MonadIO m =>
LogLevel -> Handle -> m (LogLevel -> ByteStringBuilder -> IO ())
newLogger LogLevel
Error Handle
stderr
Env' Identity
env <-
forall (m :: * -> *).
MonadIO m =>
(EnvNoAuth -> m (Env' Identity)) -> m (Env' Identity)
newEnv forall (m :: * -> *) (withAuth :: * -> *).
(MonadCatch m, MonadIO m, Foldable withAuth) =>
Env' withAuth -> m (Env' Identity)
discover
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"envLogger") LogLevel -> ByteStringBuilder -> IO ()
lgr
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"envRegion") Region
r
let obj :: PutObject
obj = forall s t a b. ASetter s t a b -> b -> s -> t
set (forall (field :: Symbol) s t a b.
HasField field s t a b =>
Lens s t a b
field @"acl") (forall a. a -> Maybe a
Just ObjectCannedACL
ObjectCannedACL_Bucket_owner_full_control)
(BucketName -> ObjectKey -> RequestBody -> PutObject
newPutObject BucketName
b ObjectKey
k (forall a. ToBody a => a -> RequestBody
toBody a
o))
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT forall a b. (a -> b) -> a -> b
$ do
forall (f :: * -> *) a. Functor f => f a -> f ()
void forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) a.
(MonadResource m, AWSRequest a) =>
Env' Identity -> a -> m (AWSResponse a)
send Env' Identity
env forall a b. (a -> b) -> a -> b
$ PutObject
obj
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IO ()
T.putStrLn forall a b. (a -> b) -> a -> b
$
Text
"Successfully Uploaded contents to "
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (forall a. Show a => a -> String
show BucketName
b)
forall a. Semigroup a => a -> a -> a
<> Text
" - "
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (forall a. Show a => a -> String
show ObjectKey
k)
instance PutS3 BL.ByteString
instance PutS3 BS.ByteString
inputToLocation :: Input -> Location
inputToLocation :: Input -> Location
inputToLocation Input
StdInput = Location
Std
inputToLocation (FileInput Maybe String
d String
f) = String -> Location
Local (ShowS
pre String
f)
where
pre :: ShowS
pre = case Maybe String
d of
Maybe String
Nothing -> forall a. Semigroup a => a -> a -> a
(<>) String
""
Just String
s -> forall a. Semigroup a => a -> a -> a
(<>) (String
s forall a. Semigroup a => a -> a -> a
<> String
"/")
inputToLocation (S3Input Region
r BucketName
b ObjectKey
k) = Region -> BucketName -> ObjectKey -> Location
S3 Region
r BucketName
b ObjectKey
k
outputToLocation :: Output -> Location
outputToLocation :: Output -> Location
outputToLocation Output
StdOutput = Location
Std
outputToLocation (FileOutput Maybe String
d String
f) = String -> Location
Local (ShowS
pre String
f)
where
pre :: ShowS
pre = case Maybe String
d of
Maybe String
Nothing -> forall a. Semigroup a => a -> a -> a
(<>) String
""
Just String
s -> forall a. Semigroup a => a -> a -> a
(<>) (String
s forall a. Semigroup a => a -> a -> a
<> String
"/")
outputToLocation (S3Output Region
r BucketName
b ObjectKey
k) = Region -> BucketName -> ObjectKey -> Location
S3 Region
r BucketName
b ObjectKey
k
stdInputParser :: Parser Input
stdInputParser :: Parser Input
stdInputParser = forall (f :: * -> *) a. Applicative f => a -> f a
pure Input
StdInput
stdOutputParser :: Parser Output
stdOutputParser :: Parser Output
stdOutputParser = forall (f :: * -> *) a. Applicative f => a -> f a
pure Output
StdOutput
fileInputParser :: Parser Input
fileInputParser :: Parser Input
fileInputParser =
Maybe String -> String -> Input
FileInput
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional
(forall s. IsString s => Mod OptionFields s -> Parser s
strOption forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"dir" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'd' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"DIRECTORY" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help
String
"optional directory"
)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
(forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"file" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'f' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"INPUT" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"Input file")
fileOutputParser :: Parser Output
fileOutputParser :: Parser Output
fileOutputParser =
Maybe String -> String -> Output
FileOutput
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional
(forall s. IsString s => Mod OptionFields s -> Parser s
strOption forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"outdir" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"DIRECTORY" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help
String
"optional output directory"
)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
(forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"output" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'o' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"OUTPUT" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"Output file")
inputParser :: Parser Input
inputParser :: Parser Input
inputParser = Parser Input
fileInputParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Input
s3InputParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Input
stdInputParser
s3InputParser :: Parser Input
s3InputParser :: Parser Input
s3InputParser =
Region -> BucketName -> ObjectKey -> Input
S3Input
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
( forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"region"
forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'r'
forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"REGION"
forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasValue f => a -> Mod f a
value Region
"us-east-1"
forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"AWS Region"
)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
(forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"bucket" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'b' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"BUCKET" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"S3 bucket")
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
(forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"key" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'k' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"KEY" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"S3 location")
s3OutputParser :: Parser Output
s3OutputParser :: Parser Output
s3OutputParser =
Region -> BucketName -> ObjectKey -> Output
S3Output
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
(forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"outregion" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"OUTREGION" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasValue f => a -> Mod f a
value Region
"us-east-1" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help
String
"output AWS Region"
)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
(forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"outbucket" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"OUTBUCKET" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"output S3 bucket")
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall s. IsString s => Mod OptionFields s -> Parser s
strOption
(forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"outkey" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"OUTPUTKEY" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"output S3 location")
outputParser :: Parser Output
outputParser :: Parser Output
outputParser = Parser Output
fileOutputParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Output
s3OutputParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Output
stdOutputParser
inputDecompressionParser :: Parser InputDecompression
inputDecompressionParser :: Parser InputDecompression
inputDecompressionParser =
forall a. a -> Mod FlagFields a -> Parser a
flag' InputDecompression
Decompress
(forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"decompress" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'd' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"decompress gzipped input")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure InputDecompression
NoDecompress
outputCompressionParser :: Parser OutputCompression
outputCompressionParser :: Parser OutputCompression
outputCompressionParser =
forall a. a -> Mod FlagFields a -> Parser a
flag' OutputCompression
Compress (forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"gzip" forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'z' forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help String
"compress output using gzip")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure OutputCompression
NoCompress
data IOSpec = MkIOSpec {
IOSpec -> Location
inputLocation :: Location
, IOSpec -> Location
outputLocation :: Location
} deriving (Int -> IOSpec -> ShowS
[IOSpec] -> ShowS
IOSpec -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [IOSpec] -> ShowS
$cshowList :: [IOSpec] -> ShowS
show :: IOSpec -> String
$cshow :: IOSpec -> String
showsPrec :: Int -> IOSpec -> ShowS
$cshowsPrec :: Int -> IOSpec -> ShowS
Show)
parseIOSpec :: Maybe PartitionIndex
-> Input
-> Output
-> Either PartitionTemplateError IOSpec
parseIOSpec :: Maybe PartitionIndex
-> Input -> Output -> Either PartitionTemplateError IOSpec
parseIOSpec Maybe PartitionIndex
Nothing Input
i Output
o = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Location -> Location -> IOSpec
MkIOSpec (Input -> Location
inputToLocation Input
i) (Output -> Location
outputToLocation Output
o)
parseIOSpec (Just PartitionIndex
pid) Input
i Output
o =
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Location -> Location -> IOSpec
MkIOSpec forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall {a} {a} {b}. (Either a a, Either a b) -> Either a (a, b)
involve (Location -> Either PartitionTemplateError Location
update forall a b. (a -> b) -> a -> b
$ Input -> Location
inputToLocation Input
i, Location -> Either PartitionTemplateError Location
update forall a b. (a -> b) -> a -> b
$ Output -> Location
outputToLocation Output
o)
where update :: Location -> Either PartitionTemplateError Location
update = forall a b c. (a -> b -> c) -> b -> a -> c
flip Location
-> PartitionIndex -> Either PartitionTemplateError Location
updateLocationWithPartitionIndex PartitionIndex
pid
involve :: (Either a a, Either a b) -> Either a (a, b)
involve (Either a a
x, Either a b
y) = case Either a a
x of
Left a
e -> forall a b. a -> Either a b
Left a
e
Right a
z1 -> case Either a b
y of
Left a
e2 -> forall a b. a -> Either a b
Left a
e2
Right b
z2 -> forall a b. b -> Either a b
Right (a
z1, b
z2)
logSettingsHelpDoc :: Doc
logSettingsHelpDoc :: Doc
logSettingsHelpDoc =
Doc -> Doc
dullblue (Doc -> Doc
bold Doc
"== Log Settings ==") forall a. Semigroup a => a -> a -> a
<> Doc
linebreak forall a. Semigroup a => a -> a -> a
<> [i|
Users can control the logging behavior by setting environmental variables.
The default for asclepias apps is to send logs to stderr in terminal format.
To change the format to JSON (for example) set the format variable
before calling the application, as in:
```
export LOG_FORMAT=json
```
For more information see,
https://hackage.haskell.org/package/Blammo/docs/Blammo-Logging-LogSettings-Env.html
Available environment variables:
LOG_LEVEL known log level (case insensitive)
LOG_DESTINATION stdout|stderr|@<path>
LOG_FORMAT tty|json
LOG_COLOR auto|always|never
|]
ourDefaultLogSettings :: LogSettings
ourDefaultLogSettings :: LogSettings
ourDefaultLogSettings =
LogDestination -> LogSettings -> LogSettings
setLogSettingsDestination LogDestination
LogDestinationStderr LogSettings
defaultLogSettings
logSettingsParser :: Env.Parser Env.Error LogSettings
logSettingsParser :: Parser Error LogSettings
logSettingsParser = (forall a b. (a -> b) -> a -> b
$ LogSettings
ourDefaultLogSettings) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Endo a -> a -> a
appEndo forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
sequenceA
[ forall e a.
AsUnset e =>
Reader e a -> String -> Mod Var a -> Parser e a
Env.var (forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String LogLevels
readLogLevels LogLevels -> LogSettings -> LogSettings
setLogSettingsLevels) String
"LOG_LEVEL" (forall a. a -> Mod Var a
Env.def forall a. Monoid a => a
mempty)
, forall e a.
AsUnset e =>
Reader e a -> String -> Mod Var a -> Parser e a
Env.var (forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String LogDestination
readLogDestination LogDestination -> LogSettings -> LogSettings
setLogSettingsDestination) String
"LOG_DESTINATION" (forall a. a -> Mod Var a
Env.def forall a. Monoid a => a
mempty)
, forall e a.
AsUnset e =>
Reader e a -> String -> Mod Var a -> Parser e a
Env.var (forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String LogFormat
readLogFormat LogFormat -> LogSettings -> LogSettings
setLogSettingsFormat) String
"LOG_FORMAT" (forall a. a -> Mod Var a
Env.def forall a. Monoid a => a
mempty)
, forall e a.
AsUnset e =>
Reader e a -> String -> Mod Var a -> Parser e a
Env.var (forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String LogColor
readLogColor LogColor -> LogSettings -> LogSettings
setLogSettingsColor) String
"LOG_COLOR" (forall a. a -> Mod Var a
Env.def forall a. Monoid a => a
mempty)
]
parseLogSettings :: IO LogSettings
parseLogSettings :: IO LogSettings
parseLogSettings = forall e a.
AsUnset e =>
(Info Error -> Info e) -> Parser e a -> IO a
Env.parse forall a. a -> a
id Parser Error LogSettings
logSettingsParser
endo
:: Env.AsUnread e
=> (String -> Either String a)
-> (a -> b -> b)
-> Env.Reader e (Endo b)
endo :: forall e a b.
AsUnread e =>
(String -> Either String a) -> (a -> b -> b) -> Reader e (Endo b)
endo String -> Either String a
reader a -> b -> b
setter String
x = forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first forall e. AsUnread e => String -> e
Env.unread forall a b. (a -> b) -> a -> b
$ forall a. (a -> a) -> Endo a
Endo forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> b -> b
setter forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Either String a
reader String
x
data PartitionTemplateError =
NOutOfBounds
| NonPostiveWidth
| FailedToParseWidth
| FailedToSplitByDelimiter
deriving (PartitionTemplateError -> PartitionTemplateError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PartitionTemplateError -> PartitionTemplateError -> Bool
$c/= :: PartitionTemplateError -> PartitionTemplateError -> Bool
== :: PartitionTemplateError -> PartitionTemplateError -> Bool
$c== :: PartitionTemplateError -> PartitionTemplateError -> Bool
Eq, Int -> PartitionTemplateError -> ShowS
[PartitionTemplateError] -> ShowS
PartitionTemplateError -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PartitionTemplateError] -> ShowS
$cshowList :: [PartitionTemplateError] -> ShowS
show :: PartitionTemplateError -> String
$cshow :: PartitionTemplateError -> String
showsPrec :: Int -> PartitionTemplateError -> ShowS
$cshowsPrec :: Int -> PartitionTemplateError -> ShowS
Show)
data PartitionTemplatePattern =
FixedWidth Int
| NotFixedWidth deriving (PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
$c/= :: PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
== :: PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
$c== :: PartitionTemplatePattern -> PartitionTemplatePattern -> Bool
Eq, Int -> PartitionTemplatePattern -> ShowS
[PartitionTemplatePattern] -> ShowS
PartitionTemplatePattern -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PartitionTemplatePattern] -> ShowS
$cshowList :: [PartitionTemplatePattern] -> ShowS
show :: PartitionTemplatePattern -> String
$cshow :: PartitionTemplatePattern -> String
showsPrec :: Int -> PartitionTemplatePattern -> ShowS
$cshowsPrec :: Int -> PartitionTemplatePattern -> ShowS
Show)
parsePartitionTemplate ::
String
-> FilePath
-> Either PartitionTemplateError (String, PartitionTemplatePattern, String)
parsePartitionTemplate :: String
-> String
-> Either
PartitionTemplateError (String, PartitionTemplatePattern, String)
parsePartitionTemplate String
delim =
forall {a} {c}.
(Maybe a, Maybe PartitionTemplatePattern, Maybe c)
-> Either PartitionTemplateError (a, PartitionTemplatePattern, c)
checkParse
forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall {a} {a} {b} {c}.
(a -> a) -> (a -> b) -> (a -> c) -> a -> (a, b, c)
call3 forall a. [a] -> Maybe a
headMay (\[String]
x -> ([String]
x forall a. [a] -> Int -> Maybe a
`atMay` Int
1) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= String -> Maybe PartitionTemplatePattern
readTemplatePattern) (forall a. [a] -> Int -> Maybe a
`atMay` Int
2))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\[String]
x -> if forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
x forall a. Eq a => a -> a -> Bool
== Int
3 then forall a b. b -> Either a b
Right [String]
x else forall a b. a -> Either a b
Left PartitionTemplateError
FailedToSplitByDelimiter)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
delim
where call3 :: (a -> a) -> (a -> b) -> (a -> c) -> a -> (a, b, c)
call3 a -> a
f a -> b
g a -> c
h = (\((a
x, b
y), c
z) -> (a
x, b
y, c
z)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((a -> a
f forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& a -> b
g) forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& a -> c
h)
readTemplatePattern :: String -> Maybe PartitionTemplatePattern
readTemplatePattern String
"n" = forall a. a -> Maybe a
Just PartitionTemplatePattern
NotFixedWidth
readTemplatePattern String
x = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int -> PartitionTemplatePattern
FixedWidth (forall a. Read a => String -> Maybe a
readMaybe String
x)
checkParse :: (Maybe a, Maybe PartitionTemplatePattern, Maybe c)
-> Either PartitionTemplateError (a, PartitionTemplatePattern, c)
checkParse (Just a
x, Maybe PartitionTemplatePattern
y, Just c
z) = case Maybe PartitionTemplatePattern
y of
Maybe PartitionTemplatePattern
Nothing -> forall a b. a -> Either a b
Left PartitionTemplateError
FailedToParseWidth
Just PartitionTemplatePattern
pat -> case PartitionTemplatePattern
pat of
PartitionTemplatePattern
NotFixedWidth -> forall a b. b -> Either a b
Right (a
x, PartitionTemplatePattern
pat, c
z)
FixedWidth Int
i -> if Int
i forall a. Ord a => a -> a -> Bool
<= Int
0 then forall a b. a -> Either a b
Left PartitionTemplateError
NonPostiveWidth else forall a b. b -> Either a b
Right (a
x, PartitionTemplatePattern
pat, c
z)
checkParse (Maybe a
_, Maybe PartitionTemplatePattern
_, Maybe c
_) = forall a b. a -> Either a b
Left PartitionTemplateError
FailedToSplitByDelimiter
parsePartitionFilePath ::
FilePath
-> Int
-> Either PartitionTemplateError FilePath
parsePartitionFilePath :: String -> Int -> Either PartitionTemplateError String
parsePartitionFilePath String
x Int
n =
String
-> String
-> Either
PartitionTemplateError (String, PartitionTemplatePattern, String)
parsePartitionTemplate String
"%%" String
x forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall {a}.
(Ord a, Num a, Buildable a, Show a) =>
a
-> (String, PartitionTemplatePattern, String)
-> Either PartitionTemplateError String
fillTemplate Int
n
where
fillTemplate :: a
-> (String, PartitionTemplatePattern, String)
-> Either PartitionTemplateError String
fillTemplate a
i (String
x, FixedWidth Int
n, String
z) =
if a
0 forall a. Ord a => a -> a -> Bool
<= a
i Bool -> Bool -> Bool
&& a
i forall a. Ord a => a -> a -> Bool
<= a
10 forall a b. (Num a, Integral b) => a -> b -> a
^ Int
n forall a. Num a => a -> a -> a
- a
1
then forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ String
x forall a. Semigroup a => a -> a -> a
<> forall a. Format String a -> a
formatToString (forall a r. Buildable a => Int -> Char -> Format r (a -> r)
left Int
n Char
'0') a
i forall a. Semigroup a => a -> a -> a
<> String
z
else forall a b. a -> Either a b
Left PartitionTemplateError
NOutOfBounds
fillTemplate a
i (String
x, PartitionTemplatePattern
NotFixedWidth, String
z) =
if a
i forall a. Ord a => a -> a -> Bool
< a
0
then forall a b. a -> Either a b
Left PartitionTemplateError
NOutOfBounds
else forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ String
x forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show a
i forall a. Semigroup a => a -> a -> a
<> String
z
newtype PartitionIndex = MkPartitionIndex Int deriving (PartitionIndex -> PartitionIndex -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PartitionIndex -> PartitionIndex -> Bool
$c/= :: PartitionIndex -> PartitionIndex -> Bool
== :: PartitionIndex -> PartitionIndex -> Bool
$c== :: PartitionIndex -> PartitionIndex -> Bool
Eq, Int -> PartitionIndex -> ShowS
[PartitionIndex] -> ShowS
PartitionIndex -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PartitionIndex] -> ShowS
$cshowList :: [PartitionIndex] -> ShowS
show :: PartitionIndex -> String
$cshow :: PartitionIndex -> String
showsPrec :: Int -> PartitionIndex -> ShowS
$cshowsPrec :: Int -> PartitionIndex -> ShowS
Show)
partitionIndexParser :: Parser PartitionIndex
partitionIndexParser :: Parser PartitionIndex
partitionIndexParser =
forall a. ReadM a -> Mod OptionFields a -> Parser a
option
(forall a. (String -> Either String a) -> ReadM a
eitherReader
(\String
s -> case forall a. Read a => String -> Maybe a
readMaybe String
s :: Maybe Int of
Maybe Int
Nothing -> forall a b. a -> Either a b
Left String
"partition-index value must be an integer"
Just Int
i ->
if Int
i forall a. Ord a => a -> a -> Bool
< Int
0
then forall a b. a -> Either a b
Left String
"partition-index value must be non-negative"
else forall a b. b -> Either a b
Right (Int -> PartitionIndex
MkPartitionIndex Int
i)
))
( forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"partition-index"
forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"INT"
forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. String -> Mod f a
help
(String
"Non-negative integer value to pass an index to templated input/output paths. " forall a. Semigroup a => a -> a -> a
<>
String
"See Processing Indexed Files help in help text for more information.")
)
partitionIndexDoc :: Doc
partitionIndexDoc :: Doc
partitionIndexDoc =
Doc -> Doc
dullblue (Doc -> Doc
bold Doc
"=== Processing Indexed Files ===")
forall a. Semigroup a => a -> a -> a
<> Doc
linebreak
forall a. Semigroup a => a -> a -> a
<> [i|
The following options have a basic templating functionality for processing
indexed filenames (such as file1.json, file2.json, file3.json, etc):
* --file
* --output
* --key
* --outkey
The template pattern is @%%n%%@, where @n@ is either an integer value
or the character 'n'. If n is a positive integer, then the output is
left-padded with zeros to a fixed width of n. If the template variable is
simply 'n', then the output is not left-padded.
A filepath may contain at most one template.
When templated paths are used, the --partition-index option is interpolated
into the path(s). If both input and output go to one of the options listed,
then both paths must be templated.
**EXAMPLES**
Options
--file=path/to/in-%%5%%.ext
--output=path/to/out-%%5%%.ext
--partition-index=1
-----------------------
Interpretation
input file = path/to/in-00001.ext
output file = path/to/out-00001.ext
Options
--file=path/to/in%%n%%.ext
--output=path/to/out%%n%%.ext
--partition-index=1
-----------------------
Interpretation
input file = path/to/in1.ext
output file = path/to/out1.ext
Options
--file=path/to/in-%%5%%.ext
--output=path/to/out-%%5%%.ext
--partition-index=1
-----------------------
Interpretation
input file = path/to/in-00001.ext
output file = path/to/out-00001.ext
Options
--file=path/to/in%%n%%.ext
--outkey=path/to/out%%n%%.ext
--partition-index=1
-----------------------
Interpretation
input file = path/to/in1.ext
output S3 key = path/to/out1.ext
|]
updateLocationWithPartitionIndex ::
Location
-> PartitionIndex
-> Either PartitionTemplateError Location
updateLocationWithPartitionIndex :: Location
-> PartitionIndex -> Either PartitionTemplateError Location
updateLocationWithPartitionIndex Location
Std PartitionIndex
_ = forall a b. b -> Either a b
Right Location
Std
updateLocationWithPartitionIndex (Local String
f) (MkPartitionIndex Int
i) =
case String -> Int -> Either PartitionTemplateError String
parsePartitionFilePath String
f Int
i of
Left PartitionTemplateError
e -> forall a b. a -> Either a b
Left PartitionTemplateError
e
Right String
x -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ String -> Location
Local String
x
updateLocationWithPartitionIndex (S3 Region
r BucketName
b (ObjectKey Text
o)) (MkPartitionIndex Int
i) =
case String -> Int -> Either PartitionTemplateError String
parsePartitionFilePath (Text -> String
T.unpack Text
o) Int
i of
Left PartitionTemplateError
e -> forall a b. a -> Either a b
Left PartitionTemplateError
e
Right String
x -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Region -> BucketName -> ObjectKey -> Location
S3 Region
r BucketName
b (Text -> ObjectKey
ObjectKey (String -> Text
T.pack String
x))