{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

-- | Module defining the output types for 'Cohort.runVariables'.
module Variable.Variable where

import Data.Aeson (ToJSON (..), Value)
import Data.Singletons
import Data.Text
import qualified Data.Vector as V
import GHC.Generics (Generic)
import Variable.Attributes (VarAttrs (..))
import Variable.R.Factor
import Variable.R.SEXP
import Variable.R.Stype

-- | 'Variable', the element type of 'VariableRow', exists to ensure as much as
-- possible that the values computed within an `asclepias` application can be
-- seamlessly converted to one of the supported target type systems downstream.
-- At present, the conversions all pass through JSON as an intermediary.
--
-- Each supported target has an associated variant of 'Variable'. See the
-- variant-specific documentation for details.
--
-- Programmers building an `asclepias` application are intended to wrap their
-- computations in 'Variable' only at the final step of the computations in
-- 'Cohort.runVariables'. 'Variable' intentionally erases the underlying types
-- it wraps, so as to allow an inhomogeneous list in 'VariableRow'. That makes
-- it inconvenient to work with 'Variable' directly in most cases.
--
-- However, users might find it helpful to work with the underlying target
-- types that 'Variable' wraps, such as the base R vectors represented by
-- @RTypeRep r@. See the documentation for the respective types for details.
--
-- To construct a 'Variable', users will use the smart constructors for each
-- target type, such as 'rVector'. Each supported target is represented by one
-- of the variants of 'Variable'.
--
--
-- ==== __Examples__
--
-- Example of converting a list of '[Bool]' to the target R logical vector and
-- wrapping in 'Variable' all in one step.
--
-- The printed output shape shown here is determined by the internal type
-- 'VariableWrapped', which provides the JSON output shape, and is used only
-- for debugging or logging. It contains target-dependent attributes and type
-- information needed to interpret values of this 'Variable' in downstream
-- applications, as read from JSON.
--
-- >>> :set -XOverloadedStrings
-- >>> import Variable
-- >>> :{
-- >>> myVar :: Variable
-- >>> myVar = rVector "myVar" $ as_logical [True, False]
-- >>> :}
-- >>> myVar
-- MkVariableWrapped {varTarget = "RVector", vals = Array [Bool True,Bool
-- False], attrs = MkVarAttrs {varType = "LGLSXP", varName = "myVar"}, subAttrs
-- = Array []}
--
-- Example of using R vector representations for interim computations,
-- before wrapping the final value in 'Variable'. R representations are
-- 'Data.Vector.Vector' s, so you can use all of the utilities of that module.
--
-- Since R has no singleton types, e.g. 'Integer', only vectors, summarizing
-- functions such as 'Data.Vector.maximum' must wrap the results of their
-- computation in a vector again. The utility 'Variable.R.SEXP.summarizeWith'
-- is provided to do so as a convenience.
--
-- See 'RTypeRep' for more on how to use R-related types.
--
-- >>> :set -XOverloadedStrings -XDataKinds
-- >>> import Variable
-- >>> import qualified Data.Vector as V
-- >>> :{
-- >>> ageAtEvent :: RTypeRep 'REALSXP
-- >>> ageAtEvent = as_numeric [51 :: Double, 30, 60]
-- >>> maxAgeVar :: Variable
-- >>> maxAgeVar = rVector "maxAgeVar" $ summarizeWith V.maximum ageAtEvent
-- >>> :}
-- >>> maxAgeVar
-- MkVariableWrapped {varTarget = "RVector", vals = Array [Number 60.0], attrs
-- = MkVarAttrs {varType = "REALSXP", varName = "maxAgeVar"}, subAttrs = Array
-- []}
data Variable where
  -- | A subset of base R vector types, those listed among 'SEXPTYPE', backed
  -- by the Haskell types given in 'RTypeRep'. A 'Variable' intended for this
  -- target should be constructed with 'rVector'.
  RVector :: (RTypeRepConstraints r) => RTypeRep r -> VarAttrs -> Variable
  -- | The unordered @factor@ type in R, backed by the 'Variable.R.Factor.Factor'
  -- type in Haskell and constructed with 'rFactor'.
  RFactor :: Factor -> VarAttrs -> Variable
  -- | Vectors defined in the R @stype@ package, backed by some
  -- 'Variable.R.Stype.Stype' and constructed with 'stypeVector'. All but
  -- @v_rcensored@ is supported.
  StypeVector :: (RTypeRepConstraints r) => Stype r -> VarAttrs -> Variable
  -- | Element of an 'RTypeRep r'. R has no singleton type for its atomic
  -- vectors. However, it is sometimes convenient when processing data row-wise
  -- to indicate to the downstream consumer that the 'Variable' should be a
  -- singleton, to be wrapped in the associated 'SEXPTYPE' vector indicated by
  -- parameter 'r' of the input. It is up to the user to ensure 'r' is not
  -- @'VECSXP@.
  RAtomicVectorElem :: (RTypeRepConstraints r) => SEXPElem r -> VarAttrs -> Variable

-- | A 'VariableRow' is the output type of 'runVariables' and is the collection
-- of computed values associated with each 'ObsUnit' of a 'Cohort'. It can be
-- thought of as a row of data for a given observational unit in a given
-- cohort, with each component 'Variable' giving a particular column's value
-- for that observational unit.
--
-- See 'Variable' for details.
--
-- ==== __JSON shape__
--
-- This section describes the JSON shape produced by a single 'VariableRow'.
-- The JSON will include one element of this shape for each 'Cohort.ObsUnit'.
-- See the top-level 'Hasklepias' module documentation for an overview of the
-- full JSON output shape produced by an @asclepias@ cohort-building
-- application.
--
-- JSON output from a 'VariableRow' is an @array@, each element of which is an
-- @object@ with shape demonstrated by the following example:
--
-- @
-- {
--   "varTarget": "StypeVector",
--   "attrs": {
--     "varName": "ageAtIndex",
--     "varType": "INTSXP"
--   },
--   "subAttrs": {
--     "long_label": "Age at day of index, computed from January 7 of smallest provided birth year.",
--     "short_label": "Age at day of index",
--     "special_attrs": [
--       "91"
--     ],
--     "study_role": null,
--     "stypeType": "v_nominal"
--   },
--   "vals": [
--     91
--   ]
-- }
-- @
--
--   - "varTarget" identifies the supported target type this data was
--   constructed from, corresponding to one of the 'Variable' variant names.
--   - "attrs" is an @object@ with @string@ fields defining the variable name
--   and target variable type. In this example, the target is an @integer@
--   vector in R with name "ageAtIndex".
--   - "subAttrs" is an @object@ that can vary based on the "varTarget". A
--   "StypeVector" takes additional context. If the "varTarget" were "RVector",
--   indicating one of the base R 'SEXPTYPE' s, this field would be @null@.
--   - "vals" contains the values of the 'Variable'. At present, only R-related
--   vector types are supported and hence "vals" will always be an @array@. The
--   JSON type of elements in this array will differ based on the "varTarget"
--   and "varType".  At present, elements will either be of one of the atomic
--   JSON types (@null@, @bool@, @number@, @string@) or will be of the same
--   shape as the JSON 'Variable' displayed above, in the case of @"varType":
--   "VECSXP"@, representing an R list.
type VariableRow = [Variable]

{- CONSTRUCTORS -}

-- | Constructor for 'RVector' with the given name as first argument,
-- automatically producing type attribute information. To produce an
-- 'RTypeRep', use of the of @as_*@ constructors or produce one directly by
-- constructing the 'Data.Vector.Vector a' with appropriate 'a', as determined
-- by 'RTypeRep'.
rVector :: (RTypeRepConstraints r) => Text -> RTypeRep r -> Variable
rVector :: forall (r :: SEXPTYPE).
RTypeRepConstraints r =>
Text -> RTypeRep r -> Variable
rVector Text
nm RTypeRep r
rv = forall (r :: SEXPTYPE).
RTypeRepConstraints r =>
RTypeRep r -> VarAttrs -> Variable
RVector RTypeRep r
rv VarAttrs
a
  where
    a :: VarAttrs
a = Text -> Text -> VarAttrs
MkVarAttrs (forall (r :: SEXPTYPE). SingI r => RTypeRep r -> Text
textSEXPTYPEOf RTypeRep r
rv) Text
nm

-- | Constructor for 'RFactor' with the given name as first argument, 'values'
-- as second argument and 'levels' as third.  Calls 'factor', which is
-- associated to an unordered @factor@ in R. Note the 'varType' always is
-- @STRSXP@ and not the 'SEXPTYPE' of the input, in keeping with the R
-- implementation in which 'factor' variables are backed by character vectors.
rFactor :: (AsCharacter a) => Text -> a -> V.Vector Text -> Variable
rFactor :: forall a. AsCharacter a => Text -> a -> Vector Text -> Variable
rFactor Text
nm a
rv Vector Text
lvls = Factor -> VarAttrs -> Variable
RFactor (forall a. AsCharacter a => a -> Vector Text -> Factor
factor a
rv Vector Text
lvls) VarAttrs
a
  where
    a :: VarAttrs
a = Text -> Text -> VarAttrs
MkVarAttrs Text
"STRSXP" Text
nm

-- | Constructor for 'StypeVector' with the given name as first argument. To
-- produce a 'Stype', use one of the @v_*@ constructors such as
-- 'Variable.R.Stype.v_binary' or 'Variable.R.Stype.as_v_binary'.
stypeVector :: (RTypeRepConstraints r) => Text -> Stype r -> Variable
stypeVector :: forall (r :: SEXPTYPE).
RTypeRepConstraints r =>
Text -> Stype r -> Variable
stypeVector Text
nm Stype r
sv = forall (r :: SEXPTYPE).
RTypeRepConstraints r =>
Stype r -> VarAttrs -> Variable
StypeVector Stype r
sv VarAttrs
a
  where
    a :: VarAttrs
a = Text -> Text -> VarAttrs
MkVarAttrs Text
ty Text
nm
    (Text
ty, StypeAttrs
_) = forall (r :: SEXPTYPE). Stype r -> (Text, StypeAttrs)
stypeInfoOf Stype r
sv

rAtomicVectorElem :: (RTypeRepConstraints r) => Text -> SEXPElem r -> Variable
rAtomicVectorElem :: forall (r :: SEXPTYPE).
RTypeRepConstraints r =>
Text -> SEXPElem r -> Variable
rAtomicVectorElem Text
nm SEXPElem r
e = forall (r :: SEXPTYPE).
RTypeRepConstraints r =>
SEXPElem r -> VarAttrs -> Variable
RAtomicVectorElem SEXPElem r
e VarAttrs
a
  where
    a :: VarAttrs
a = Text -> Text -> VarAttrs
MkVarAttrs Text
ty Text
nm
    ty :: Text
ty = forall (r :: SEXPTYPE). SingI r => SEXPElem r -> Text
textSEXPTYPEOfElem SEXPElem r
e

{- Show, JSON etc -}

-- | Internal. Container to control Show and ToJSON shapes.
data VariableWrapped = MkVariableWrapped
  { -- | "Target" of the variable, meaning which variant of 'Variable' the
    -- data are wrapped in.
    VariableWrapped -> Text
varTarget :: Text,
    -- | Values of the variable.
    VariableWrapped -> Value
vals :: Value,
    -- | Common attritubutes required for all 'Variable' s.
    VariableWrapped -> VarAttrs
attrs :: VarAttrs,
    -- | Any additional attributes lumped into a JSON 'Value'.
    VariableWrapped -> Value
subAttrs :: Value
  }
  deriving (VariableWrapped -> VariableWrapped -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: VariableWrapped -> VariableWrapped -> Bool
$c/= :: VariableWrapped -> VariableWrapped -> Bool
== :: VariableWrapped -> VariableWrapped -> Bool
$c== :: VariableWrapped -> VariableWrapped -> Bool
Eq, forall x. Rep VariableWrapped x -> VariableWrapped
forall x. VariableWrapped -> Rep VariableWrapped x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep VariableWrapped x -> VariableWrapped
$cfrom :: forall x. VariableWrapped -> Rep VariableWrapped x
Generic, Eq VariableWrapped
VariableWrapped -> VariableWrapped -> Bool
VariableWrapped -> VariableWrapped -> Ordering
VariableWrapped -> VariableWrapped -> VariableWrapped
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: VariableWrapped -> VariableWrapped -> VariableWrapped
$cmin :: VariableWrapped -> VariableWrapped -> VariableWrapped
max :: VariableWrapped -> VariableWrapped -> VariableWrapped
$cmax :: VariableWrapped -> VariableWrapped -> VariableWrapped
>= :: VariableWrapped -> VariableWrapped -> Bool
$c>= :: VariableWrapped -> VariableWrapped -> Bool
> :: VariableWrapped -> VariableWrapped -> Bool
$c> :: VariableWrapped -> VariableWrapped -> Bool
<= :: VariableWrapped -> VariableWrapped -> Bool
$c<= :: VariableWrapped -> VariableWrapped -> Bool
< :: VariableWrapped -> VariableWrapped -> Bool
$c< :: VariableWrapped -> VariableWrapped -> Bool
compare :: VariableWrapped -> VariableWrapped -> Ordering
$ccompare :: VariableWrapped -> VariableWrapped -> Ordering
Ord, Int -> VariableWrapped -> ShowS
[VariableWrapped] -> ShowS
VariableWrapped -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [VariableWrapped] -> ShowS
$cshowList :: [VariableWrapped] -> ShowS
show :: VariableWrapped -> String
$cshow :: VariableWrapped -> String
showsPrec :: Int -> VariableWrapped -> ShowS
$cshowsPrec :: Int -> VariableWrapped -> ShowS
Show)

instance ToJSON VariableWrapped

-- NOTE Show and ToJSON instances for Variable are defined only in terms of
-- VariableWrapped, which is the latter's entire purpose.
instance Show Variable where
  show :: Variable -> String
show = forall a. Show a => a -> String
show forall b c a. (b -> c) -> (a -> b) -> a -> c
. Variable -> VariableWrapped
asVariableWrapped

instance ToJSON Variable where
  toJSON :: Variable -> Value
toJSON = forall a. ToJSON a => a -> Value
toJSON forall b c a. (b -> c) -> (a -> b) -> a -> c
. Variable -> VariableWrapped
asVariableWrapped

{- UTILITIES -}

-- Helper utilities for nested RVector lists.
atomicRVectorAsWrapped :: (RTypeRepConstraints s) => RTypeRep s -> VarAttrs -> VariableWrapped
atomicRVectorAsWrapped :: forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
RTypeRep s -> VarAttrs -> VariableWrapped
atomicRVectorAsWrapped Vector (Maybe (SEXPElem s))
rv VarAttrs
a = Text -> Value -> VarAttrs -> Value -> VariableWrapped
MkVariableWrapped Text
"RVector" (forall a. ToJSON a => a -> Value
toJSON Vector (Maybe (SEXPElem s))
rv) VarAttrs
a (forall a. ToJSON a => a -> Value
toJSON ())

rVectorAsWrappedA :: (RTypeRepConstraints s) => Sing s -> RTypeRep s -> VarAttrs -> VariableWrapped
rVectorAsWrappedA :: forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
Sing s -> RTypeRep s -> VarAttrs -> VariableWrapped
rVectorAsWrappedA Sing s
sxp RTypeRep s
rv VarAttrs
a = case Sing s
sxp of
  Sing s
SSEXPTYPE s
SLGLSXP -> forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
RTypeRep s -> VarAttrs -> VariableWrapped
atomicRVectorAsWrapped RTypeRep s
rv VarAttrs
a
  Sing s
SSEXPTYPE s
SINTSXP -> forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
RTypeRep s -> VarAttrs -> VariableWrapped
atomicRVectorAsWrapped RTypeRep s
rv VarAttrs
a
  Sing s
SSEXPTYPE s
SREALSXP -> forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
RTypeRep s -> VarAttrs -> VariableWrapped
atomicRVectorAsWrapped RTypeRep s
rv VarAttrs
a
  Sing s
SSEXPTYPE s
SCPLSXP -> forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
RTypeRep s -> VarAttrs -> VariableWrapped
atomicRVectorAsWrapped RTypeRep s
rv VarAttrs
a
  Sing s
SSEXPTYPE s
SSTRSXP -> forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
RTypeRep s -> VarAttrs -> VariableWrapped
atomicRVectorAsWrapped RTypeRep s
rv VarAttrs
a
  -- Must recurse through the list to produce a
  -- 'VariableWrapped' (converting it then to JSON) for each
  -- element. Types erased within 'SomeRTypeRep' are
  -- recovered.
  Sing s
SSEXPTYPE s
SVECSXP ->
    Text -> Value -> VarAttrs -> Value -> VariableWrapped
MkVariableWrapped
      Text
"RVector"
      ( forall a. ToJSON a => a -> Value
toJSON forall a b. (a -> b) -> a -> b
$
          forall a b. (a -> b) -> Vector a -> Vector b
V.map
            ( \case
                Maybe (Text, SomeRTypeRep)
Nothing -> forall a. Maybe a
Nothing
                Just (Text
nm, SomeRTypeRep
v) -> forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall t.
SomeRTypeRep
-> (forall (s :: SEXPTYPE).
    RTypeRepConstraints s =>
    RTypeRep s -> t)
-> t
withSomeRTypeRep SomeRTypeRep
v (\RTypeRep s
rv' -> forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
Sing s -> RTypeRep s -> VarAttrs -> VariableWrapped
rVectorAsWrappedA (forall (r :: SEXPTYPE). SingI r => RTypeRep r -> Sing r
singOfRTypeRep RTypeRep s
rv') RTypeRep s
rv' (Text -> Text -> VarAttrs
MkVarAttrs (SomeRTypeRep -> Text
textSEXPTYPEOfErased SomeRTypeRep
v) Text
nm))
            )
            RTypeRep s
rv
      )
      VarAttrs
a
      (forall a. ToJSON a => a -> Value
toJSON ())

rVectorAsWrapped :: forall s. (RTypeRepConstraints s) => RTypeRep s -> VarAttrs -> VariableWrapped
rVectorAsWrapped :: forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
RTypeRep s -> VarAttrs -> VariableWrapped
rVectorAsWrapped = forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
Sing s -> RTypeRep s -> VarAttrs -> VariableWrapped
rVectorAsWrappedA (forall {k} (a :: k). SingI a => Sing a
sing @s)

-- | Internal. Convert a 'Variable' to its structured output shape for JSON and String
-- targets, 'VariableWrapped'.
asVariableWrapped :: Variable -> VariableWrapped
asVariableWrapped :: Variable -> VariableWrapped
asVariableWrapped (RVector RTypeRep r
rv VarAttrs
a) = forall (s :: SEXPTYPE).
RTypeRepConstraints s =>
RTypeRep s -> VarAttrs -> VariableWrapped
rVectorAsWrapped RTypeRep r
rv VarAttrs
a
asVariableWrapped (RFactor Factor
rv VarAttrs
a) = Text -> Value -> VarAttrs -> Value -> VariableWrapped
MkVariableWrapped Text
"RFactor" (forall a. ToJSON a => a -> Value
toJSON forall a b. (a -> b) -> a -> b
$ Factor -> RTypeRep 'STRSXP
values Factor
rv) VarAttrs
a (forall a. ToJSON a => a -> Value
toJSON forall a b. (a -> b) -> a -> b
$ Factor -> Vector Text
levels Factor
rv)
asVariableWrapped (StypeVector Stype r
sv VarAttrs
a) = Text -> Value -> VarAttrs -> Value -> VariableWrapped
MkVariableWrapped Text
"StypeVector" (forall a. ToJSON a => a -> Value
toJSON forall a b. (a -> b) -> a -> b
$ forall (r :: SEXPTYPE). Stype r -> RTypeRep r
stypeRTypeRep Stype r
sv) VarAttrs
a (forall a. ToJSON a => a -> Value
toJSON forall a b. (a -> b) -> a -> b
$ forall (r :: SEXPTYPE). Stype r -> WrappedStypeAttrs
wrappedStypeAttrs Stype r
sv)
asVariableWrapped (RAtomicVectorElem SEXPElem r
e VarAttrs
a) = Text -> Value -> VarAttrs -> Value -> VariableWrapped
MkVariableWrapped Text
"RAtomicVectorElem" (forall a. ToJSON a => a -> Value
toJSON SEXPElem r
e) VarAttrs
a (forall a. ToJSON a => a -> Value
toJSON ())

-- | Internal. Used for testing. Extract the attributes of a 'Variable'.
varAttrs :: Variable -> VarAttrs
varAttrs :: Variable -> VarAttrs
varAttrs (RVector RTypeRep r
_ VarAttrs
a) = VarAttrs
a
varAttrs (RFactor Factor
_ VarAttrs
a) = VarAttrs
a
varAttrs (StypeVector Stype r
_ VarAttrs
a) = VarAttrs
a
varAttrs (RAtomicVectorElem SEXPElem r
_ VarAttrs
a) = VarAttrs
a