{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}

module Variable.R.Factor where

import Data.Aeson (ToJSON (..))
import Data.Text (Text)
import qualified Data.Vector as V
import GHC.Generics (Generic)
import Variable.R.SEXP

-- | Haskell representation of R's `factor`, either ordered or unordered.
-- Construct with 'factor' or 'ordered', preferably.
data Factor = MkFactor
  { -- | Get the @RTypeRep 'STRSXP@ backing the 'Factor'.
    Factor -> RTypeRep 'STRSXP
values :: RTypeRep 'STRSXP,
    -- | Get the levels of the 'Factor'. In R these cannot include 'NA'
    -- values, so 'levels' is of type @Vector Text@ instead of @RTypeRep
    -- 'STRSXP@.
    Factor -> Vector Text
levels :: V.Vector Text
  }
  deriving (forall x. Rep Factor x -> Factor
forall x. Factor -> Rep Factor x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Factor x -> Factor
$cfrom :: forall x. Factor -> Rep Factor x
Generic, Factor -> Factor -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Factor -> Factor -> Bool
$c/= :: Factor -> Factor -> Bool
== :: Factor -> Factor -> Bool
$c== :: Factor -> Factor -> Bool
Eq, Eq Factor
Factor -> Factor -> Bool
Factor -> Factor -> Ordering
Factor -> Factor -> Factor
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Factor -> Factor -> Factor
$cmin :: Factor -> Factor -> Factor
max :: Factor -> Factor -> Factor
$cmax :: Factor -> Factor -> Factor
>= :: Factor -> Factor -> Bool
$c>= :: Factor -> Factor -> Bool
> :: Factor -> Factor -> Bool
$c> :: Factor -> Factor -> Bool
<= :: Factor -> Factor -> Bool
$c<= :: Factor -> Factor -> Bool
< :: Factor -> Factor -> Bool
$c< :: Factor -> Factor -> Bool
compare :: Factor -> Factor -> Ordering
$ccompare :: Factor -> Factor -> Ordering
Ord, Int -> Factor -> ShowS
[Factor] -> ShowS
Factor -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Factor] -> ShowS
$cshowList :: [Factor] -> ShowS
show :: Factor -> String
$cshow :: Factor -> String
showsPrec :: Int -> Factor -> ShowS
$cshowsPrec :: Int -> Factor -> ShowS
Show)

instance ToJSON Factor

-- TODO a candidate for some optimization.

-- | Constructor for 'Factor'. 'as_character' is called on the first argument.
-- User provided 'levels' in the second argument are sorted and made unique
-- with 'VA.sortUniq'.
--
-- Important: As in R, elements of the input vector that do not appear among
-- the 'levels' (after conversion with 'as_character') are marked as invalid,
-- here represented by 'Nothing'. That check has an \(O(nm)\) runtime cost,
-- where n is the length of the input and m the length of the levels.
--
-- To avoid that cost, users can use 'MkConstructor' directly but must
-- themselves ensure all 'values' appear in the 'levels'.
factor :: (AsCharacter a) => a -> V.Vector Text -> Factor
factor :: forall a. AsCharacter a => a -> Vector Text -> Factor
factor a
rv Vector Text
lvls = RTypeRep 'STRSXP -> Vector Text -> Factor
MkFactor Vector (Maybe Text)
rv' Vector Text
lvls'
  where
    lvls' :: Vector Text
lvls' = forall a. Ord a => Vector a -> Vector a
sortUniq Vector Text
lvls
    rv' :: Vector (Maybe Text)
rv' = forall a b. (a -> b) -> Vector a -> Vector b
V.map Maybe Text -> Maybe Text
op forall a b. (a -> b) -> a -> b
$ forall a. AsCharacter a => a -> RTypeRep 'STRSXP
as_character a
rv
    op :: Maybe Text -> Maybe Text
op Maybe Text
Nothing = forall a. Maybe a
Nothing
    op (Just Text
x) = if forall a. Eq a => a -> Vector a -> Bool
V.elem Text
x Vector Text
lvls then forall a. a -> Maybe a
Just Text
x else forall a. Maybe a
Nothing

-- TODO: would like to support 'ordered', but the previous implementation was misguided.
-- the only way this makes sense is in a case where the user wishes to build levels automatically
-- from sortUniq applied to the values. however, that would conflict with the current factor api,
-- which requires the user to supply the levels explicitly --- which was a specific choice discussed
-- in the MR review for this work as part of v0.30.0. i have removed the 'ordered' api rather than provide
-- something ill-composed that will almost immediately be cruft. --bbrown