module Data.Clustering.Hierarchical.Internal.Types
    ( Dendrogram(..)
    , Linkage(..)
    , Distance
    ) where

-- from base
import Control.Applicative ((<$>), (<*>))
import Data.Foldable (Foldable (..))
import Data.Monoid (mappend)
import Data.Traversable (Traversable(..))

-- | Data structure for storing hierarchical clusters.  The
-- distance between clusters is stored on the branches.
-- Distances between leafs are the distances between the elements
-- on those leafs, while distances between branches are defined
-- by the linkage used (see 'Linkage').
data Dendrogram a =
    Leaf a
    -- ^ The leaf contains the item @a@ itself.
  | Branch {-# UNPACK #-} !Distance (Dendrogram a) (Dendrogram a)
    -- ^ Each branch connects two clusters/dendrograms that are
    -- @d@ distance apart.
    deriving (Dendrogram a -> Dendrogram a -> Bool
(Dendrogram a -> Dendrogram a -> Bool)
-> (Dendrogram a -> Dendrogram a -> Bool) -> Eq (Dendrogram a)
forall a. Eq a => Dendrogram a -> Dendrogram a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Dendrogram a -> Dendrogram a -> Bool
$c/= :: forall a. Eq a => Dendrogram a -> Dendrogram a -> Bool
== :: Dendrogram a -> Dendrogram a -> Bool
$c== :: forall a. Eq a => Dendrogram a -> Dendrogram a -> Bool
Eq, Eq (Dendrogram a)
Eq (Dendrogram a) =>
(Dendrogram a -> Dendrogram a -> Ordering)
-> (Dendrogram a -> Dendrogram a -> Bool)
-> (Dendrogram a -> Dendrogram a -> Bool)
-> (Dendrogram a -> Dendrogram a -> Bool)
-> (Dendrogram a -> Dendrogram a -> Bool)
-> (Dendrogram a -> Dendrogram a -> Dendrogram a)
-> (Dendrogram a -> Dendrogram a -> Dendrogram a)
-> Ord (Dendrogram a)
Dendrogram a -> Dendrogram a -> Bool
Dendrogram a -> Dendrogram a -> Ordering
Dendrogram a -> Dendrogram a -> Dendrogram a
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall a. Ord a => Eq (Dendrogram a)
forall a. Ord a => Dendrogram a -> Dendrogram a -> Bool
forall a. Ord a => Dendrogram a -> Dendrogram a -> Ordering
forall a. Ord a => Dendrogram a -> Dendrogram a -> Dendrogram a
min :: Dendrogram a -> Dendrogram a -> Dendrogram a
$cmin :: forall a. Ord a => Dendrogram a -> Dendrogram a -> Dendrogram a
max :: Dendrogram a -> Dendrogram a -> Dendrogram a
$cmax :: forall a. Ord a => Dendrogram a -> Dendrogram a -> Dendrogram a
>= :: Dendrogram a -> Dendrogram a -> Bool
$c>= :: forall a. Ord a => Dendrogram a -> Dendrogram a -> Bool
> :: Dendrogram a -> Dendrogram a -> Bool
$c> :: forall a. Ord a => Dendrogram a -> Dendrogram a -> Bool
<= :: Dendrogram a -> Dendrogram a -> Bool
$c<= :: forall a. Ord a => Dendrogram a -> Dendrogram a -> Bool
< :: Dendrogram a -> Dendrogram a -> Bool
$c< :: forall a. Ord a => Dendrogram a -> Dendrogram a -> Bool
compare :: Dendrogram a -> Dendrogram a -> Ordering
$ccompare :: forall a. Ord a => Dendrogram a -> Dendrogram a -> Ordering
$cp1Ord :: forall a. Ord a => Eq (Dendrogram a)
Ord, Int -> Dendrogram a -> ShowS
[Dendrogram a] -> ShowS
Dendrogram a -> String
(Int -> Dendrogram a -> ShowS)
-> (Dendrogram a -> String)
-> ([Dendrogram a] -> ShowS)
-> Show (Dendrogram a)
forall a. Show a => Int -> Dendrogram a -> ShowS
forall a. Show a => [Dendrogram a] -> ShowS
forall a. Show a => Dendrogram a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Dendrogram a] -> ShowS
$cshowList :: forall a. Show a => [Dendrogram a] -> ShowS
show :: Dendrogram a -> String
$cshow :: forall a. Show a => Dendrogram a -> String
showsPrec :: Int -> Dendrogram a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Dendrogram a -> ShowS
Show)

-- | A distance is simply a synonym of 'Double' for efficiency.
type Distance = Double

-- | Does not recalculate the distances!
instance Functor Dendrogram where
    fmap :: (a -> b) -> Dendrogram a -> Dendrogram b
fmap f :: a -> b
f (Leaf d :: a
d)         = b -> Dendrogram b
forall a. a -> Dendrogram a
Leaf (a -> b
f a
d)
    fmap f :: a -> b
f (Branch s :: Distance
s c1 :: Dendrogram a
c1 c2 :: Dendrogram a
c2) = Distance -> Dendrogram b -> Dendrogram b -> Dendrogram b
forall a. Distance -> Dendrogram a -> Dendrogram a -> Dendrogram a
Branch Distance
s ((a -> b) -> Dendrogram a -> Dendrogram b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f Dendrogram a
c1) ((a -> b) -> Dendrogram a -> Dendrogram b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f Dendrogram a
c2)

instance Foldable Dendrogram where
    foldMap :: (a -> m) -> Dendrogram a -> m
foldMap f :: a -> m
f (Leaf d :: a
d)         = a -> m
f a
d
    foldMap f :: a -> m
f (Branch _ c1 :: Dendrogram a
c1 c2 :: Dendrogram a
c2) = (a -> m) -> Dendrogram a -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap a -> m
f Dendrogram a
c1 m -> m -> m
forall a. Monoid a => a -> a -> a
`mappend` (a -> m) -> Dendrogram a -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap a -> m
f Dendrogram a
c2

instance Traversable Dendrogram where
    traverse :: (a -> f b) -> Dendrogram a -> f (Dendrogram b)
traverse f :: a -> f b
f (Leaf d :: a
d)         = b -> Dendrogram b
forall a. a -> Dendrogram a
Leaf (b -> Dendrogram b) -> f b -> f (Dendrogram b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> a -> f b
f a
d
    traverse f :: a -> f b
f (Branch s :: Distance
s c1 :: Dendrogram a
c1 c2 :: Dendrogram a
c2) = Distance -> Dendrogram b -> Dendrogram b -> Dendrogram b
forall a. Distance -> Dendrogram a -> Dendrogram a -> Dendrogram a
Branch Distance
s (Dendrogram b -> Dendrogram b -> Dendrogram b)
-> f (Dendrogram b) -> f (Dendrogram b -> Dendrogram b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (a -> f b) -> Dendrogram a -> f (Dendrogram b)
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse a -> f b
f Dendrogram a
c1 f (Dendrogram b -> Dendrogram b)
-> f (Dendrogram b) -> f (Dendrogram b)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (a -> f b) -> Dendrogram a -> f (Dendrogram b)
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse a -> f b
f Dendrogram a
c2


-- | The linkage type determines how the distance between
-- clusters will be calculated.  These are the linkage types
-- currently available on this library.
data Linkage =
    SingleLinkage
  -- ^ The distance between two clusters @a@ and @b@ is the
  -- /minimum/ distance between an element of @a@ and an element
  -- of @b@.
  | CompleteLinkage
  -- ^ The distance between two clusters @a@ and @b@ is the
  -- /maximum/ distance between an element of @a@ and an element
  -- of @b@.
  | CLINK
  -- ^ The same as 'CompleteLinkage', but using the CLINK
  -- algorithm.  It's much faster however doesn't always give the
  -- best complete linkage dendrogram.
  | UPGMA
  -- ^ Unweighted Pair Group Method with Arithmetic mean, also
  -- called \"average linkage\".  The distance between two
  -- clusters @a@ and @b@ is the /arithmetic average/ between the
  -- distances of all elements in @a@ to all elements in @b@.
  | FakeAverageLinkage
  -- ^ This method is usually wrongly called \"average linkage\".
  -- The distance between cluster @a = a1 U a2@ (that is, cluster
  -- @a@ was formed by the linkage of clusters @a1@ and @a2@) and
  -- an old cluster @b@ is @(d(a1,b) + d(a2,b)) / 2@.  So when
  -- clustering two elements to create a cluster, this method is
  -- the same as UPGMA.  However, in general when joining two
  -- clusters this method assigns equal weights to @a1@ and @a2@,
  -- while UPGMA assigns weights proportional to the number of
  -- elements in each cluster.  See, for example:
  --
  -- *
  -- <http://www.cs.tau.ac.il/~rshamir/algmb/00/scribe00/html/lec08/node21.html>,
  -- which defines the real UPGMA and gives the equation to
  -- calculate the distance between an old and a new cluster.
  --
  -- *
  -- <http://github.com/JadeFerret/ai4r/blob/master/lib/ai4r/clusterers/average_linkage.rb>,
  -- code for \"average linkage\" on ai4r library implementing
  -- what we call here @FakeAverageLinkage@ and not UPGMA.
    deriving (Linkage -> Linkage -> Bool
(Linkage -> Linkage -> Bool)
-> (Linkage -> Linkage -> Bool) -> Eq Linkage
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Linkage -> Linkage -> Bool
$c/= :: Linkage -> Linkage -> Bool
== :: Linkage -> Linkage -> Bool
$c== :: Linkage -> Linkage -> Bool
Eq, Eq Linkage
Eq Linkage =>
(Linkage -> Linkage -> Ordering)
-> (Linkage -> Linkage -> Bool)
-> (Linkage -> Linkage -> Bool)
-> (Linkage -> Linkage -> Bool)
-> (Linkage -> Linkage -> Bool)
-> (Linkage -> Linkage -> Linkage)
-> (Linkage -> Linkage -> Linkage)
-> Ord Linkage
Linkage -> Linkage -> Bool
Linkage -> Linkage -> Ordering
Linkage -> Linkage -> Linkage
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Linkage -> Linkage -> Linkage
$cmin :: Linkage -> Linkage -> Linkage
max :: Linkage -> Linkage -> Linkage
$cmax :: Linkage -> Linkage -> Linkage
>= :: Linkage -> Linkage -> Bool
$c>= :: Linkage -> Linkage -> Bool
> :: Linkage -> Linkage -> Bool
$c> :: Linkage -> Linkage -> Bool
<= :: Linkage -> Linkage -> Bool
$c<= :: Linkage -> Linkage -> Bool
< :: Linkage -> Linkage -> Bool
$c< :: Linkage -> Linkage -> Bool
compare :: Linkage -> Linkage -> Ordering
$ccompare :: Linkage -> Linkage -> Ordering
$cp1Ord :: Eq Linkage
Ord, Int -> Linkage -> ShowS
[Linkage] -> ShowS
Linkage -> String
(Int -> Linkage -> ShowS)
-> (Linkage -> String) -> ([Linkage] -> ShowS) -> Show Linkage
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Linkage] -> ShowS
$cshowList :: [Linkage] -> ShowS
show :: Linkage -> String
$cshow :: Linkage -> String
showsPrec :: Int -> Linkage -> ShowS
$cshowsPrec :: Int -> Linkage -> ShowS
Show, Int -> Linkage
Linkage -> Int
Linkage -> [Linkage]
Linkage -> Linkage
Linkage -> Linkage -> [Linkage]
Linkage -> Linkage -> Linkage -> [Linkage]
(Linkage -> Linkage)
-> (Linkage -> Linkage)
-> (Int -> Linkage)
-> (Linkage -> Int)
-> (Linkage -> [Linkage])
-> (Linkage -> Linkage -> [Linkage])
-> (Linkage -> Linkage -> [Linkage])
-> (Linkage -> Linkage -> Linkage -> [Linkage])
-> Enum Linkage
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: Linkage -> Linkage -> Linkage -> [Linkage]
$cenumFromThenTo :: Linkage -> Linkage -> Linkage -> [Linkage]
enumFromTo :: Linkage -> Linkage -> [Linkage]
$cenumFromTo :: Linkage -> Linkage -> [Linkage]
enumFromThen :: Linkage -> Linkage -> [Linkage]
$cenumFromThen :: Linkage -> Linkage -> [Linkage]
enumFrom :: Linkage -> [Linkage]
$cenumFrom :: Linkage -> [Linkage]
fromEnum :: Linkage -> Int
$cfromEnum :: Linkage -> Int
toEnum :: Int -> Linkage
$ctoEnum :: Int -> Linkage
pred :: Linkage -> Linkage
$cpred :: Linkage -> Linkage
succ :: Linkage -> Linkage
$csucc :: Linkage -> Linkage
Enum)