{-# LANGUAGE BangPatterns, FlexibleContexts #-}

module Data.Clustering.Hierarchical.Internal.DistanceMatrix
    (singleLinkage
    ,completeLinkage
    ,upgma
    ,fakeAverageLinkage
    ) where

-- from base
import Control.Monad (forM_)
import Control.Monad.ST (ST, runST)
import Data.Array (listArray, (!))
import Data.Array.ST (STArray, STUArray, newArray_, newListArray, readArray, writeArray)
import Data.Function (on)
import Data.List (delete, tails, (\\))
import Data.STRef (STRef, newSTRef, readSTRef, writeSTRef)

-- from containers
import qualified Data.IntMap as IM

-- from this package
import Data.Clustering.Hierarchical.Internal.Types

mkErr :: String -> a
mkErr :: String -> a
mkErr = String -> a
forall a. HasCallStack => String -> a
error (String -> a) -> (String -> String) -> String -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ("Data.Clustering.Hierarchical.Internal.DistanceMatrix." String -> String -> String
forall a. [a] -> [a] -> [a]
++)

-- | Internal (to this package) type used to represent a cluster
-- (of possibly just one element).  The @key@ should be less than
-- or equal to all elements of the cluster.
data Cluster = Cluster { Cluster -> Item
key  :: {-# UNPACK #-} !Item  -- ^ Element used as key.
                       , Cluster -> Item
size :: {-# UNPACK #-} !Int   -- ^ At least one, the @key@.
                       }
               deriving (Cluster -> Cluster -> Bool
(Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Bool) -> Eq Cluster
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Cluster -> Cluster -> Bool
$c/= :: Cluster -> Cluster -> Bool
== :: Cluster -> Cluster -> Bool
$c== :: Cluster -> Cluster -> Bool
Eq, Eq Cluster
Eq Cluster =>
(Cluster -> Cluster -> Ordering)
-> (Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Cluster)
-> (Cluster -> Cluster -> Cluster)
-> Ord Cluster
Cluster -> Cluster -> Bool
Cluster -> Cluster -> Ordering
Cluster -> Cluster -> Cluster
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Cluster -> Cluster -> Cluster
$cmin :: Cluster -> Cluster -> Cluster
max :: Cluster -> Cluster -> Cluster
$cmax :: Cluster -> Cluster -> Cluster
>= :: Cluster -> Cluster -> Bool
$c>= :: Cluster -> Cluster -> Bool
> :: Cluster -> Cluster -> Bool
$c> :: Cluster -> Cluster -> Bool
<= :: Cluster -> Cluster -> Bool
$c<= :: Cluster -> Cluster -> Bool
< :: Cluster -> Cluster -> Bool
$c< :: Cluster -> Cluster -> Bool
compare :: Cluster -> Cluster -> Ordering
$ccompare :: Cluster -> Cluster -> Ordering
$cp1Ord :: Eq Cluster
Ord, Item -> Cluster -> String -> String
[Cluster] -> String -> String
Cluster -> String
(Item -> Cluster -> String -> String)
-> (Cluster -> String)
-> ([Cluster] -> String -> String)
-> Show Cluster
forall a.
(Item -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [Cluster] -> String -> String
$cshowList :: [Cluster] -> String -> String
show :: Cluster -> String
$cshow :: Cluster -> String
showsPrec :: Item -> Cluster -> String -> String
$cshowsPrec :: Item -> Cluster -> String -> String
Show)

-- | An element of a cluster.
type Item = IM.Key

-- | Creates a singleton cluster.
singleton :: Item -> Cluster
singleton :: Item -> Cluster
singleton k :: Item
k = $WCluster :: Item -> Item -> Cluster
Cluster {key :: Item
key = Item
k, size :: Item
size = 1}

-- | /O(1)/. Joins two clusters, returns the 'key' that didn't
-- become 'key' of the new cluster as well.  Clusters are not
-- monoid because we don't have 'mempty'.
merge :: Cluster -> Cluster -> (Cluster, Item)
merge :: Cluster -> Cluster -> (Cluster, Item)
merge c1 :: Cluster
c1 c2 :: Cluster
c2 = let (kl :: Item
kl,km :: Item
km) = if Cluster -> Item
key Cluster
c1 Item -> Item -> Bool
forall a. Ord a => a -> a -> Bool
< Cluster -> Item
key Cluster
c2
                            then (Cluster -> Item
key Cluster
c1, Cluster -> Item
key Cluster
c2)
                            else (Cluster -> Item
key Cluster
c2, Cluster -> Item
key Cluster
c1)
              in ($WCluster :: Item -> Item -> Cluster
Cluster {key :: Item
key  = Item
kl
                          ,size :: Item
size = Cluster -> Item
size Cluster
c1 Item -> Item -> Item
forall a. Num a => a -> a -> a
+ Cluster -> Item
size Cluster
c2}
                 ,Item
km)




-- | A distance matrix.
data DistMatrix s =
    DM { DistMatrix s -> STUArray s (Item, Item) Distance
matrix   :: {-# UNPACK #-} !(STUArray s (Item, Item) Distance)
       , DistMatrix s -> STRef s [Item]
active   :: {-# UNPACK #-} !(STRef    s [Item])
       , DistMatrix s -> STArray s Item Cluster
clusters :: {-# UNPACK #-} !(STArray  s Item Cluster)
       }


-- | /O(n^2)/. Creates a list of possible combinations between
-- the given elements.
combinations :: [a] -> [(a,a)]
combinations :: [a] -> [(a, a)]
combinations xs :: [a]
xs = [(a
a,a
b) | (a :: a
a:as :: [a]
as) <- [a] -> [[a]]
forall a. [a] -> [[a]]
tails [a]
xs, a
b <- [a]
as]


-- | /O(n^2)/. Constructs a new distance matrix from a distance
-- function and a number @n@ of elements.  Elements will be drawn
-- from @[1..n]@
fromDistance :: (Item -> Item -> Distance) -> Item -> ST s (DistMatrix s)
fromDistance :: (Item -> Item -> Distance) -> Item -> ST s (DistMatrix s)
fromDistance _ n :: Item
n | Item
n Item -> Item -> Bool
forall a. Ord a => a -> a -> Bool
< 2 = String -> ST s (DistMatrix s)
forall a. String -> a
mkErr "fromDistance: n < 2 is meaningless"
fromDistance dist :: Item -> Item -> Distance
dist n :: Item
n = do
  STUArray s (Item, Item) Distance
matrix_ <- ((Item, Item), (Item, Item))
-> ST s (STUArray s (Item, Item) Distance)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> m (a i e)
newArray_ ((1,2), (Item
nItem -> Item -> Item
forall a. Num a => a -> a -> a
-1,Item
n))
  STRef s [Item]
active_ <- [Item] -> ST s (STRef s [Item])
forall a s. a -> ST s (STRef s a)
newSTRef [1..Item
n]
  [(Item, Item)] -> ((Item, Item) -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Item] -> [(Item, Item)]
forall a. [a] -> [(a, a)]
combinations [1..Item
n]) (((Item, Item) -> ST s ()) -> ST s ())
-> ((Item, Item) -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \x :: (Item, Item)
x -> STUArray s (Item, Item) Distance
-> (Item, Item) -> Distance -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> e -> m ()
writeArray STUArray s (Item, Item) Distance
matrix_ (Item, Item)
x ((Item -> Item -> Distance) -> (Item, Item) -> Distance
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Item -> Item -> Distance
dist (Item, Item)
x)
  STArray s Item Cluster
clusters_ <- (Item, Item) -> [Cluster] -> ST s (STArray s Item Cluster)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> [e] -> m (a i e)
newListArray (1,Item
n) ((Item -> Cluster) -> [Item] -> [Cluster]
forall a b. (a -> b) -> [a] -> [b]
map Item -> Cluster
singleton [1..Item
n])
  DistMatrix s -> ST s (DistMatrix s)
forall (m :: * -> *) a. Monad m => a -> m a
return (DistMatrix s -> ST s (DistMatrix s))
-> DistMatrix s -> ST s (DistMatrix s)
forall a b. (a -> b) -> a -> b
$ $WDM :: forall s.
STUArray s (Item, Item) Distance
-> STRef s [Item] -> STArray s Item Cluster -> DistMatrix s
DM {matrix :: STUArray s (Item, Item) Distance
matrix   = STUArray s (Item, Item) Distance
matrix_
              ,active :: STRef s [Item]
active   = STRef s [Item]
active_
              ,clusters :: STArray s Item Cluster
clusters = STArray s Item Cluster
clusters_}


-- | /O(n^2)/. Returns the minimum distance of the distance
-- matrix.  The first key given is less than the second key.
findMin :: DistMatrix s -> ST s ((Cluster, Cluster), Distance)
findMin :: DistMatrix s -> ST s ((Cluster, Cluster), Distance)
findMin dm :: DistMatrix s
dm = STRef s [Item] -> ST s [Item]
forall s a. STRef s a -> ST s a
readSTRef (DistMatrix s -> STRef s [Item]
forall s. DistMatrix s -> STRef s [Item]
active DistMatrix s
dm) ST s [Item]
-> ([Item] -> ST s ((Cluster, Cluster), Distance))
-> ST s ((Cluster, Cluster), Distance)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Item] -> ST s ((Cluster, Cluster), Distance)
forall (m :: * -> *).
(MArray (STUArray s) Distance m, MArray (STArray s) Cluster m) =>
[Item] -> m ((Cluster, Cluster), Distance)
go1
    where
      matrix_ :: STUArray s (Item, Item) Distance
matrix_ = DistMatrix s -> STUArray s (Item, Item) Distance
forall s. DistMatrix s -> STUArray s (Item, Item) Distance
matrix DistMatrix s
dm
      choose :: (a, b) -> a -> b -> (a, b)
choose b :: (a, b)
b i :: a
i m' :: b
m' = if b
m' b -> b -> Bool
forall a. Ord a => a -> a -> Bool
< (a, b) -> b
forall a b. (a, b) -> b
snd (a, b)
b then (a
i, b
m') else (a, b)
b

      go1 :: [Item] -> m ((Cluster, Cluster), Distance)
go1 is :: [Item]
is@(i1 :: Item
i1:i2 :: Item
i2:_) = do Distance
di <- STUArray s (Item, Item) Distance -> (Item, Item) -> m Distance
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray STUArray s (Item, Item) Distance
matrix_ (Item
i1, Item
i2) -- initial
                            ((b1 :: Item
b1, b2 :: Item
b2), d :: Distance
d) <- [Item] -> ((Item, Item), Distance) -> m ((Item, Item), Distance)
forall (m :: * -> *).
MArray (STUArray s) Distance m =>
[Item] -> ((Item, Item), Distance) -> m ((Item, Item), Distance)
go2 [Item]
is ((Item
i1, Item
i2), Distance
di)
                            Cluster
c1 <- STArray s Item Cluster -> Item -> m Cluster
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray (DistMatrix s -> STArray s Item Cluster
forall s. DistMatrix s -> STArray s Item Cluster
clusters DistMatrix s
dm) Item
b1
                            Cluster
c2 <- STArray s Item Cluster -> Item -> m Cluster
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray (DistMatrix s -> STArray s Item Cluster
forall s. DistMatrix s -> STArray s Item Cluster
clusters DistMatrix s
dm) Item
b2
                            ((Cluster, Cluster), Distance) -> m ((Cluster, Cluster), Distance)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Cluster
c1, Cluster
c2), Distance
d)
      go1 _            = String -> m ((Cluster, Cluster), Distance)
forall a. String -> a
mkErr "findMin: empty DistMatrix"

      go2 :: [Item] -> ((Item, Item), Distance) -> m ((Item, Item), Distance)
go2 (i1 :: Item
i1:is :: [Item]
is@(_:_)) !((Item, Item), Distance)
b = Item
-> [Item] -> ((Item, Item), Distance) -> m ((Item, Item), Distance)
forall (m :: * -> *).
MArray (STUArray s) Distance m =>
Item
-> [Item] -> ((Item, Item), Distance) -> m ((Item, Item), Distance)
go3 Item
i1 [Item]
is ((Item, Item), Distance)
b m ((Item, Item), Distance)
-> (((Item, Item), Distance) -> m ((Item, Item), Distance))
-> m ((Item, Item), Distance)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Item] -> ((Item, Item), Distance) -> m ((Item, Item), Distance)
go2 [Item]
is
      go2 _              b :: ((Item, Item), Distance)
b = ((Item, Item), Distance) -> m ((Item, Item), Distance)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Item, Item), Distance)
b

      go3 :: Item
-> [Item] -> ((Item, Item), Distance) -> m ((Item, Item), Distance)
go3 i1 :: Item
i1 (i2 :: Item
i2:is :: [Item]
is) !((Item, Item), Distance)
b = STUArray s (Item, Item) Distance -> (Item, Item) -> m Distance
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray STUArray s (Item, Item) Distance
matrix_ (Item
i1,Item
i2) m Distance
-> (Distance -> m ((Item, Item), Distance))
-> m ((Item, Item), Distance)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Item
-> [Item] -> ((Item, Item), Distance) -> m ((Item, Item), Distance)
go3 Item
i1 [Item]
is (((Item, Item), Distance) -> m ((Item, Item), Distance))
-> (Distance -> ((Item, Item), Distance))
-> Distance
-> m ((Item, Item), Distance)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Item, Item), Distance)
-> (Item, Item) -> Distance -> ((Item, Item), Distance)
forall b a. Ord b => (a, b) -> a -> b -> (a, b)
choose ((Item, Item), Distance)
b (Item
i1,Item
i2)
      go3 _  []       b :: ((Item, Item), Distance)
b = ((Item, Item), Distance) -> m ((Item, Item), Distance)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Item, Item), Distance)
b



-- | Type for functions that calculate distances between
-- clusters.
type ClusterDistance =
       (Cluster, Distance) -- ^ Cluster B1 and distance from A to B1
    -> (Cluster, Distance) -- ^ Cluster B2 and distance from A to B2
    -> Distance            -- ^ Distance from A to (B1 U B2).


-- Some cluster distances
cdistSingleLinkage      :: ClusterDistance
cdistSingleLinkage :: ClusterDistance
cdistSingleLinkage      = \(_, d1 :: Distance
d1) (_, d2 :: Distance
d2) -> Distance
d1 Distance -> Distance -> Distance
forall a. Ord a => a -> a -> a
`min` Distance
d2

cdistCompleteLinkage    :: ClusterDistance
cdistCompleteLinkage :: ClusterDistance
cdistCompleteLinkage    = \(_, d1 :: Distance
d1) (_, d2 :: Distance
d2) -> Distance
d1 Distance -> Distance -> Distance
forall a. Ord a => a -> a -> a
`max` Distance
d2

cdistUPGMA              :: ClusterDistance
cdistUPGMA :: ClusterDistance
cdistUPGMA              = \(b1 :: Cluster
b1,d1 :: Distance
d1) (b2 :: Cluster
b2,d2 :: Distance
d2) ->
                            let n1 :: Distance
n1 = Item -> Distance
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Cluster -> Item
size Cluster
b1)
                                n2 :: Distance
n2 = Item -> Distance
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Cluster -> Item
size Cluster
b2)
                            in (Distance
n1 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
* Distance
d1 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
+ Distance
n2 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
* Distance
d2) Distance -> Distance -> Distance
forall a. Fractional a => a -> a -> a
/ (Distance
n1 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
+ Distance
n2)

cdistFakeAverageLinkage :: ClusterDistance
cdistFakeAverageLinkage :: ClusterDistance
cdistFakeAverageLinkage = \(_, d1 :: Distance
d1) (_, d2 :: Distance
d2) -> (Distance
d1 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
+ Distance
d2) Distance -> Distance -> Distance
forall a. Fractional a => a -> a -> a
/ 2



-- | /O(n)/. Merges two clusters, returning the new cluster and
-- the new distance matrix.
mergeClusters :: ClusterDistance
              -> DistMatrix s
              -> (Cluster, Cluster)
              -> ST s Cluster
mergeClusters :: ClusterDistance
-> DistMatrix s -> (Cluster, Cluster) -> ST s Cluster
mergeClusters cdist :: ClusterDistance
cdist (DM matrix_ :: STUArray s (Item, Item) Distance
matrix_ active_ :: STRef s [Item]
active_ clusters_ :: STArray s Item Cluster
clusters_) (b1 :: Cluster
b1, b2 :: Cluster
b2) = do
  let (bu :: Cluster
bu, kl :: Item
kl) = Cluster
b1 Cluster -> Cluster -> (Cluster, Item)
`merge` Cluster
b2
      b1k :: Item
b1k = Cluster -> Item
key Cluster
b1
      b2k :: Item
b2k = Cluster -> Item
key Cluster
b2
      km :: Item
km  = Cluster -> Item
key Cluster
bu
      ix :: b -> b -> (b, b)
ix i :: b
i j :: b
j | b
i b -> b -> Bool
forall a. Ord a => a -> a -> Bool
< b
j     = (b
i,b
j)
             | Bool
otherwise = (b
j,b
i)

  -- Calculate new distances
  [Item]
activeV <- STRef s [Item] -> ST s [Item]
forall s a. STRef s a -> ST s a
readSTRef STRef s [Item]
active_
  [Item] -> (Item -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Item]
activeV [Item] -> [Item] -> [Item]
forall a. Eq a => [a] -> [a] -> [a]
\\ [Item
b1k, Item
b2k]) ((Item -> ST s ()) -> ST s ()) -> (Item -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \k :: Item
k -> do
      -- a   <- readArray clusters_ k
      Distance
d_a_b1 <- STUArray s (Item, Item) Distance -> (Item, Item) -> ST s Distance
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray STUArray s (Item, Item) Distance
matrix_ ((Item, Item) -> ST s Distance) -> (Item, Item) -> ST s Distance
forall a b. (a -> b) -> a -> b
$ Item -> Item -> (Item, Item)
forall b. Ord b => b -> b -> (b, b)
ix Item
k Item
b1k
      Distance
d_a_b2 <- STUArray s (Item, Item) Distance -> (Item, Item) -> ST s Distance
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray STUArray s (Item, Item) Distance
matrix_ ((Item, Item) -> ST s Distance) -> (Item, Item) -> ST s Distance
forall a b. (a -> b) -> a -> b
$ Item -> Item -> (Item, Item)
forall b. Ord b => b -> b -> (b, b)
ix Item
k Item
b2k
      let d :: Distance
d = ClusterDistance
cdist (Cluster
b1, Distance
d_a_b1) (Cluster
b2, Distance
d_a_b2)
      STUArray s (Item, Item) Distance
-> (Item, Item) -> Distance -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> e -> m ()
writeArray STUArray s (Item, Item) Distance
matrix_ (Item -> Item -> (Item, Item)
forall b. Ord b => b -> b -> (b, b)
ix Item
k Item
km) (Distance -> ST s ()) -> Distance -> ST s ()
forall a b. (a -> b) -> a -> b
$! Distance
d

  -- Save new cluster, invalidate old one
  STArray s Item Cluster -> Item -> Cluster -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> e -> m ()
writeArray STArray s Item Cluster
clusters_ Item
km Cluster
bu
  STArray s Item Cluster -> Item -> Cluster -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> e -> m ()
writeArray STArray s Item Cluster
clusters_ Item
kl (Cluster -> ST s ()) -> Cluster -> ST s ()
forall a b. (a -> b) -> a -> b
$ String -> Cluster
forall a. String -> a
mkErr "mergeClusters: invalidated"
  STRef s [Item] -> [Item] -> ST s ()
forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s [Item]
active_ ([Item] -> ST s ()) -> [Item] -> ST s ()
forall a b. (a -> b) -> a -> b
$ Item -> [Item] -> [Item]
forall a. Eq a => a -> [a] -> [a]
delete Item
kl [Item]
activeV

  -- Return new cluster.
  Cluster -> ST s Cluster
forall (m :: * -> *) a. Monad m => a -> m a
return Cluster
bu


-- | Worker function to create dendrograms based on a
-- 'ClusterDistance'.
dendrogram' :: ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' :: ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' _ []  _ = String -> Dendrogram a
forall a. String -> a
mkErr "dendrogram': empty input list"
dendrogram' _ [x :: a
x] _ = a -> Dendrogram a
forall a. a -> Dendrogram a
Leaf a
x
dendrogram' cdist :: ClusterDistance
cdist items :: [a]
items dist :: a -> a -> Distance
dist = (forall s. ST s (Dendrogram a)) -> Dendrogram a
forall a. (forall s. ST s a) -> a
runST (() -> ST s (Dendrogram a)
forall p s. p -> ST s (Dendrogram a)
act ())
    where
      n :: Item
n = [a] -> Item
forall (t :: * -> *) a. Foldable t => t a -> Item
length [a]
items
      act :: p -> ST s (Dendrogram a)
act _noMonomorphismRestrictionPlease :: p
_noMonomorphismRestrictionPlease = do
        let xs :: Array Item a
xs = (Item, Item) -> [a] -> Array Item a
forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (1, Item
n) [a]
items
            im :: IntMap (Dendrogram a)
im = [(Item, Dendrogram a)] -> IntMap (Dendrogram a)
forall a. [(Item, a)] -> IntMap a
IM.fromDistinctAscList ([(Item, Dendrogram a)] -> IntMap (Dendrogram a))
-> [(Item, Dendrogram a)] -> IntMap (Dendrogram a)
forall a b. (a -> b) -> a -> b
$ [Item] -> [Dendrogram a] -> [(Item, Dendrogram a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [1..] ([Dendrogram a] -> [(Item, Dendrogram a)])
-> [Dendrogram a] -> [(Item, Dendrogram a)]
forall a b. (a -> b) -> a -> b
$ (a -> Dendrogram a) -> [a] -> [Dendrogram a]
forall a b. (a -> b) -> [a] -> [b]
map a -> Dendrogram a
forall a. a -> Dendrogram a
Leaf [a]
items
        (Item -> Item -> Distance) -> Item -> ST s (DistMatrix s)
forall s. (Item -> Item -> Distance) -> Item -> ST s (DistMatrix s)
fromDistance (a -> a -> Distance
dist (a -> a -> Distance) -> (Item -> a) -> Item -> Item -> Distance
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Array Item a
xs Array Item a -> Item -> a
forall i e. Ix i => Array i e -> i -> e
!)) Item
n ST s (DistMatrix s)
-> (DistMatrix s -> ST s (Dendrogram a)) -> ST s (Dendrogram a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Item
-> IntMap (Dendrogram a) -> DistMatrix s -> ST s (Dendrogram a)
forall t a s.
(Eq t, Num t) =>
t -> IntMap (Dendrogram a) -> DistMatrix s -> ST s (Dendrogram a)
go (Item
nItem -> Item -> Item
forall a. Num a => a -> a -> a
-1) IntMap (Dendrogram a)
im
      go :: t -> IntMap (Dendrogram a) -> DistMatrix s -> ST s (Dendrogram a)
go !t
i !IntMap (Dendrogram a)
ds !DistMatrix s
dm = do
        ((c1 :: Cluster
c1,c2 :: Cluster
c2), distance :: Distance
distance) <- DistMatrix s -> ST s ((Cluster, Cluster), Distance)
forall s. DistMatrix s -> ST s ((Cluster, Cluster), Distance)
findMin DistMatrix s
dm
        Cluster
cu <- ClusterDistance
-> DistMatrix s -> (Cluster, Cluster) -> ST s Cluster
forall s.
ClusterDistance
-> DistMatrix s -> (Cluster, Cluster) -> ST s Cluster
mergeClusters ClusterDistance
cdist DistMatrix s
dm (Cluster
c1,Cluster
c2)
        let dendro :: Cluster -> IntMap a -> (Maybe a, IntMap a)
dendro c :: Cluster
c = (Item -> a -> Maybe a) -> Item -> IntMap a -> (Maybe a, IntMap a)
forall a.
(Item -> a -> Maybe a) -> Item -> IntMap a -> (Maybe a, IntMap a)
IM.updateLookupWithKey (\_ _ -> Maybe a
forall a. Maybe a
Nothing) (Cluster -> Item
key Cluster
c)
            (Just d1 :: Dendrogram a
d1, !IntMap (Dendrogram a)
ds')  = Cluster
-> IntMap (Dendrogram a)
-> (Maybe (Dendrogram a), IntMap (Dendrogram a))
forall a. Cluster -> IntMap a -> (Maybe a, IntMap a)
dendro Cluster
c1 IntMap (Dendrogram a)
ds
            (Just d2 :: Dendrogram a
d2, !IntMap (Dendrogram a)
ds'') = Cluster
-> IntMap (Dendrogram a)
-> (Maybe (Dendrogram a), IntMap (Dendrogram a))
forall a. Cluster -> IntMap a -> (Maybe a, IntMap a)
dendro Cluster
c2 IntMap (Dendrogram a)
ds'
            du :: Dendrogram a
du = Distance -> Dendrogram a -> Dendrogram a -> Dendrogram a
forall a. Distance -> Dendrogram a -> Dendrogram a -> Dendrogram a
Branch Distance
distance Dendrogram a
d1 Dendrogram a
d2
        case t
i of
          1 -> Dendrogram a -> ST s (Dendrogram a)
forall (m :: * -> *) a. Monad m => a -> m a
return Dendrogram a
du
          _ -> let !ds''' :: IntMap (Dendrogram a)
ds''' = Item
-> Dendrogram a -> IntMap (Dendrogram a) -> IntMap (Dendrogram a)
forall a. Item -> a -> IntMap a -> IntMap a
IM.insert (Cluster -> Item
key Cluster
cu) Dendrogram a
du IntMap (Dendrogram a)
ds''
               in Dendrogram a
du Dendrogram a -> ST s (Dendrogram a) -> ST s (Dendrogram a)
forall a b. a -> b -> b
`seq` t -> IntMap (Dendrogram a) -> DistMatrix s -> ST s (Dendrogram a)
go (t
it -> t -> t
forall a. Num a => a -> a -> a
-1) IntMap (Dendrogram a)
ds''' DistMatrix s
dm


-- | /O(n^3)/ time and /O(n^2)/ space. Calculates a complete,
-- rooted dendrogram for a list of items using single linkage
-- with the naïve algorithm using a distance matrix.
singleLinkage :: [a] -> (a -> a -> Distance) -> Dendrogram a
singleLinkage :: [a] -> (a -> a -> Distance) -> Dendrogram a
singleLinkage = ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
cdistSingleLinkage


-- | /O(n^3)/ time and /O(n^2)/ space. Calculates a complete,
-- rooted dendrogram for a list of items using complete linkage
-- with the naïve algorithm using a distance matrix.
completeLinkage :: [a] -> (a -> a -> Distance) -> Dendrogram a
completeLinkage :: [a] -> (a -> a -> Distance) -> Dendrogram a
completeLinkage = ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
cdistCompleteLinkage


-- | /O(n^3)/ time and /O(n^2)/ space. Calculates a complete,
-- rooted dendrogram for a list of items using UPGMA with the
-- naïve algorithm using a distance matrix.
upgma :: [a] -> (a -> a -> Distance) -> Dendrogram a
upgma :: [a] -> (a -> a -> Distance) -> Dendrogram a
upgma = ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
cdistUPGMA


-- | /O(n^3)/ time and /O(n^2)/ space. Calculates a complete,
-- rooted dendrogram for a list of items using fake average
-- linkage with the naïve algorithm using a distance matrix.
fakeAverageLinkage :: [a]
                   -> (a -> a -> Distance) -> Dendrogram a
fakeAverageLinkage :: [a] -> (a -> a -> Distance) -> Dendrogram a
fakeAverageLinkage = ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
cdistFakeAverageLinkage