-- This module solves, more or less, the maximal unique match (MUM)
-- problem for two input lists, using a generalised suffix tree.
--
-- Unfortunately, we can't check for left maximality because we're
-- using lists instead of indices into arrays.  It's easy to look one
-- element to the left in an array, but you can't look one element
-- left of the head of a list.

module UniqueMatch (Sym(..), mkGenTree, maxUniqueMatches) where

import Data.SuffixTree (STree(..), construct, prefix)

-- We construct a generalised suffix tree, with elements annotated to
-- tell us whether they come from the left or right list.  Each list
-- is terminated with a stop symbol.
data Sym a = L a
           | Lx
           | R a
           | Rx
             deriving (Show)

isLeft (L _:_) = True
isLeft (Lx:_) = True
isLeft _ = False

isRight (R _:_) = True
isRight (Rx:_) = True
isRight _ = False

fromSyms (L a:ss) = a : fromSyms ss
fromSyms (R a:ss) = a : fromSyms ss
fromSyms (Lx:_) = []
fromSyms (Rx:_) = []
fromSyms _ = []

instance (Eq a) => Eq (Sym a) where
    L a == L b = a == b
    R a == R b = a == b
    L a == R b = a == b
    R a == L b = a == b
    Lx == Lx = True
    Rx == Rx = True
    _ == _ = False

instance (Ord a) => Ord (Sym a) where
    L a <= L b = a <= b
    R a <= R b = a <= b
    L a <= R b = a <= b
    R a <= L b = a <= b
    L _ <= Lx = True
    L _ <= Rx = True
    R _ <= Lx = True
    R _ <= Rx = True
    Lx <= Lx = True
    Rx <= Rx = True
    Lx <= Rx = True
    _ <= _ = False

mkGenTree :: (Ord a) => [a] -> [a] -> STree (Sym a)
mkGenTree a b = construct (map L a ++ Lx : map R b ++ [Rx])

maxUniqueMatches :: (Ord a) => STree (Sym a) -> [[a]]
maxUniqueMatches t = map (fromSyms . concatMap prefix . reverse)
                     (recurse [] t)
    where recurse _ Leaf = []
          recurse path (Node es) = loop path es

          loop path ((p, t):es) = matches ++ loop path es
              where matches | rightMaximal t = [p:path]
                            | otherwise = recurse (p:path) t
          loop _ _ = []

          rightMaximal (Node [(pa,Leaf), (pb,Leaf)]) =
                (isLeft a && isRight b) || (isRight a && isLeft b)
              where a = prefix pa
                    b = prefix pb
          rightMaximal _ = False

