-- See: L. Allison. Types and classes of machine learning and data mining.
--      26th Australasian Computer Science Conference (ACSC) pp.207-215,
--      Adelaide, February 2003
--      L. Allison. Models for machine learning and data mining in
--      functional programming.      doi:10.1017/S0956796804005301
--      J. Functional Programming, 15(1), pp.15-32, Jan. 2005
-- Author: Lloyd ALLISON           lloyd at bruce cs monash edu au
--         http://www.csse.monash.edu.au/~lloyd/tildeFP/II/200309/
-- This program is free software; you can redistribute it and/or modify it
-- under the terms of the GNU General Public License (GPL) as published by
-- the Free Software Foundation; either version 2 of the License, or (at
-- your option) any later version.  This program is distributed in the hope
-- that it will be useful, but without any warranty, without even the implied
-- warranty of merchantability or fitness for a particular purpose.  See the
-- GNU General Public License for more details.  You should have received a
-- copy of the GNU General Public License with this program; if not, write to:
-- Free Software Foundation, Inc., Boston, MA 02111, USA.

module Models (module Models) where
import SM_Utilities
import SM_Classes


bivariate (m1, m2) =         -- (Model of d1, Model of d2) -> Model of (d1, d2)
  let nlp (d1, d2) = (nlPr m1 d1) + (nlPr m2 d2)  -- d1, d2 assumed independent
  in MnlPr ((msg1 m1) + (msg1 m2)) nlp
           (\() -> "(" ++ showsPrec 0 m1 ("," ++ show m2 ++ ")") )

estBivariate (est1, est2) dataSet =
  let (ds1, ds2) = unzip dataSet
  in bivariate (est1 ds1, est2 ds2)

estBivariateWeighted (est1, est2) dataSet weights =    -- for mixture modelling
  let (ds1, ds2) = unzip dataSet
  in bivariate (est1 ds1 weights, est2 ds2 weights)

-- ----------------------------------------------------------------------------

-- 2003, I moved     wallaceIntModel, -- Model of non-neg Int,  >= 0
-- to module Classes for "pragmatic" reasons.


quadraticIntModel =        -- a simple parameterless Model of non-neg Int, >= 0
  let nlPr n = if n < 0 then error "n < 0"
               else log (fromIntegral ((n+1)*(n+2)))  -- n >= 0
                    -- i.e.  pr n = 1/((n+1)*(n+2))
  in MnlPr 0 nlPr (\() -> "P(n)=1/((n+1)*(n+2), n>=0")
  -- NB. Sum[n=0..] 1/((n+1)*(n+2)) = Sum[n=0..]{ 1/(n+1) - 1/(n+2) }
  --     = 1 - 1/2 + 1/2 - 1/3 + 1/3 ... = 1,  i.e. it is a prob' dist'n.


fifty50 = uniform 0 1                        -- 50:50 on {0,1}


uniform lo hi =                              -- Model of Enum Bounded dataSpace
  let (lwb, upb) = if (fromEnum hi) >= (fromEnum lo)
                   then (lo, hi) else (hi, lo)  -- ?switch?
  in MPr 0 (\_ -> 1 / fromIntegral((fromEnum upb) - (fromEnum lwb) + 1))
         (\() -> "uniform[" ++ show lo ++ ".." ++ show hi ++ "]")


probs2model ps =                           --  :: [Probability] -> Model of Int
  MPr 0 (\n -> ps !! n ) (\() -> "mState"++show ps)  -- NB. MPr 0 ...


freqs2model fs =     -- :: frequencies, [Num] -> Model of Int (i.e. MultiState)
  -- Taking a short-cut here, see Wallace and Boulton (1968), and
  -- Wallace and Freeman (1987) for more information.
  let total = foldl (+) 0 fs
      -- noModel estimates cost of data using the "uninformative" method
      noModel  []    ans = ans
      noModel (f:fs) ans = noModel fs (ans - (stirling f))   -- NB base e
      -- MML:  part1 + part2 = noModel... + delta, so...
      part1 = (noModel fs (stirling (total+1))) + delta - part2
      part2 = - (foldl (+) 0 ((zipWith (*) fs (map log ps))))
            -- part2 = msg Len of data given the MML model
      delta = nParams * ((log(pi/6))+1) / 2 -- about 0.176 nits per parameter
      nParams = fromIntegral(length fs - 1)
      ps = normalise (map ((+) 0.5) fs)  -- probabilities, NB +0.5  MML est'r
      p n = ps !! n                      -- the prob' function
  in MPr (if part1 > 0 then part1 else 0) p (\()->"mState"++show ps)


       -- list of frequencies from a series of (weights and) Enum, Bounded type
countWeighted dataSeries weights =
  let c  []     _     counters = counters
      c (d:ds) (w:ws) counters = c ds ws (inc counters (fromEnum d) w)
      inc  []    n _ = error ("count, in inc, ran off end of list "++(show n))
      inc (c:cs) 0 w = (c+w) : cs            -- counter, c += w
      inc (c:cs) n w = c : (inc cs (n-1) w)  -- w is a weight
      zeroes 0 = []
      zeroes n = 0 : (zeroes (n-1))  -- e.g. zeroes 2 = [0, 0]
      upb = maxBound `asTypeOf` (dataSeries !! 0)
      lwb = minBound `asTypeOf` (dataSeries !! 0)
      typeSize = (fromEnum upb) - (fromEnum lwb) + 1
  in c dataSeries weights (zeroes typeSize)

count dataSeries =   -- list of frequencies from a series of Enum, Bounded type
  countWeighted dataSeries (repeat 1)  -- totalAssignment


                      -- (Bounded t, Enum t) => t -> Model of Int -> Model of t
modelInt2model egValue intModel =       -- example value gives the Model's type
  let fromE x = fromEnum( x `asTypeOf` egValue )
      toInt x = (fromE x) - (fromE minBound)
      p datum = pr intModel (toInt datum)
  in MPr (msg1 intModel) p (\()->show intModel)


estMultiStateWeighted dataSet weights =    -- weights and dataSet -> Model of t
  modelInt2model (dataSet !! 0) (freqs2model (countWeighted dataSet weights))

estMultiState dataSet =                    -- :: [Enum Bounded t] -> Model of t
  modelInt2model (dataSet !! 0) (freqs2model (count dataSet))


coinByPrH prH = modelInt2model H (probs2model [prH, 1-prH])

-- ------------------------------9/2002--6/2003--L.Allison--CSSE--Monash--.au--

normal eps m s = normalModel 0 eps m s                 -- i.e. a *fixed* Normal

normalModel m1 eps m s =                         -- i.e. Gaussian a.k.a. Normal
  -- eps is the data measurement accuracy,  +/-(eps/2).
  if s < eps then  -- need better integration, much earlier really!!!
       error "normal: std-dev too small v. data measurement accuracy"
  else MnlPr m1 (normalNlPr eps m s)                          -- Model of Float
       (\() -> "N(" ++ show m ++ "," ++ show s ++ ")(+-" ++ show eps ++ ")")

normalNlPr    eps m s x = normalNlPrDensity m s x - log eps    -- neg log prob'

normalNlPrDensity m s x =                                      -- neg log dens'
  let constPart = (log(2 * pi)) / 2 + (log s)
  in ( constPart + (((x-m)/s)**2)/2 )


estNormal minMean maxMean minSigma maxSigma eps xs =
-- Dealing with the normal (Gaussian) distribution is a good deal trickier
-- than sometimes realised.  The programmer cannot have any prior idea of the
-- ranges of the mean and standard deviation, but the guy with the data does!
-- So we require information about the ranges to be passed in.
-- This version of an estimator uses a uniform prior for the mean over
-- the given range, and 1/sigma over the given range.
--   (In some applications it is not unknown to stretch correctness and
--    to use priors based on the population (data) statistics.)
-- Priors, mean:  uniform over [minMean..maxMean]
--         sigma: 1/sigma over [minSigma..maxSigma]
-- Data is measured to +/-eps/2

-- f(x|m,s) = (1/(sqrt(2 pi) s)) exp(-sqr(x-m)/(2 sqr(s)))        pr(x) density
-- L = -ln(f( ))                                         neg log likelihood(xi)
--   = N.{ln(2 pi)/2 + ln(s)} + {SUMi (xi-m)^2}/(2 s^2)
-- 1st derivatives:
--   d L/d m = {N.m - SUMi xi}/s^2         (hence max LH est', m ~ sum/N) 
--   d L/d s = -N/s - {SUMi (xi-m)^2}/s^3  (hence max LH est', s ~.../N)
-- 2nd derivatives:
--   d2 L/d m d s = zero in expectation
--   d2 L/d m2    = N/s^2
--   d2 L/d s2    = N/s^2 + 3.{SUM (xi-m)^2}/s^4   = - 2.N/s^2 in expectation
-- Fisher Info = 2.N^2 / s^4      (e.g. see Ch6 s6.2 CSW's book-draft 6/2003)
-- NB. This code is deliberately straightforward, not optimised.

 let sqr z = z*z
     n     = fromIntegral(length xs)
     xMean = (foldl (+) 0 xs) / n                            -- naive algorithm
     xVariance
       = if n <= 1 then exp((log minSigma + log maxSigma) / 2) -- geom' mean
         else (foldl (\a -> \b -> a+sqr(b-mean)) 0 xs) / (n-1) -- NB n-1

     mean  = min maxMean (max minMean xMean)  -- or error if outside limits ???
     sigma = min maxSigma (max (max eps minSigma) (sqrt xVariance))

-- 1. It does not make sense to infer a standard deviation less
--    than something-like(!) the data measurement accuracy.
-- 2. A better(!) method of integrating the prior and the
--    likelihood should be used if n is small.
-- 3. Sometimes (small) data-sets are "odd", with just the one value
--    repeated in which case should you really be using the normal?

     nlPriorMean   = log(maxMean - minMean)               -- i.e. uniform prior
-- NB. INTEGRAL[a..b] 1/x = log b - log a
     nlPriorSigma  = log(log maxSigma - log minSigma)+log sigma  -- i.e 1/sigma
     nlPrior       = nlPriorMean + nlPriorSigma
-- konstant: the *2, for 2 parameters, cancels with /2 in the general msg form.
     konstant      = (1 + log(latticeK2))
     halfLogFisher = (log 2)/2 + log n - 2 * log sigma
     logFisher     = 2 * halfLogFisher
-- msg1, 1st part of message, e.g. Farr 1999, or
-- e.g. http://www.csse.monash.edu.au/~lloyd/tildeMML/Notes/Fisher.html
     msg1 = nlPrior + halfLogFisher + konstant

 in normalModel msg1 eps mean sigma

-- ----------------------------------------------------------------------------

test03 =
 let multi01 = freqs2model [1.1, 1.1, 2.2]     -- test fractional freq's
     coin01  = coinByPrH 0.5                   -- 50:50
     coin02  = estMultiState [H,H,T]           --  5:3  !
  -- NB. tuples of Enum Bounded types made instances of class Enum in Utilities
     twoCoin = estMultiState [(H,H),(T,T), (H,T),(H,T), (T,H), (T,H) ]
     bitsMdl = freqs2model [100,100]
     n01     = normal  0.1  0  1               -- NB. data measured to  +/- 0.1
     flts1   = [1.0, 2.0, 0.0, 1.0, -1.0, 3.0]
     flts2   = take 60 (cycle flts1) -- just more of the same
     norm1   = estNormal (-10.0) 10.0  0.1 10.0  0.1 flts1
     norm2   = estNormal (-10.0) 10.0  0.1 10.0  0.1 flts2
 in print "-- test03 --"
 >> print("multi01 = " ++ show multi01
    ++ " = " ++ show(map (pr multi01) [0, 1, 2] ) )
 >> print("pr coin01 H and T = " ++ show(map (pr coin01) [H, T] ) )
 >> print("pr coin02 H and T = " ++ show(map (pr coin02) [H, T] ) )
 >> print("pr twoCoin same   = " ++ show(map (pr twoCoin) [(H,H),(T,T)] ) )
 >> print("msgBase  2 bitsMdl [0,1]= "
    ++ show( msgBase 2 bitsMdl [0,1] ) ++ " bits" )
 >> print("msg1Base 2 bitsMdl      = "
    ++ show( msg1Base 2 bitsMdl ) ++ " bits" )
 >> print("msg2Base 2 bitsMdl [0,1]= "
    ++ show( msg2Base 2 bitsMdl [0,1] ) ++ " bits" )
 >> print("msg1Base 2 (mixture bitsMdl ...) = "
    ++ show( msg1Base 2 (mixture (Mix bitsMdl [bitsMdl, bitsMdl])) )++" bits")
 >> print("wallaceIntModel 0, 1, ... = "
    ++ show(map (nlPrBase 2 wallaceIntModel) [0,1,2,3,4,5,6]) ++ " bits")
 >> print(show n01 ++ " 0.0 1.0 = " ++ show( map (pr n01) [0.0, 1.0] ) )
 >> print(show norm1 ++" flts1 = " ++show(msg1 norm1) ++"+"
                 ++show(msg2 norm1 flts1) ++" nits")
 >> print(show norm2 ++" flts2 = " ++show(msg1 norm2) ++"+"
                 ++show(msg2 norm2 flts2) ++" nits")
-- ----------------------------------------------------------------------------