-- See: L. Allison. Types and classes of machine learning and data mining.
--      26th Australasian Computer Science Conference (ACSC) pp.207-215,
--      Adelaide, February 2003
--      L. Allison. Models for machine learning and data mining in
--      functional programming.      doi:10.1017/S0956796804005301
--      J. Functional Programming, 15(1), pp.15-32, Jan. 2005
-- Author: Lloyd ALLISON           lloyd at bruce cs monash edu au
--         http://www.csse.monash.edu.au/~lloyd/tildeFP/II/200309/
-- This program is free software; you can redistribute it and/or modify it
-- under the terms of the GNU General Public License (GPL) as published by
-- the Free Software Foundation; either version 2 of the License, or (at
-- your option) any later version.  This program is distributed in the hope
-- that it will be useful, but without any warranty, without even the implied
-- warranty of merchantability or fitness for a particular purpose.  See the
-- GNU General Public License for more details.  You should have received a
-- copy of the GNU General Public License with this program; if not, write to:
-- Free Software Foundation, Inc., Boston, MA 02111, USA.

module FnModels (module FnModels) where
import SM_Utilities
import SM_Classes
import Models

-- NB. tuples of Enum Bounded types are made instances of Enum in Utilities.

                      -- (weighted) estimate a FunctionModel of ipSpace opSpace
estFiniteIpFunctionWeighted estOpModel ipSeries opSeries weights =
  let -- re weights: bug corrected 18/12/2003
      -- select outputs and weights corr' to a given input value, v.
      select v (ip:ips) (op:ops) (w:ws) selOp selW =
        if v == ip then select v ips ops ws (op:selOp) (w:selW)  -- pick
                   else select v ips ops ws     selOp     selW   -- drop
      select v _ _ _ selOp selW = (selOp, selW)
      mdls = map (\v -> uncurry estOpModel
                         (select v ipSeries opSeries weights [] []))
                 [lwbIp .. upbIp]
      part1 = foldl (+) 0 (map msg1 mdls)
      condMdl ip = mdls !! ((fromIp ip) - (fromIp lwbIp))

      fromIp ip = fromEnum (ip `asTypeOf` (ipSeries !! 0))
      lwbIp = minBound `asTypeOf` (ipSeries !! 0)
      upbIp = maxBound `asTypeOf` (ipSeries !! 0)
  in FM part1 condMdl (\()->"{finite_FM "++show mdls++"}")

estFiniteIpFunction estOpModelWeighted ipSeries opSeries =        -- unweighted
  estFiniteIpFunctionWeighted estOpModelWeighted ipSeries opSeries (repeat 1)


                                               -- outputSpace also Enum Bounded
estFiniteFunctionWeighted ipSeries opSeries weights =
 estFiniteIpFunctionWeighted estMultiStateWeighted ipSeries opSeries weights

estFiniteFunction ipSeries opSeries =                             -- unweighted
  estFiniteIpFunction estMultiStateWeighted ipSeries opSeries



         -- The next one will let us infer an order_k Markov model (TimeSeries)
         -- WARNING: It is assumed that |alphabet|**k is "small enough", else
         -- you had better implement ppm-like context trees or similar.
      -- estimate an order_k predictor FunctionModel of [dataSpace1] dataSpace2
estFiniteListFunction k inputs outputs =
  if k <= 0 then
    model2functionModel (estMultiState outputs)  -- order zero
  else  -- the order, k >= 1
    let select v []       _       selIps selOps = (selIps, selOps)     -- done
        select v _        []      selIps selOps = (selIps, selOps)     -- done
        select v ([]:ips) (d:ops) selIps selOps =    -- c empty
          select v ips ops selIps selOps             -- exclude
        select v (ip:ips)  (op:ops) selIps selOps =  -- c's matching [v,...]
          if v == head ip
          then select v ips ops ((tail ip):selIps) (op:selOps)  -- include
          else select v ips ops            selIps      selOps   -- exclude

        fms = map (\v -> uncurry (estFiniteListFunction (k-1))
                                 (select v inputs outputs [] []))
                  [lwbIp .. upbIp]  -- input values

        part1 = foldl (+) 0 (map msg1 fms)

        predictorFn [] =       -- k > |input|, i.e. input too short
          uniform lwbOp upbOp  -- at least it's simple!!!
        predictorFn (ip:ips) =
          condModel (fms !! ((fromIp ip) - (fromIp lwbIp))) ips

        egInput  = (inputs !! 0)!! 0
        upbIp    = maxBound `asTypeOf` egInput
        lwbIp    = minBound `asTypeOf` egInput
        fromIp ip = fromEnum( ip `asTypeOf` egInput )

        egOutput = outputs !! 0
        upbOp    = maxBound `asTypeOf` egOutput
        lwbOp    = minBound `asTypeOf` egOutput
        fromOp op = fromEnum( op `asTypeOf` egInput )

    in FM part1 predictorFn (\()->"{FiniteListFunction"++show fms++"}")

-- ------------------------------9/2002--6/2003--L.Allison--CSSE--Monash--.au--

test04 = let
 { coin1 = [ H, H, H, H, T, T, T, T];     -- inputs
   coin2 = [ H, H, H, T, T, T, T, H];     -- outputs
   fm01 = estFiniteFunction coin1 coin2;  -- 3.5:1.5 = 7:3 same:different

   -- NB. tuples of Enum Bounded types are made instances of Enum in Utilities
   bool2  = [(True,True), (True,False), (False,True), (False,False)];
   bool2X = [ False,       True,         True,         False       ];
   xor = estFiniteFunction (take 12 (cycle bool2))  --  add some noise
                           ((take 8 (cycle bool2X)) ++ (map not bool2X));

   inpts = [[H,H],[H,T],[T,H],[T,T], [H,T],[T,H]];  -- inputs of length 2
   rslts = [  H,    T,    H,    T,     T,    T  ];  -- results
   fm02 = estFiniteListFunction 2 inpts rslts       -- 3:1 1:5 1:1 1:3
 }
 in print "-- test04 --"
 >> print("fm01 = " ++ show fm01 )
 >> print("fm01 H H,... H T  = "
     ++ show( zipWith (condPr fm01) [H,H,T,T] [H,T,H,T] ))
 >> print("noisy xor =         " ++ show xor )
 >> print("fm02 = " ++ show fm02 )
 >> print("fm02 HH ... -> H  = "
     ++ show( zipWith (condPr fm02) [[H,H],[H,T],[T,H],[T,T]] [H,H,H,H]))

-- ----------------------------------------------------------------------------
