#!/usr/bin/env python3
# -*- coding: utf-8 -*-

#Marmote and MarmoteMDP and pyMarmoteMDP are free softwares: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.

#Marmote is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with MarmoteMDP. If not, see <http://www.gnu.org/licenses/>.

#Copyright 2019 Emmanuel Hyon, Alain Jean-Marie

"""
 @brief An example that builds and solves a very simple discounted MDP with two states and two actions
 @author Hyon, Lip6
 @date July 2022
 @version 1.1
 
 This example shows how to create a very simple MDP. The objective is a discounted cost.
 
 This model has two states and two actions.
 
 
 An MDP in marmote is an object that receives four important things:
 
 a) a state space, which is a set. The simplest set is the interval; here we use an interval.
 
 b) an action space that is also an interval
 
 c) a vector of transition matrices.
 Each entry in the vector corresponds with a matrix associated with an action. 
 Hence the matrix at the a-th entry in the vector is the transition matrix associated with the action whose index is a
 
 d) a matrix of costs. This matrix defines the cost of an action a in a state x:
 the rows are indexed by the states,
 the columns are indexed by the actions,
 the entry (i,j) represents the cost of the action of index j in the state of index i.

* Details of an interval object:
an interval is an object which varies between two values,
for example the interval [a,b]. There is a function that maps a value of the interval to an index;
for example the index of a is 0.
* There is a function to enter a value in a matrix; the one used in this example is addToEntry(i,j,x),
where i is the index of the row, j the index of the column and x the value.

* Details of transition matrices:
* a transition matrix is a square matrix;
the entry (i,j) is the probability to jump from the state of index i to the state of index j.

A null entry does not need to be filled in.

"""

# import of the library
from pyMarmoteMDP import *

# --- Criterion and numerical parameters of the MDP ---
# Optimisation direction: the objective is maximised.
critere = "max"
# Discount factor.
beta = 0.5
# Parameters handed to the value-iteration solvers.
epsilon, maxIter = 0.0001, 700

# --- State space ---
dimSS = 2  # number of states
# Interval going from 0 to dimSS - 1.
stateSpace = marmoteInterval(0, dimSS - 1)

# --- Action space ---
dimSA = 2  # number of actions
# Interval going from 0 to dimSA - 1.
actionSpace = marmoteInterval(0, dimSA - 1)


print("#")
# Vector storing the transition matrices: one slot per action.
trans = sparseMatrixVector(dimSA)

# Transition matrix P0, associated with the action of index 0.
# Each inner tuple is one row: row i holds the probabilities of moving
# from the state of index i to the states of index 0 and 1
# (e.g. 0.6 is the transition from state 0 to state 0).
# Loop variables are underscore-prefixed so that they are not exported
# by "from <module> import *".
P0 = sparseMatrix(dimSS)
for _i, _row in enumerate(((0.6, 0.4),
                           (0.5, 0.5))):
    for _j, _p in enumerate(_row):
        P0.addToEntry(_i, _j, _p)
trans[0] = P0

# Transition matrix P1, associated with the action of index 1.
P1 = sparseMatrix(dimSS)
for _i, _row in enumerate(((0.2, 0.8),
                           (0.7, 0.3))):
    for _j, _p in enumerate(_row):
        P1.addToEntry(_i, _j, _p)
trans[1] = P1

# Reward matrix: dimSS rows (states) by dimSA columns (actions).
# Each triple below is (state index, action index, value).
# Loop variables are underscore-prefixed so that they are not exported
# by "from <module> import *".
Reward = sparseMatrix(dimSS, dimSA)
for _state, _action, _value in (
    (0, 0, 4.5),   # state of index 0, action of index 0: 4.5
    (0, 1, 2),
    (1, 0, -1.5),  # state of index 1, action of index 0: -1.5
    (1, 1, 3),
):
    Reward.addToEntry(_state, _action, _value)

print("Begining of MDP building")
# Assemble the discounted MDP from the criterion, the state and action
# spaces, the vector of transition matrices, the reward matrix and the
# discount factor.
mdp1 = discountedMDP(critere, stateSpace, actionSpace, trans, Reward, beta)
print("End of MDP building\n")

print("Print MDP")
# Be careful: the MDP is written by marmote after all the Python print
# instructions have been printed.
mdp1.writeMDP()

print("Call of  value iteration")
# Solve the MDP with value iteration.
optimum = mdp1.valueIteration(epsilon, maxIter)
# Please note that all the printing done by marmote appears after ALL the
# Python print instructions have been performed.
print("********************************")
print("Print value iteration Solution")
optimum.writeSolution()

# Inner-loop parameters of policyIterationModified: the inner loop solves
# the system with a power method, with at most innerMaxIter iterations, and
# stops when the difference between two iterations is smaller than
# innerEpsilon.
innerEpsilon = 0.001
innerMaxIter = 100

# The messages name the solver that is actually called
# (policyIterationModified); they previously announced a "modified value
# iteration", which mislabelled the second solution in the output.
print("Call of modified policy iteration")
optimum2 = mdp1.policyIterationModified(epsilon, maxIter, innerEpsilon, innerMaxIter)
print("Print modified policy iteration Solution")
optimum2.writeSolution()

# Solve the MDP with the Gauss-Seidel variant of value iteration.
print("Call Gauss Seidel value iteration")
optimum3 = mdp1.valueIterationGS(epsilon, maxIter)

print("Print Gauss Seidel value iteration Solution")
optimum3.writeSolution()

print("********************************")
