#!/usr/bin/env python3
# -*- coding: utf-8 -*-

#Marmote and MarmoteMDP and pyMarmoteMDP are free softwares: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.

#Marmote is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with MarmoteMDP. If not, see <http://www.gnu.org/licenses/>.

#Copyright 2019 Emmanuel Hyon, Alain Jean-Marie

"""
 @brief An example that builds a small discounted MDP (two states, two actions) and solves it with several methods
 @author Hyon, Lip6
 @date Nov 2022
 @version 1.1
 
 This example shows how to handle the value function and several solution methods.
 
 The objective is a discounted cost.
 
 Here two of the solving functions are called with an initialization that is not the zero function
 but a value function given as a parameter through a feedbackpolicy.
 
 The feedbackpolicy is the last parameter. All the other parameters are the same as for Value Iteration or Modified Policy Iteration.
 
 Warning: only Modified Policy Iteration and Value Iteration have this ability.
"""

# import of the library
from pyMarmoteMDP import *

# Settings of the MDP and of the iterative solvers used in this script.

# Optimisation criterion handed to the MDP: the objective is maximised.
critere = "max"

# Discount factor of the discounted criterion.
beta = 0.5

# Parameters passed to value iteration (and reused by the other solvers):
# epsilon is the precision threshold, maxIter the cap on the number of iterations.
epsilon = 1e-4
maxIter = 700

# State space: an interval of state indices going from 0 to dimSS-1.
dimSS = 2  # size of the state space
stateSpace = marmoteInterval(0, dimSS - 1)

# Action space: built the same way, an interval going from 0 to dimSA-1.
dimSA = 2  # size of the action space
actionSpace = marmoteInterval(0, dimSA - 1)


print("#")
# Container holding one transition matrix per action (dimSA entries),
# indexed by the action index.
trans = sparseMatrixVector(dimSA)

# Transition matrix attached to the action of index 0.
# Each triple reads (origin state index, destination state index, value);
# e.g. (0, 0, 0.6): the transition from state 0 to state 0 has value 0.6.
P0 = sparseMatrix(dimSS)
for _origin, _destination, _value in (
    (0, 0, 0.6),
    (0, 1, 0.4),
    (1, 0, 0.5),
    (1, 1, 0.5),
):
    P0.addToEntry(_origin, _destination, _value)
trans[0] = P0

# Transition matrix attached to the action of index 1, filled the same way.
P1 = sparseMatrix(dimSS)
for _origin, _destination, _value in (
    (0, 0, 0.2),
    (0, 1, 0.8),
    (1, 0, 0.7),
    (1, 1, 0.3),
):
    P1.addToEntry(_origin, _destination, _value)
trans[1] = P1

# Reward matrix: one row per state index, one column per action index.
# Each triple reads (state index, action index, value);
# e.g. (0, 0, 4.5): in state 0 with action 0 the value is 4.5,
# and (1, 0, -1.5): in state 1 with action 0 the value is -1.5.
Reward = sparseMatrix(dimSS, dimSA)
for _state, _action, _value in (
    (0, 0, 4.5),
    (0, 1, 2),
    (1, 0, -1.5),
    (1, 1, 3),
):
    Reward.addToEntry(_state, _action, _value)

# Assemble the discounted MDP from the criterion, the two spaces,
# the per-action transition matrices, the reward matrix and the discount factor.
print("Begining of MDP building")
mdp1 = discountedMDP(
    critere,
    stateSpace,
    actionSpace,
    trans,
    Reward,
    beta,
)
print("End of MDP building\n")

print("Print MDP")
# Be careful: the MDP is written out only after all the Python print
# instructions have been printed, so the output order differs from the code order.
mdp1.writeMDP()

# Solve the MDP with three methods and print each solution.

print("Call of  value iteration")
# Value iteration, driven by the precision epsilon and the iteration cap maxIter.
optimum = mdp1.valueIteration(epsilon, maxIter)
# Please note that all the printing done by Marmote appears after ALL the
# Python print instructions have been performed.
print("********************************")
print("Print value iteration Solution")
optimum.writeSolution()

# Modified policy iteration. The messages name the method actually called
# (policyIterationModified); they previously said "modified value iteration".
print("Call of modified policy iteration")
# 0.001 and 100 are the inner loop parameters: the inner loop solves the system
# with a power method with at most 100 iterations here, and stops when the
# difference between two iterations is smaller than 0.001.
optimum2 = mdp1.policyIterationModified(epsilon, maxIter, 0.001, 100)
print("Print modified policy iteration Solution")
optimum2.writeSolution()

# Gauss-Seidel variant of value iteration, with the same stopping parameters.
print("Call Gauss Seidel value iteration")
optimum3 = mdp1.valueIterationGS(epsilon, maxIter)

print("Print Gauss Seidel value iteration Solution")
optimum3.writeSolution()

print("********************************")

print("*** Check the cost of the solution ****")
# Compute the cost of the solution passed as first argument (here the one
# returned by value iteration), using the same epsilon and maxIter.
l = mdp1.policyCost(
    optimum,
    epsilon,
    maxIter,
)
print("cost", l)

print("********************************")
print("*** Check the computation with VI from a special initializing ****")
# Value iteration again, but initialised from the solution `optimum`
# (last argument) and with a threshold 1000 times smaller than epsilon.
optimum4 = mdp1.valueIteration(
    epsilon * 0.001,
    maxIter,
    optimum,
)
optimum4.writeSolution()

print("*** Check the computation with PIM from a special initializing ****")
# Modified policy iteration initialised from the same solution `optimum`
# (last argument); epsilon and 100 sit in the inner loop parameter positions.
optimum5 = mdp1.policyIterationModified(
    epsilon * 0.001,
    maxIter,
    epsilon,
    100,
    optimum,
)
optimum5.writeSolution()
 
