#!/usr/bin/env python3
# -*- coding: utf-8 -*-

#Marmote and MarmoteMDP and pyMarmoteMDP are free softwares: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.

#Marmote is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with MarmoteMDP. If not, see <http://www.gnu.org/licenses/>.

#Copyright 2022 Emmanuel Hyon, Alain Jean-Marie

"""
 @brief An example to enumerate a state space of two dimensions
 @author Hyon, Lip6
 @date Nov 2022
 @version 1.0
 
 This example creates a very simple MDP and shows how to manage a QValue object.
 
 The objective is a discounted cost.
 
 We build a Q-value from the solution computed by value iteration.
 
 epsilonGreedy and boltzmann are functions that return a randomly drawn action.
 
 
"""

# import of the library
from pyMarmoteMDP import *

# Parameters of the MDP.
# Optimisation direction handed to the MDP constructor: we maximise.
critere = "max"
# Discount factor.
beta = 0.5
# Settings passed to value iteration: epsilon (presumably the stopping
# precision -- confirm against the library) and the iteration cap.
epsilon, maxIter = 0.0001, 700

# Sizes of the state space and of the action space.
dimSS, dimSA = 2, 2

# State space: an interval going from 0 to dimSS-1.
stateSpace = marmoteInterval(0, dimSS - 1)

# Action space: an interval going from 0 to dimSA-1.
actionSpace = marmoteInterval(0, dimSA - 1)


print("#")
# Vector holding the transition matrices, one slot per action index.
trans = sparseMatrixVector(dimSA)

# Matrix P0, associated with the action of index 0.
# Each triple reads (index of origin state, index of destination state, value),
# e.g. (0, 0, 0.6): transition from state 0 to state 0 with value 0.6.
P0 = sparseMatrix(dimSS)
for src, dst, prob in ((0, 0, 0.6), (0, 1, 0.4), (1, 0, 0.5), (1, 1, 0.5)):
    P0.addToEntry(src, dst, prob)
trans[0] = P0

# Matrix P1, associated with the action of index 1 (same triple layout).
P1 = sparseMatrix(dimSS)
for src, dst, prob in ((0, 0, 0.2), (0, 1, 0.8), (1, 0, 0.7), (1, 1, 0.3)):
    P1.addToEntry(src, dst, prob)
trans[1] = P1

# Reward matrix: one row per state index, one column per action index.
Reward = sparseMatrix(dimSS, dimSA)
# Each triple reads (state index, action index, value), e.g. in state 0
# with action 0 the value is 4.5, and in state 1 with action 0 it is -1.5.
for state_idx, action_idx, value in ((0, 0, 4.5), (0, 1, 2), (1, 0, -1.5), (1, 1, 3)):
    Reward.addToEntry(state_idx, action_idx, value)

print("Begining of MDP building")
# Assemble the discounted MDP from the criterion, the two spaces, the
# transition matrices, the reward matrix and the discount factor.
mdp1 = discountedMDP(critere, stateSpace, actionSpace, trans, Reward, beta)

# The two announcements are emitted together, one per line (the first one
# keeps its own trailing newline, which leaves an empty line in between).
print("End of MDP building\n", "Print MDP", sep="\n")
# Be careful: the MDP is written by the library only after all the Python
# print instructions have been displayed.
mdp1.writeMDP()

print("Call of  value iteration")
# Solve the MDP with value iteration.
optimum = mdp1.valueIteration(epsilon, maxIter)
# Note that everything printed by marmote appears after ALL the Python
# print instructions have been performed.
print("********************************", "Print value iteration Solution", sep="\n")
optimum.writeSolution()


print("Building Q value")
# Q is the Q-value built by the MDP from the solution found above.
Q = mdp1.Qfunction(optimum)
print("Printing Q value")
Q.writeQValue()
  
print("Test epsilon Greedy")
# Ask epsilonGreedy for an action ten times in state 0, with epsilon = 0.5,
# to see which actions it returns.
state = 0
for _ in range(10):
    print("Action is", Q.epsilonGreedy(state, 0.5))
print("end epsilon greedy")

  
print("Test Boltzmann T = 1. Return Optimal very often")
# Ask boltzmann for an action ten times in state 1, with T = 1.
state = 1
for _ in range(10):
    print("Action (for T= 1) is ", Q.boltzmann(state, 1))
print("end boltzmann T=1")
  
print("Test Boltzmann. T= 175  close uniform")
# Ask boltzmann for an action ten times in state 1, with T = 175.
state = 1
for _ in range(10):
    print("Action (for T= 175) is ", Q.boltzmann(state, 175))
print("End Boltzman T=175")
 
# Closing banner: a separator line followed by the farewell line.
print("********************************", "That's all folk*****************", sep="\n")
