#!/usr/bin/env python3
# -*- coding: utf-8 -*-

#Marmote and MarmoteMDP are free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.

#Marmote is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with Marmote. If not, see <http://www.gnu.org/licenses/>.

#Copyright 2022 Emmanuel Hyon, Alain Jean-Marie

"""
 * @brief Example script that builds and solves a total reward MDP related to an SSP
 * @author Hyon, LIP6.
 * @version 0.1
 * @date Oct 2020
 
 Infinite-horizon total-reward example.
 
 The SSP is described in the accompanying document.
"""

from pyMarmoteMDP import *

# Optimisation criterion given to the MDP constructor.
critere = "min"

# Value used as the epsilon argument of the value-iteration calls.
epsilon = 1e-6

# Default maximum number of iterations allowed (the script assigns a new
# value before each solver call).
maxIter = 20

# Penalty assigned to the (state, action) pairs that are not available.
# A limit value would be the rigorous choice; a large finite number is used
# instead.
peno = 10 ** 10

# Spaces of the one-dimensional example: each space is an interval of
# integers, and its cardinal gives the matching matrix dimension.

# state space and its dimension
stateSpace = marmoteInterval(0, 8)
dim_SS = stateSpace.cardinal()

# action space and its dimension
actionSpace = marmoteInterval(0, 7)
dim_AS = actionSpace.cardinal()

# Every transition matrix is also stored in this list so that a Python
# reference to it remains and the garbage collector leaves it alone.
refe = []
    
# Values of the (state, action) pairs that are really available, keyed by
# (state, action). Every pair absent from this table receives the penalty
# peno when the matrix is filled.
available_values = {
    (0, 0): 15, (0, 1): 10, (0, 2): 5,
    (1, 3): 10, (1, 4): 30,
    (2, 4): 15,
    (3, 5): 10,
    (4, 5): 20,
    (5, 5): 60,
    (6, 6): 5,
    (7, 3): 5, (7, 7): 15,
    (8, 6): 0,
}

# Fill the dim_SS x dim_AS matrix entry by entry, state after state.
# NOTE: relies on dim_SS == 9 and dim_AS == 8 (states 0..8, actions 0..7).
Reward = sparseMatrix(dim_SS, dim_AS)
for state_idx in range(dim_SS):
    for action_idx in range(dim_AS):
        entry_value = available_values.get((state_idx, action_idx), peno)
        Reward.addToEntry(state_idx, action_idx, entry_value)


print("Debut de la construction MDP")
mdpSSP = totalRewardMDP(critere, stateSpace, actionSpace, Reward)
print("Fin de la construction MDP")


def _stay(*states):
    """Return the (state, state, 1) self-loop triples for the given states, in order."""
    return [(state, state, 1) for state in states]


# One list of (origin, destination, probability) triples per action, stored
# at the index of that action. Each list starts with the real moves of the
# action and is completed with self-loops on the remaining states so that the
# matrix is stochastic.
transition_entries = [
    # action 0
    [(0, 1, 0.6), (0, 2, 0.4)] + _stay(1, 2, 3, 4, 5, 6, 7, 8),
    # action 1
    [(0, 3, 0.3), (0, 4, 0.5), (0, 5, 0.2)] + _stay(1, 2, 3, 4, 5, 6, 7, 8),
    # action 2
    [(0, 6, 0.15), (0, 7, 0.85)] + _stay(1, 2, 3, 4, 5, 6, 7, 8),
    # action 3
    [(1, 0, 1), (7, 0, 1)] + _stay(0, 2, 3, 4, 5, 6, 8),
    # action 4
    [(1, 8, 1), (2, 8, 1)] + _stay(0, 3, 4, 5, 6, 7, 8),
    # action 5
    [(3, 8, 1), (4, 8, 1), (5, 8, 1)] + _stay(1, 0, 2, 6, 7, 8),
    # action 6
    [(6, 8, 1)] + _stay(8, 2, 0, 3, 4, 5, 1, 7),
    # action 7
    [(7, 6, 1)] + _stay(0, 3, 4, 5, 6, 1, 8, 2),
]

print("Ajout des matrices\n")
for action_idx, entries in enumerate(transition_entries):
    transition = sparseMatrix(dim_SS)
    for origin, destination, probability in entries:
        transition.addToEntry(origin, destination, probability)
    mdpSSP.addMatrix(action_idx, transition)
    print("Ajout P" + str(action_idx))
    # keep a Python reference to the matrix to avoid the garbage collector
    refe.append(transition)

print("Fin de la construction MDP\n")

print("Affichage MDP")
mdpSSP.writeMDP()

print("Ugly list of references only to check", refe)


def _solve_and_show(max_iterations):
    """Run value iteration on mdpSSP and print the resulting solution.

    The same report format is used for every run: the label and the
    iteration budget are printed on a single line.

    max_iterations -- value passed as second argument of valueIteration.

    Returns the object returned by mdpSSP.valueIteration.
    """
    solution = mdpSSP.valueIteration(epsilon, max_iterations)
    print("Done : longueur chemin", max_iterations)
    print("Affichage solution pour", max_iterations)
    solution.writeSolution()
    return solution


print("\n###############################\n")
print("Affichage solution iteration valeur\n")

# first run with a small iteration budget
maxIter = 5
optimum = _solve_and_show(maxIter)

# second run with a larger iteration budget
maxIter = 50
optimum2 = _solve_and_show(maxIter)

print("\n################################\n")
 

