# Marmote and MarmoteMDP are free softwares: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.

#Marmote is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with Marmote. If not, see <http://www.gnu.org/licenses/>.

#Copyright 2022 Emmanuel Hyon, Alain Jean-Marie

"""
 * @brief class to implement an example of average MDP
 * @author Hyon, lip6 Tournaire.
 * @version 0.3
 * @date jan 2019
 *
 * Average model example with handling of policy.
 
 
"""


import time

from pyMarmoteMDP import *



# Optimisation criterion handed to the MDP constructor further down.
critere = "min"

# Precision passed as first argument to every solver call and to
# policyCost (presumably the stopping tolerance -- confirm in the library).
epsilon = 1e-05

# Upper bound on the number of iterations a solver is allowed to perform.
maxIter = 500


#==================Test new features==================

dim_SS = 4  # number of states
dim_AS = 3  # number of actions

# Both spaces are integer intervals with inclusive bounds, so the actions
# are 0, 1, 2 and the states are 0, 1, 2, 3.  The upper bounds are derived
# from the dimensions so the two can never drift apart.
actionSpace = marmoteInterval(0, dim_AS - 1)
stateSpace = marmoteInterval(0, dim_SS - 1)

# trans = (P_0, P_1, P_2): one transition matrix per action.
# The original read `dimSA`, a name that is never defined (NameError);
# the number of actions is stored in `dim_AS`.
trans = sparseMatrixVector(dim_AS)

# Transition matrix of action a_0, written as (row, column, value)
# triplets.  The triplets are fed to addToEntry in the listed order;
# explicit zeros are kept because the original script added them too.
P0 = sparseMatrix(dim_SS)
for src, dst, prob in (
    (0, 1, 0.875),
    (0, 2, 0.0625),
    (0, 3, 0.0625),
    (1, 1, 0.75),
    (1, 2, 0.125),
    (1, 3, 0.125),
    (2, 0, 0),
    (2, 1, 0),
    (2, 2, 0.5),
    (2, 3, 0.5),
    (3, 0, 0),
    (3, 1, 0),
    (3, 2, 0),
    (3, 3, 1.0),
):
    P0.addToEntry(src, dst, prob)

# Slot 0 of the transition vector belongs to action a_0.
trans[0] = P0

# Transition matrix of action a_1, one tuple per row.  Every cell,
# zeros included, is pushed through addToEntry in row-major order.
# The original script flagged rows 2 and 3 as "no usable instructions";
# presumably they are placeholders for states where this action is not
# meaningful -- confirm with the model description.
P1 = sparseMatrix(dim_SS)
for row, cells in enumerate((
    (0, 0.875, 0.0625, 0.0625),
    (0, 0.75, 0.125, 0.125),
    (0, 1.0, 0, 0),
    (0, 0, 0, 1.0),
)):
    for col, prob in enumerate(cells):
        P1.addToEntry(row, col, prob)

# Slot 1 of the transition vector belongs to action a_1.
trans[1] = P1

# Transition matrix of action a_2, one tuple per row.  Every cell,
# zeros included, is pushed through addToEntry in row-major order.
# Rows 1 to 3 put all the probability mass on state 0.
P2 = sparseMatrix(dim_SS)
for row, cells in enumerate((
    (0, 0.875, 0.0625, 0.0625),
    (1.0, 0, 0, 0),
    (1.0, 0, 0, 0),
    (1.0, 0, 0, 0),
)):
    for col, prob in enumerate(cells):
        P2.addToEntry(row, col, prob)

# Slot 2 of the transition vector belongs to action a_2.
trans[2] = P2



# Reward matrix with dim_SS rows and dim_AS columns: the entry at
# (state, action) is the value attached to that pair.  Cells are added
# in row-major order, explicit zero included.
Reward = sparseMatrix(dim_SS, dim_AS)
for state, per_action in enumerate((
    (0, 4000, 6000),
    (1000, 4000, 6000),
    (3000, 4000, 6000),
    (3000, 4000, 6000),
)):
    for act, amount in enumerate(per_action):
        Reward.addToEntry(state, act, amount)


# Exercise the set objects on this one-dimensional example.
print("Enumerate State Space:\n")
stateSpace.enumerate()
# `printf` does not exist in Python; print() emits the single newline the
# C call produced.
print()

print("Enumerate action Set:\n")
actionSpace.enumerate()
print()

# One-element int buffer holding the action whose index is looked up.
etat = new_intTab(1)
# The array accessor takes (array, position, value); the original call
# passed only two arguments.
intTab_setitem(etat, 0, 0)

print("Enumerate action Set by index:\n")
for i in range(actionSpace.cardinal()):
    # The buffer has a single cell, so the action value always goes in
    # position 0.
    intTab_setitem(etat, 0, i)
    print("Action ", i, " index of Action ", actionSpace.index(etat))
print("\n")

# Build the average-criterion MDP from the pieces defined above.
print("Debut de la construction MDP\n")
# The reward matrix is part of the model: the original call left it out,
# so `Reward` was built but never used.
# NOTE(review): assumes the binding exposes the five-argument constructor
# (criterion, states, actions, transitions, rewards) -- confirm.
mdp1 = averageMDP(critere, stateSpace, actionSpace, trans, Reward)
print("Fin de la construction MDP\n")

# Display the model that was just built.
print("Affichage MDP\n")
mdp1.writeMDP()

# ---------------------------------------------------------------------
# Everything below was left as C++ (pointer arrows, type declarations,
# brace loops, dynamic_cast, calloc, clock()) under a stray indent, so
# the script could not even be parsed.  It is rewritten as Python here.
# Lines that were `printf` calls are translated so that the emitted
# bytes match the C call (print() supplies the final newline).
# ---------------------------------------------------------------------


def _timed_solve(solve):
    """Run one solver and report the CPU time it consumed.

    Args:
        solve: zero-argument callable that launches the solver.

    Returns:
        Whatever ``solve()`` returns.
    """
    # process_time() is the counterpart of C's clock()/CLOCKS_PER_SEC.
    start_t = time.process_time()
    solution = solve()
    total_t = time.process_time() - start_t
    print("# Done : temps ecoule %.6f sec" % total_t)
    return solution


def _print_costs(label, costs, count):
    """Print the first ``count`` entries of ``costs``, one per line.

    Args:
        label: name shown in front of each value.
        costs: per-state values returned by ``policyCost``.
        count: number of entries to print.
    """
    # NOTE(review): assumes the object returned by policyCost can be
    # indexed from Python; if the binding hands back an opaque pointer,
    # an accessor function is needed here instead -- confirm.
    for i in range(count):
        print("i=%d %s= %f " % (i, label, costs[i]))


def _apply_policy(policy, actions):
    """Assign ``actions[s]`` as the action index of every state ``s``."""
    for state, act in enumerate(actions):
        policy.setActionIndex(state, act)


nb_states = stateSpace.cardinal()

print("\n\n###############################\n")
print("Affichage solution iteration valeur\n")
# Solve the MDP by value iteration.
optimum = _timed_solve(lambda: mdp1.valueIteration(epsilon, maxIter))
optimum.writeSolution()

print("\n\nVerification des solutions\n")
sol1 = mdp1.policyCost(optimum, epsilon, maxIter)
_print_costs("sol1", sol1, nb_states)

# Solve the MDP by modified policy iteration.
print("\n\n###############################\n")
print("Calcul par iteration politique modifiee\n")
optimum2 = _timed_solve(
    lambda: mdp1.policyIterationModified(epsilon, maxIter, 0.001, 1000)
)
optimum2.writeSolution()

print("\n\nVerification des solutions\n")
sol2 = mdp1.policyCost(optimum2, epsilon, maxIter)
# The original printed the copy-pasted label "sol1" for this vector.
_print_costs("sol2", sol2, nb_states)

# Solve the MDP by relative value iteration.
print("\n\n###############################\n")
print("\n\nCalcul par iteration valeur relative")
optimum3 = _timed_solve(
    lambda: mdp1.relativeValueIteration(epsilon, maxIter)
)
optimum3.writeSolution()

print("\n\nVerification des solutions\n")
sol3 = mdp1.policyCost(optimum3, epsilon, maxIter)
# Same copy-pasted "sol1" label in the original.
_print_costs("sol3", sol3, nb_states)


print("\n\n###############################\n")
print("Verification des couts moyen de politique donnees\n")
print("\nPolitique Ra\n")
politique = feedbackSolutionMDP()
politique.initVectors(nb_states)
# For each state, record the action prescribed by policy Ra.
_apply_policy(politique, (0, 0, 0, 2))
print("\nVerification des solutions\n")
solpol = mdp1.policyCost(politique, epsilon, maxIter)
politique.setValue(solpol)
politique.writeSolution()

print("\n\nPolitique Rc\n")
# The C++ code installed a fresh calloc'ed (zero-filled) action array
# before filling it; the same is done here with the int-array helpers.
action = new_intTab(nb_states)
for i in range(nb_states):
    intTab_setitem(action, i, 0)
politique.setAction(action)
_apply_policy(politique, (0, 0, 2, 2))
print("\nVerification des solutions\n")
solpol = mdp1.policyCost(politique, epsilon, maxIter)
politique.setValue(solpol)
politique.writeSolution()

print("\n\nPolitique Rd\n")
_apply_policy(politique, (0, 2, 2, 2))
# Clear the values left over from the previous policy.
politique.resetValue()
print("\n\nVerification politique en entree\n")
politique.writeSolution()

print("\n\nVerification des solutions\n")
# The original passes 3 here instead of maxIter; kept unchanged.
solpol = mdp1.policyCost(politique, epsilon, 3)
politique.setValue(solpol)
politique.writeSolution()


print("\n\n###############################\n")
print("\n\nVerification des propriétés de monotonie VF\n")
VFverif = structuralPropertiesVF(stateSpace)
# Python has no dynamic_cast: the solver result is passed as is.
# NOTE(review): if the binding types the solver result as the base
# solution class, the calls below may reject it -- confirm.
poloverif = optimum
monotone = VFverif.monotonicityVF(poloverif)
# The original passed `monotone` as a second print() argument, so the
# "%d" placeholder was printed verbatim instead of being filled in.
print("Affichage monotonie fonction valeur (1 croissant -1 decroissant 0 aucune propriete) : %d\n" % monotone)
print("\n\nVerification des propriétés de monotonie VF par dim\n")
poloverif2 = optimum2
monotone = VFverif.monotonicityVFByDim(poloverif2, 0)
print("Affichage monotonie fonction valeur suivant dimension 0 (1 croissant -1 decroissant 0 aucune propriete) : %d\n" % monotone)

print("\n\n###############################\n")
print("\n\nVerification des propriétés de monotonie Politique\n")
Polverif = structuralPropertiesPol(stateSpace)
monotone = Polverif.monotonicityPol(poloverif)
print("Affichage monotonie Politique (1 croissant -1 decroissant 0 aucune propriete) : %d\n" % monotone)


print("\n\n********************************\n")
