import numpy as np
# State order for every matrix below: P (paper), R (rock), S (scissors).
#
# Agent matrices: row i holds the sampling weights of the program's next move
# given that its previous move was state i.

# Agents whose rows are all identical: the next move ignores the previous one.
P_rock = np.matrix([[0.25, 0.5, 0.25]] * 3)
P_paper = np.matrix([[0.5, 0.25, 0.25]] * 3)
P_scissor = np.matrix([[0.25, 0.25, 0.5]] * 3)
P_balance = np.matrix([[1/3] * 3] * 3)

# Lowest weight on the diagonal: tends to switch to a different move.
P_change = np.matrix([
    [0.2, 0.4, 0.4],
    [0.4, 0.2, 0.4],
    [0.4, 0.4, 0.2],
])

# Highest weight on the diagonal: tends to repeat the previous move.
P_keep = np.matrix([
    [0.5, 0.25, 0.25],
    [0.25, 0.5, 0.25],
    [0.25, 0.25, 0.5],
])

# Agent name -> transition matrix (insertion order matters for random draws).
agents = dict(
    rock=P_rock,
    paper=P_paper,
    scissor=P_scissor,
    change=P_change,
    keep=P_keep,
    balance=P_balance,
)

# Move letter -> row/column index in the matrices.
actions = dict(P=0, R=1, S=2)

# scores[program move, user move] = points earned by the program
# (1 for a win, 0.5 for a draw, 0 for a loss).
scores = np.matrix([
    [0.5, 1, 0],
    [0, 0.5, 1],
    [1, 0, 0.5],
])

import random
def playRandomGuy(seed=None, cheat=False):
    """Play an interactive session against one randomly drawn agent.

    One entry of the module-level ``agents`` table is drawn at random. Each
    round the user is prompted for a move first; the program's first move is
    then drawn uniformly, and every later move is sampled using, as weights,
    the row of the agent's matrix indexed by the program's previous move.
    After each round the running totals are printed. The session ends as
    soon as the typed text is not one of the keys of ``actions``.

    Args:
        seed: Seed for the random draws. The function uses its own
            ``random.Random(seed)`` instance, so the module-level ``random``
            generator is left untouched; for a given seed the draws are the
            same as with ``random.seed(seed)``.
        cheat: If true, the drawn agent's name is printed before the first
            round; otherwise it is printed once the session is over.

    Returns:
        None. All results are printed.
    """
    actionsArray = list(actions)
    # Private generator: avoids reseeding the process-wide RNG as a side effect.
    rng = random.Random(seed)
    agentCode = rng.choices(list(agents))[0]
    if cheat:
        print(agentCode)
    P = agents[agentCode]
    curState = None
    myPoints, userPoints = 0, 0
    while True:
        # Wait for the user's move; surrounding whitespace is ignored and
        # anything that is not a known move ends the session.
        choice = input('Move?').strip().upper()
        if choice not in actionsArray:
            break
        # Pick the program's move.
        if curState is None:
            # No previous move yet: uniform draw.
            curState = rng.choices(actionsArray)[0]
        else:
            # P is an np.matrix, so the row slice is 1x3 and tolist() nests it.
            rowWeights = P[actions[curState], :].tolist()[0]
            curState = rng.choices(actionsArray, weights=rowWeights)[0]
        # Update the score: the program earns scores[program, user] and the
        # user earns the complement.
        score = scores[actions[curState], actions[choice]]
        myPoints += score
        userPoints += 1 - score
        print(f'Prog [{curState}]: {myPoints} / User [{choice}]: {userPoints}')
    if not cheat:
        print(agentCode)


playRandomGuy(cheat=True) # cheat mode prints the agent's name up front, to try to win faster... :)

rock
Move?p
Prog [R]: 0.0 / User [P]: 1.0
Move?p
R
Prog [P]: 0.5 / User [P]: 1.5
Move?p
P
Prog [R]: 0.5 / User [P]: 2.5
Move?p
R
Prog [R]: 0.5 / User [P]: 3.5
Move?x


def counterAction(choix):
    """Return the move letter that this function pairs with ``choix``.

    The pairing is 'P' -> 'S', 'R' -> 'P', 'S' -> 'R'. Any other value
    yields ``None``.
    """
    # (move, reply) pairs, checked in order with plain equality.
    pairs = (('P', 'S'), ('R', 'P'), ('S', 'R'))
    for move, reply in pairs:
        if choix == move:
            return reply
    return None

def playAdaptiveGuy(seed=None):
    """Play an interactive session against an opponent that learns the user's habits.

    The program keeps a 3x3 table counting, for each previous user move (row),
    how often each move followed it (column). Each round it looks up the row
    of the user's previous move, takes the most frequent follow-up (first
    index on ties) and plays ``counterAction`` of it. When there is no
    previous move, or that row is still all zeros, it plays a uniformly
    random move instead. The table is updated only after the program's move
    has been chosen, so the current input never influences the current reply.

    The session ends as soon as the typed text is not one of the keys of
    ``actions``; the row-normalised table is then printed.

    Args:
        seed: Seed for the random draws. The function uses its own
            ``random.Random(seed)`` instance, so the module-level ``random``
            generator is left untouched; for a given seed the draws are the
            same as with ``random.seed(seed)``.

    Returns:
        None. All results are printed.
    """
    # Private generator: avoids reseeding the process-wide RNG as a side effect.
    rng = random.Random(seed)
    actionsArray = list(actions)
    oldChoice = None  # index of the user's previous move, None before the first round
    myPoints, userPoints = 0, 0
    # Unnormalised user transition counts: P[i, j] = times move j followed move i.
    P = np.matrix([[0, 0, 0], [0, 0, 0], [0, 0, 0]])
    while True:
        # Wait for the user's move; surrounding whitespace is ignored and
        # anything that is not a known move ends the session.
        choice = input('Move?').strip().upper()
        if choice not in actionsArray:
            break
        # Pick the program's move: predict the user's move from the counts.
        predicted = None
        if oldChoice is not None:
            indMax = int(np.argmax(P[oldChoice, :]))
            if P[oldChoice, indMax] != 0:
                predicted = actionsArray[indMax]
        if predicted is None:
            # Nothing learned yet for this situation: uniform draw.
            curState = rng.choices(actionsArray)[0]
        else:
            curState = counterAction(predicted)
        # Update the user transition counts.
        if oldChoice is not None:
            P[oldChoice, actions[choice]] += 1
        oldChoice = actions[choice]
        # Update the score: the program earns scores[program, user] and the
        # user earns the complement.
        score = scores[actions[curState], actions[choice]]
        myPoints += score
        userPoints += 1 - score
        print(f'Prog [{curState}]: {myPoints} / User [{choice}]: {userPoints}')
    # Normalise each row of P for display; rows never observed keep a divisor
    # of 1 so they print as zeros instead of dividing by zero.
    sum_of_rows = np.array(P.sum(axis=1))  # shape (3, 1), broadcasts over columns
    sum_of_rows[sum_of_rows == 0] = 1
    normalized_P = P / sum_of_rows
    print(normalized_P)


# Start an interactive session against the adaptive opponent (no fixed seed).
playAdaptiveGuy()

Move?p
Prog [P]: 0.5 / User [P]: 0.5
Move?p
Prog [S]: 1.5 / User [P]: 0.5
Move?r
Prog [S]: 1.5 / User [R]: 1.5
Move?r
Prog [R]: 2.0 / User [R]: 2.0
Move?r
Prog [P]: 3.0 / User [R]: 2.0
Move?s
Prog [P]: 3.0 / User [S]: 3.0
Move?s
Prog [S]: 3.5 / User [S]: 3.5
Move?x
[[0.5        0.5        0.        ]
 [0.         0.66666667 0.33333333]
 [0.         0.         1.        ]]

Exercice 1

Exercice 2

Exercice 3