首页 > 解决方案 > Minimax 算法为 tictactoe 产生错误的结果

问题描述

我正在练习我的算法,并想在 python 中实现一个基于 minimax 算法的井字游戏 AI(代码如下):

import itertools

import more_itertools
import numpy as np


class Position:
    win_positions = np.array(([1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 5, 9],
                              [1, 4, 7], [2, 5, 8], [3, 6, 9], [3, 5, 7]))

    def __init__(self, position=None):

        """If a position is given ensure 0 is in the first index, 
        for a dictionary lookup later for
            an empty board otherwise use the position
        """
        self.position = np.array([0]) if position is None else \
            np.array(position) if position[0] == 0 else np.append([0], np.array(
                position))

        self.turn = itertools.cycle(
            ['x', 'o']) if self.position.size % 2 else itertools.cycle(
            ['o', 'x'])
        self.p = more_itertools.peekable(self.turn)

    def add(self, value):
        next(self.turn)
        self.position = np.append(self.position, value)
        return self.win()

    def win(self):
        x = np.array(self.position[1::2])
        o = np.array(self.position[2::2])

        """
        takes the difference of a players moves and the winning move sets,
        an array resulting from this with a size of 0 indicates the player 
        contains
        a winning arrangement in their moves
        """

        if any([False if np.setdiff1d(position, x).size else True for position
                in Position.win_positions]):
            return 10
        elif any([False if np.setdiff1d(position, o).size else True for position
                  in Position.win_positions]):
            return -10
        elif self.valid_moves().size == 0:
            return 0
        else:
            return None

    def whose_turn(self, player=None):
        return self.p.peek() if player is None else self.p.peek() == player

    def valid_moves(self):
        return np.setdiff1d(np.arange(1, 10), self.position)

    def clone(self, next_pos=None):
        """returns a new position object with one of two options, either a copy,
            of the current position or a copy with a move added"""
        return Position(
            np.append(self.position, next_pos)) if next_pos else Position(
            np.copy(self.position))

    def mini_max(self, depth=0):

        best_move = -1
        """return the win value of the board position
            draws are 0, 'o' wins are -10 and 'x' wins are
            10. A board with valid moves available returns none for 
            this call """
        if self.win() is not None:
            return self.win(), None

        minimax = float('-inf') if self.whose_turn('x') else float('inf')
        for move in self.valid_moves():

            val = self.clone(move).mini_max()[0]
            if minimax < val and self.whose_turn(
                    'x') or minimax > val and self.whose_turn('o'):
                minimax = val
                best_move = move

        return minimax + (-1 if self.whose_turn('x') else 1), best_move

    def print_board(self):
        blank = np.chararray(shape=(9,), unicode=True)
        for i in range(1, self.position.size):
            blank[self.position[i] - 1] = 'X' if i % 2 else 'O'
        string = ("{:^3s}█{:^3s}█{:^3s}\n"
                  "▄▄▄█▄▄▄█▄▄▄\n"
                  "{:^3s}█{:^3s}█{:^3s}\n"
                  "▀▀▀█▀▀▀█▀▀▀\n"
                  "{:^3s}█{:^3s}█{:^3s}\n".format(*blank))
        print(string)

    def __str__(self):
        return "".join(map(str, self.position))


if __name__ == '__main__':
    # test winning conditions
    assert Position([1, 4, 2, 5, 3]).win() == 10
    assert Position([1, 4, 2, 5, 7, 6]).win() == -10
    assert Position([1, 4, 2, 5, 7, 8, 6, 5, 9, 3]).win() == 0

    # testing turns
    assert Position([1, 2, 3]).whose_turn() == 'o'
    assert Position([1, 5, 6, 4]).whose_turn() == 'x'
    assert Position([5, 6, 3, 2]).whose_turn('x')
    assert Position([5, 6, 3, 2, 1]).whose_turn('o')

    # valid moves
    assert all(
        Position([1, 2, 3, 4, 5, 6]).valid_moves() == np.array([7, 8, 9]))
    assert all(
        Position([9, 5, 3, 8]).valid_moves() == np.array([1, 2, 4, 6, 7]))

    # test minimax correct moves
    assert Position([1, 4, 2, 5]).mini_max()[1] == 3
    assert Position([1, 3, 4, 5]).mini_max()[1] == 7
    assert Position([1, 2, 5]).mini_max()[1] == 9  # error here

我已经尝试多次重写我的代码,但我的测试在最佳移动上一直失败。o 玩家似乎忽略了 x 玩家的获胜位置。我有点不知所措。我已经根据维基百科上的伪代码检查了它,但似乎没有任何明显错误

标签: pythonartificial-intelligencetic-tac-toeminimax

解决方案


推荐阅读