使用 Python 实现最小步数走迷宫：Python 迷宫最短路线算法

import numpy as np
import matplotlib

matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

# Draw the 3x3 maze. Cells are labelled S0..S8 row by row from the top-left;
# S0 is the start cell and S8 the goal cell.
fig = plt.figure(figsize=(5, 5))
ax = plt.gca()

# Walls, each given as (x endpoints, y endpoints) of one red segment.
maze_walls = (
    ([1, 1], [0, 1]),
    ([1, 2], [2, 2]),
    ([2, 2], [2, 1]),
    ([2, 3], [1, 1]),
)
for wall_xs, wall_ys in maze_walls:
    plt.plot(wall_xs, wall_ys, color='red', linewidth=2)

# Cell labels, centred in each unit square (row 0 is the top row, y = 2.5).
for cell in range(9):
    plt.text(cell % 3 + 0.5, 2.5 - cell // 3, 'S' + str(cell), size=14, ha='center')
plt.text(0.5, 2.3, 'START', size=14, ha='center')
plt.text(2.5, 0.3, 'GOAL', size=14, ha='center')

ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
# tick_params expects booleans here; the string 'off' is truthy and would
# leave the ticks and labels visible instead of hiding them.
plt.tick_params(axis='both', which='both', bottom=False, top=False, labelbottom=False, right=False,
                labelleft=False)

# Green marker for the agent, initially at the centre of S0.
line, = ax.plot([0.5], [2.5], marker='o', color='g', markersize=60)

# Initial policy parameters: one row per state S0..S7 (the goal S8 has no
# row), one column per action in the order up, right, down, left.
# np.nan marks a move that is not available from that state.
theta_0 = np.array([[np.nan, 1, 1, np.nan],  # S0
                    [np.nan, 1, np.nan, 1],  # S1
                    [np.nan, np.nan, 1, 1],  # S2
                    [1, 1, 1, np.nan],  # S3
                    [np.nan, np.nan, 1, 1],  # S4
                    [1, np.nan, np.nan, np.nan],  # S5
                    [1, np.nan, np.nan, np.nan],  # S6
                    [1, 1, np.nan, np.nan],  # S7
                    ])
# Disabled earlier variant, kept as a bare string literal so it never runs.
# It turns theta into a policy by dividing each row by its NaN-ignoring sum
# (plain proportional normalisation) rather than applying softmax.
'''def simple_convert_into_pi_from_theta(theta):
    [m,n]=theta.shape
    pi=np.zeros((m,n))
    for i in range(0,m):
        pi[i,:]=theta[i,:]/np.nansum(theta[i,:])
    pi=np.nan_to_num(pi)
    return pi
pi_0=simple_convert_into_pi_from_theta(theta_0)
print(pi_0)'''


def softmax_convert_into_pi_from_theta(theta, beta=1.0):
    """Convert policy parameters into action probabilities with a row-wise softmax.

    Args:
        theta: 2-D array of parameters, one row per state and one column per
            action. ``np.nan`` marks an unavailable action.
        beta: Multiplier applied to ``theta`` before exponentiation
            (inverse temperature). Defaults to 1.0.

    Returns:
        Array of the same shape as ``theta``. Each row sums to 1 over its
        non-NaN entries; NaN entries (and rows that are entirely NaN) get
        probability 0.
    """
    theta = np.asarray(theta, dtype=float)
    if theta.size == 0:
        return np.zeros(theta.shape)

    available = ~np.isnan(theta)
    # Unavailable actions become -inf so that exp() maps them to exactly 0.
    scaled = np.where(available, beta * theta, -np.inf)

    # Subtract the row maximum before exponentiating: the result is
    # mathematically unchanged but exp() can no longer overflow to inf
    # (which previously produced inf/inf = NaN and then a zero policy).
    row_max = scaled.max(axis=1, keepdims=True)
    row_max = np.where(np.isfinite(row_max), row_max, 0.0)  # all-NaN rows
    exp_theta = np.exp(scaled - row_max)

    row_sum = exp_theta.sum(axis=1, keepdims=True)
    # Rows without any available action keep probability 0 everywhere.
    pi = np.divide(exp_theta, row_sum, out=np.zeros_like(exp_theta), where=row_sum > 0)
    return pi


# Initial policy: softmax of the initial parameters theta_0.
pi_0 = softmax_convert_into_pi_from_theta(theta_0)
# print(pi_0)


# Disabled earlier variant of the episode runner, kept as a bare string
# literal so it never runs. It records only the visited states (no actions)
# while walking from state 0 until state 8 is reached.
'''def get_next_s(pi,s):
    direction=["up","right","down","left"]
    next_direction=np.random.choice(direction,p=pi[s,:])
    if next_direction=="up":
        s_next=s-3
    elif next_direction=="right":
        s_next=s+1
    elif next_direction=="down":
        s_next=s+3
    elif next_direction=="left":
        s_next=s-1
    return s_next
def goal_maze(pi):
    s=0
    state_history=[0]
    while(1):
        next_s=get_next_s(pi,s)
        state_history.append(next_s)
        if next_s==8:
            break
        else:
            s=next_s
    return state_history
state_history=goal_maze(pi_0)
print(state_history)
print("步数是"+str(len(state_history)-1))'''


def get_action_and_next_s(pi, s):
    """Sample one move from the policy row ``pi[s]``.

    Args:
        pi: 2-D array of action probabilities; columns are ordered
            up, right, down, left.
        s: Index of the current state on the 3-wide grid.

    Returns:
        ``[action, s_next]`` where ``action`` is the column index of the
        sampled direction (0..3) and ``s_next`` the resulting state index.
    """
    direction = ["up", "right", "down", "left"]
    # State-index change for each move on a grid that is 3 cells wide.
    state_offset = {"up": -3, "right": 1, "down": 3, "left": -1}

    picked = np.random.choice(direction, p=pi[s, :])
    action = direction.index(picked)
    return [action, s + state_offset[picked]]


def goal_maze_ret_s_a(pi):
    """Run one episode from state 0 until state 8 is reached.

    Args:
        pi: Policy array passed through to ``get_action_and_next_s``.

    Returns:
        List of ``[state, action]`` pairs in visiting order. The final pair
        holds state 8 with ``np.nan`` as its action, since no move is taken
        from there.
    """
    state = 0
    history = [[state, np.nan]]
    while True:
        action, state = get_action_and_next_s(pi, state)
        # Fill in the action taken from the previous state, then open a new
        # entry for the state just entered.
        history[-1][1] = action
        history.append([state, np.nan])
        if state == 8:
            return history


# Sample one episode with the initial policy and report it.
s_a_history = goal_maze_ret_s_a(pi_0)
print(s_a_history)
# Number of moves = number of recorded states minus the starting one.
step_total = len(s_a_history) - 1
print("步数是" + str(step_total))


def update_theta(theta, pi, s_a_history, eta=0.1):
    """Return updated policy parameters after one episode.

    For every available entry ``(i, j)`` of ``theta`` the update is
    ``theta[i, j] + eta * (N_ij - pi[i, j] * N_i) / T`` where ``N_i`` is how
    often state ``i`` appears in the history, ``N_ij`` how often action ``j``
    was taken in state ``i``, and ``T`` the number of steps in the episode.

    Args:
        theta: 2-D parameter array; ``np.nan`` marks unavailable actions and
            stays ``np.nan`` in the result.
        pi: Policy array of the same shape as ``theta``.
        s_a_history: List of ``[state, action]`` pairs; the last pair's
            action is ``np.nan``.
        eta: Learning rate. Defaults to 0.1.

    Returns:
        A new array with the updated parameters; ``theta`` is not modified.
    """
    theta = np.asarray(theta, dtype=float)
    pi = np.asarray(pi, dtype=float)
    T = len(s_a_history) - 1
    if T <= 0:
        # No step was taken: there is nothing to learn from, and dividing by
        # T would turn the parameters into inf/NaN.
        return theta.copy()

    m, n = theta.shape
    # Count visits in a single pass instead of rescanning the history for
    # every (state, action) pair.
    state_visits = np.zeros(m)
    action_taken = np.zeros((m, n))
    for state, action in s_a_history:
        if not (0 <= state < m):
            continue  # e.g. the goal state, which has no row in theta
        state_visits[int(state)] += 1
        if not np.isnan(action) and 0 <= action < n:
            action_taken[int(state), int(action)] += 1

    delta_theta = (action_taken - pi * state_visits[:, np.newaxis]) / T
    # NaN entries of theta propagate through the addition, so unavailable
    # actions remain NaN exactly as before.
    return theta + eta * delta_theta


# One-off preview: apply a single update to the initial parameters and show
# the policy it produces.
new_theta = update_theta(theta_0, pi_0, s_a_history)
pi = softmax_convert_into_pi_from_theta(new_theta)
print(pi)

# Repeat episode -> update until the policy changes by less than
# stop_epsilon (summed absolute difference) between two iterations.
stop_epsilon = 10 ** -4
theta, pi = theta_0, pi_0
is_continue = True
count = 1  # not read anywhere in the visible code
while is_continue:
    s_a_history = goal_maze_ret_s_a(pi)
    new_theta = update_theta(theta, pi, s_a_history)
    new_pi = softmax_convert_into_pi_from_theta(new_theta)

    policy_change = np.sum(np.abs(new_pi - pi))
    print(policy_change)
    print("步数是" + str(len(s_a_history) - 1))

    is_continue = not (policy_change < stop_epsilon)
    if is_continue:
        theta, pi = new_theta, new_pi

np.set_printoptions(precision=3, suppress=True)
print(pi)

from matplotlib import animation


# matplotlib.use('TkAgg')
# from IPython.display import HTML
def init():
    """Clear the agent marker before the first frame is drawn.

    Returns:
        One-element tuple holding the marker artist ``line``.
    """
    no_x, no_y = [], []
    line.set_data(no_x, no_y)
    artists = (line,)
    return artists


def animate(i):
    """Move the agent marker to the state visited at step ``i``.

    Args:
        i: Frame number, used as an index into ``s_a_history``.

    Returns:
        One-element tuple holding the marker artist ``line``.
    """
    state = s_a_history[i][0]
    # Cell centre on the 3-wide grid; row 0 is the top row (y = 2.5).
    x = (state % 3) + 0.5
    y = 2.5 - (state // 3)
    # set_data expects sequences; bare scalars are rejected by recent
    # Matplotlib releases.
    line.set_data([x], [y])
    return (line,)


# Replay the most recently sampled episode, one frame per visited state,
# 200 ms apart and without looping.
anim = animation.FuncAnimation(
    fig,
    animate,
    init_func=init,
    frames=len(s_a_history),
    interval=200,
    repeat=False,
)
# In a notebook the animation could instead be rendered with
# HTML(anim.to_jshtml()).
plt.pause(50)
plt.show()

相关推荐

取消回复欢迎你发表评论:

Vue-实现自定义插件弹窗（vue 实现弹窗）

Vue自定义Hook示例:useUrlState（vue中的自定义指令如何使用）

MySQL中这14个小玩意，让人眼前一亮!

旗舰机新标杆 OPPO Find X2系列正式发布售价5499元起

什么是幂等?分布式锁如何实现业务幂等?

手把手教你搞定菜单权限设计，精确到按钮级别，建议收藏

详解MySQL 字符串拼接之concat\concat_ws\group_concat

如何快速切换node版本?利用n包快速切换nodejs版本

如何发个 npm 包?

【Python机器学习系列】建立多层感知机模型预测心脏疾病

使用python实现最小步数走迷宫_python迷宫最短路线算法

相关推荐

取消回复欢迎你发表评论:

Vue-实现自定义插件弹窗（vue 实现弹窗）

Vue自定义Hook示例:useUrlState（vue中的自定义指令如何使用）

MySQL中这14个小玩意，让人眼前一亮!

旗舰机新标杆 OPPO Find X2系列正式发布 售价5499元起

什么是幂等?分布式锁如何实现业务幂等?

手把手教你搞定菜单权限设计，精确到按钮级别，建议收藏

详解MySQL 字符串拼接之concat\concat_ws\group_concat

如何快速切换node版本?利用n包快速切换nodejs版本

如何发个 npm 包?

【Python机器学习系列】建立多层感知机模型预测心脏疾病

取消回复欢迎你发表评论:

旗舰机新标杆 OPPO Find X2系列正式发布售价5499元起