Tflearn “nan” weight matrices
-
31-10-2019 - |
Pregunta
I wanted to build a DQN. So I followed this code and watched some videos about the idea of DQN. My Code is this (mine is written in tflearn and his in keras):
import tflearn as tfl
import numpy as np
import gym
from collections import deque
import random
class DeepQ():
def __init__(self,game="SpaceInvaders-v0"):
self.game=game
self.env=gym.make(game)
self.storage=deque()
self.filter_size=[4,4]
self.itertime=1000
self.random_move_prop=0.8
np.random.seed(1)
self.minibatch_size=250
self.discounted_future_reward=0.9
def Q_Network(self,learning_rate=0.0000001,load=False,model_path=None,checkpoint_path="X://xxx//xxx//Documents//GitHub//Deeplearning_for_starters//Atari_modells//checkpoint.ckpt"):
if load==False:
net=tfl.layers.core.input_data(shape=[None,210,160,3])# rework this stuff
net=tfl.layers.conv.conv_2d(net,nb_filter=3,filter_size=self.filter_size,activation='relu')
net=tfl.layers.conv.conv_2d(net,nb_filter=3,filter_size=self.filter_size,activation="relu")
#net=tfl.layers.fully_connected(net,20,activation="relu")
net=tfl.layers.flatten(net)
#net=tfl.layers.fully_connected(net,18,activation="relu")
net=tfl.layers.fully_connected(net,10,activation='relu')
net=tfl.layers.fully_connected(net,self.env.action_space.n,activation="linear")
net=tfl.layers.estimator.regression(net,learning_rate=learning_rate)
self.modell=tfl.DNN(net,checkpoint_path=checkpoint_path)
else:
net=tfl.layers.core.input_data(shape=[None,210,160,3])
net=tfl.layers.conv.conv_2d(net,nb_filter=3,filter_size=self.filter_size,activation='relu')
net=tfl.layers.conv.conv_2d(net,nb_filter=3,filter_size=self.filter_size,activation="relu")
#net=tfl.layers.fully_connected(net,20,activation="relu")
net=tfl.layers.flatten(net)
#net=tfl.layers.fully_connected(net,18,activation="relu")
net=tfl.layers.fully_connected(net,10,activation='relu')
net=tfl.layers.fully_connected(net,self.env.action_space.n,activation="linear")
net=tfl.layers.estimator.regression(net,learning_rate=learning_rate)
self.modell=tfl.DNN(net)
self.modell.load(model_path,weights_only=True)
def Q_Learning(self):
observation=self.env.reset()
for i in range(self.itertime):
#self.env.render()
observation=observation.reshape(1,210,160,3)
if np.random.rand()<=self.random_move_prop:
#print("Random step")
action=np.random.randint(low=0,high=self.env.action_space.n)
else:
#print("Random prediction") #for debugging usefull
action=self.modell.predict(observation)
action=np.argmax(action)
new_observation, reward, done, info=self.env.step(action)
self.storage.append((observation,action,reward,new_observation,done))
observation=new_observation
if done:
self.env.reset()
print("###############################################")
print("Done with observing!")
print("###############################################")
minibatch=random.sample(self.storage,self.minibatch_size)# take random observations from our data
x=np.zeros((self.minibatch_size,)+observation.shape)
y=np.zeros((self.minibatch_size,self.env.action_space.n))
for i in range(0,self.minibatch_size):
Observation=minibatch[i][0]
Action=minibatch[i][1]
Reward=minibatch[i][2]
New_observation=minibatch[i][3]
done=minibatch[i][4]
print("Processing batch data... (step:"+str(i)+" from "+str(self.minibatch_size)+")")
x[i:i+1]=Observation.reshape((1,)+observation.shape)
y[i]=self.modell.predict(Observation)
Q_sa=self.modell.predict(Observation)
if done:
y[i,action]=reward
else:
y[i,action]=reward+self.discounted_future_reward*np.max(Q_sa)
self.modell.fit_batch(x,y)
self.modell.save("X://xxx//xxx//xxx//SpaceInvaders1.tfl")
print("")
print("Modell fitting acomplished!")
print("")
def Q_predict(self,model_path="Your path here"):
self.Q_Network(load=True,model_path=model_path)
observation=self.env.reset()
observation=observation.reshape((1,)+observation.shape)
done=False
total_reward=0.0
while not done:
self.env.render()
Q=self.modell.predict(observation)
print(Q)
action=np.argmax(Q)
print(action)
new_observation,reward,done,info=self.env.step(action)
observation=new_observation
observation=new_observation.reshape((1,)+observation.shape)
total_reward+=reward
print("Game ends with a score of: "+str(total_reward))
print("")
The problem is that, if I run the predict function the network does nothing.
I figured out that all weights are filled with nan
. What I have read is that it can depend on the learning rate, so I have lowered the rate from 1e-3
to the actual one, but this changed nothing.
No hay solución correcta
Licenciado bajo: CC-BY-SA con atribución
No afiliado a datascience.stackexchange