Training a Tensorflow NN on observations (error)


#1

I am trying to build a neural network in TensorFlow that trains on experience replay, but I’ve run into an error that I can’t fix. Here’s my code:
import gym
import tensorflow as tf
import numpy as np

# Gym environment id. The (misspelled) name is kept because later code reads it.
enviroment = 'CartPole-v0'
env = gym.make(enviroment)

# Training data gathered from random play: each recorded observation is the
# one returned by env.step(), paired with the random action passed to that step.
x_train = []
y_train = []
# Reward a step must reach to be recorded; updated whenever a step meets it.
max_reward = 1

# Play up to 100 episodes of at most 100 random steps each.
for i_episode in range(100):
    observation = env.reset()
    for i in range(100):
        env.render()
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if reward >= max_reward:
            x_train.append(observation)
            y_train.append(action)
            max_reward = reward
        if done:
            break

# Layer sizes used when the network's weight and bias variables are created.
n_hidden_1 = 50  # width of the first hidden layer
n_hidden_2 = 50  # width of the second hidden layer
n_input = 4      # number of input features per sample
n_classes = 1    # width of the output layer

# Graph inputs. No static shape is declared; values are supplied through the
# feed dict at run time.
x = tf.placeholder("float")
y = tf.placeholder("float")

def multilayer_perceptron(x):
    """Build a two-hidden-layer ReLU network on top of ``x`` and return its output.

    Every call creates a fresh set of randomly initialised weight and bias
    variables, so it should be called once while the graph is being built.

    Args:
        x: Input that ``tf.matmul`` can multiply with a
            ``[n_input, n_hidden_1]`` weight matrix.

    Returns:
        The output-layer tensor (a linear layer, no activation), with
        ``n_classes`` columns.
    """
    weights = {
        'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes])),
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_classes])),
    }

    # Hidden layer 1: affine transform followed by ReLU.
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)

    # Hidden layer 2: affine transform followed by ReLU.
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)

    # Output layer: affine transform only.
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

# Build the forward pass once on the placeholder `x`.
prediction = multilayer_perceptron(x)

# Sum-of-squares loss between the network output and the fed targets.
# NOTE(review): y_train holds one scalar action per sample, while the output
# layer has n_classes (= 1) columns. If `prediction` is (N, 1) and `y` is (N,),
# the subtraction broadcasts to (N, N) -- confirm, and feed the targets with
# shape (N, 1) if so.
loss = tf.reduce_sum(tf.square(prediction - y))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# 100 gradient-descent steps, each over the whole collected data set.
for i in range(100):
    sess.run(optimizer, {x: x_train, y: y_train})

# Let the network play one episode of at most 1000 steps.
# NOTE(review): this opens a second session and runs `init` inside it, so the
# variable values trained in the earlier session do not appear to be the ones
# used here -- confirm whether the training session should be reused instead.
with tf.Session() as sess:
    sess.run(init)
    env = gym.make(enviroment)
    # Kept exactly as posted: `env.reset` is not called here, so `observation`
    # is the bound method itself rather than an initial observation. This is
    # the value the TypeError quoted after the code complains about.
    observation = env.reset
    for t in range(1000):
        env.render()
        # NOTE(review): calling multilayer_perceptron() here creates new
        # variables and returns a tensor; it does not evaluate the trained
        # `prediction`. Running `prediction` through sess.run with a feed for
        # `x` looks like the intent -- confirm.
        action = np.around(multilayer_perceptron(observation))
        observation, reward, done, info = env.step(action)
        if done:
            break

And here’s the error:
Traceback (most recent call last):
File "D:\Kay\AI\Q-learning\Q-Learn.py", line 69, in <module>
action = np.around(multilayer_perceptron(observation))
File “D:\Kay\AI\Q-learning\Q-Learn.py”, line 44, in multilayer_perceptron
layer_1 = tf.add(tf.matmul(x, weights[‘h1’]), biases[‘b1’])
File “C:\Users\Katharina\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\math_ops.py”, line 1683, in matmul
a = ops.convert_to_tensor(a, name=“a”)
File “C:\Users\Katharina\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\ops.py”, line 669, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File “C:\Users\Katharina\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\constant_op.py”, line 176, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File “C:\Users\Katharina\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\constant_op.py”, line 165, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File “C:\Users\Katharina\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\tensor_util.py”, line 441, in make_tensor_proto
tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
File "C:\Users\Katharina\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 441, in <listcomp>
tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
File “C:\Users\Katharina\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\util\compat.py”, line 65, in as_bytes
(bytes_or_text,))
TypeError: Expected binary or unicode string, got <bound method Env.reset of <gym.envs.classic_control.cartpole.CartPoleEnv object at 0x000001EB2BEB9F28>>

I would very much appreciate it if you could tell me what’s wrong with my code. Thanks.


#2

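Change `observation = env.reset` to `observation = env.reset()`. Without the parentheses, `observation` is the bound method `env.reset` itself rather than the initial observation it returns, which is why the error says `got <bound method Env.reset of ...>`.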