1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
class Actor(object):
    """Gaussian policy network producing a single continuous action.

    The graph maps one state vector to the mean and standard deviation of a
    normal distribution, samples an action from it, and exposes a training
    op that raises the log-probability of a taken action in proportion to a
    supplied TD error.
    """

    def __init__(self, sess, s_dim, a_bound, lr):
        """Build the policy graph and its training op.

        Args:
            sess: Session object used by ``learn`` and ``choose_action`` to
                run the graph.
            s_dim: Number of features in a state vector.
            a_bound: Scale applied to the tanh mean head; sampled actions
                are clipped to ``[-a_bound, a_bound]``.
            lr: Learning rate handed to the Adam optimizer.
        """
        self.sess = sess

        # Graph inputs: a single state row, a scalar action and a scalar
        # TD error.
        self.s = tf.placeholder(tf.float32, shape=(1, s_dim), name='s')
        self.a = tf.placeholder(tf.float32, shape=(), name='a')
        self.td_error = tf.placeholder(tf.float32, shape=(), name='td_error')

        def dense(inputs, units, activation):
            # ``initializer_helper`` is a module-level kwargs dict
            # (presumably kernel/bias initializers -- confirm at its
            # definition).
            return tf.layers.dense(
                inputs=inputs,
                units=units,
                activation=activation,
                **initializer_helper
            )

        # Layer creation order is kept stable so TensorFlow's automatic
        # layer names stay the same.
        hidden = dense(self.s, 30, tf.nn.relu)
        raw_mean = dense(hidden, 1, tf.nn.tanh)
        raw_spread = dense(hidden, 1, tf.nn.softplus)

        # tanh output is scaled by ``a_bound``; softplus output is shifted
        # up by one before being used as the standard deviation.
        mean = tf.squeeze(raw_mean * a_bound)
        spread = tf.squeeze(raw_spread + 1)
        self.normal_dist = tf.distributions.Normal(mean, spread)

        sampled = self.normal_dist.sample(1)
        self.action = tf.clip_by_value(sampled, -a_bound, a_bound)

        # The quantity below is maximised: Adam minimises its negation.
        log_likelihood = self.normal_dist.log_prob(self.a)
        objective = log_likelihood * self.td_error
        adam = tf.train.AdamOptimizer(lr)
        self.optimizer = adam.minimize(-objective)

    @staticmethod
    def _batch_of_one(s):
        """Return ``s`` with a new leading axis, matching the ``s`` placeholder."""
        return s[np.newaxis, :]

    def learn(self, s, a, td_error):
        """Run one optimizer step for a single transition.

        Args:
            s: State array; a leading axis is added before feeding.
            a: Action fed to the ``a`` placeholder.
            td_error: Value fed to the ``td_error`` placeholder.
        """
        feed = {
            self.s: self._batch_of_one(s),
            self.a: a,
            self.td_error: td_error,
        }
        self.sess.run(self.optimizer, feed_dict=feed)

    def choose_action(self, s):
        """Sample a clipped action for state ``s`` and squeeze the result."""
        feed = {self.s: self._batch_of_one(s)}
        drawn = self.sess.run(self.action, feed)
        return drawn.squeeze()
|