提交 4b4b5824 编写于 作者: H Haonan 提交者: emailweixu

preliminary implementations of the ComputationTask, Algorithm, and Model classes (#9)

* preliminary implementations of ComputationTask, Algorithm and Model classes

* remove "model_func" from the args of an algorithm

* a clean clone() function for Algorithm and Model

* add use_next_value as an input to learn()

* further re-structure

* added Feedforward and RLAlgorithm classes

* maxid -> argmax

* discrete_distribution -> category_distribution

* category -> categorical

* revisions
上级 2ce57115
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from parl.framework.algorithm import RLAlgorithm
import parl.layers as layers
import parl.framework.policy_distribution as pd
from parl.layers import common_functions as comf
import paddle.fluid as fluid
from copy import deepcopy
class SimpleAC(RLAlgorithm):
    """
    A simple Actor-Critic that has a feedforward policy network and
    a single discrete action.

    learn() requires keywords: "action", "reward", "v_value"
    """

    def __init__(self,
                 model,
                 hyperparas=None,
                 gpu_id=-1,
                 discount_factor=0.99):
        """
        Args:
            model: the model whose value() and policy() are used by learn().
            hyperparas: dict of hyper-parameters; learn() reads the "lr"
                entry. When omitted, a fresh dict(lr=1e-4) is created for
                this instance.
            gpu_id: device id forwarded to the base class.
            discount_factor: factor applied to the next-step value when
                building the critic target.
        """
        ## build the default per instance so that it is never shared
        ## (a dict default in the signature would be one shared object)
        if hyperparas is None:
            hyperparas = dict(lr=1e-4)
        super(SimpleAC, self).__init__(model, hyperparas, gpu_id)
        self.discount_factor = discount_factor

    def learn(self, inputs, next_inputs, states, next_states, episode_end,
              actions, rewards):
        """
        Build the actor-critic cost and attach an SGD optimizer to it.

        Reads actions["action"], rewards["reward"],
        episode_end["episode_end"] and the "v_value" entry returned by
        self.model.value().

        Output: dict(cost=<average cost>)
        """
        action = actions["action"]
        reward = rewards["reward"]

        values = self.model.value(inputs, states)
        next_values = self.model.value(next_inputs, next_states)
        value = values["v_value"]
        ## NOTE(review): the next value is multiplied by episode_end, so a 0
        ## entry removes the bootstrap term -- confirm the flag's polarity
        ## with the caller.
        next_value = next_values["v_value"] * episode_end["episode_end"]
        ## the bootstrap target is treated as a constant
        next_value.stop_gradient = True
        assert value.shape[1] == next_value.shape[1]

        critic_value = reward + self.discount_factor * next_value
        td_error = critic_value - value
        value_cost = layers.square(td_error)

        dist, _ = self.model.policy(inputs, states)
        dist = dist["action"]
        assert isinstance(dist, pd.CategoricalDistribution)

        ## NOTE(review): td_error is not marked stop_gradient, so the
        ## policy term below also back-propagates through the value
        ## estimate -- confirm this is intended.
        pg_cost = 0 - dist.loglikelihood(action)
        avg_cost = layers.mean(x=value_cost + pg_cost * td_error)
        optimizer = fluid.optimizer.SGD(learning_rate=self.hp["lr"])
        optimizer.minimize(avg_cost)
        return dict(cost=avg_cost)
class SimpleQ(RLAlgorithm):
    """
    A simple Q-learning that has a feedforward policy network and a single
    discrete action.

    learn() requires keywords: "action", "reward", "q_value"
    """

    def __init__(self,
                 model,
                 hyperparas=None,
                 gpu_id=-1,
                 discount_factor=0.99,
                 update_ref_interval=100):
        """
        Args:
            model: the training model; a deep copy of it is kept as the
                reference model.
            hyperparas: dict of hyper-parameters; learn() reads the "lr"
                entry. When omitted, a fresh dict(lr=1e-4) is created for
                this instance.
            gpu_id: device id forwarded to the base class and used when
                syncing parameters to the reference model.
            discount_factor: factor applied to the next-step value.
            update_ref_interval: number of training batches between two
                parameter syncs to the reference model; must be positive.
        """
        ## build the default per instance so that it is never shared
        if hyperparas is None:
            hyperparas = dict(lr=1e-4)
        super(SimpleQ, self).__init__(model, hyperparas, gpu_id)
        self.discount_factor = discount_factor
        self.gpu_id = gpu_id
        assert update_ref_interval > 0
        self.update_ref_interval = update_ref_interval
        self.total_batches = 0
        ## create a reference model
        self.ref_model = deepcopy(model)

    def before_every_batch(self):
        """
        Copy the training parameters to the reference model on the first
        batch and then once every `update_ref_interval` batches.
        """
        if self.total_batches % self.update_ref_interval == 0:
            self.model.sync_paras_to(self.ref_model, self.gpu_id)
        self.total_batches += 1

    def learn(self, inputs, next_inputs, states, next_states, episode_end,
              actions, rewards):
        """
        Build the squared TD-error cost and attach an SGD optimizer to it.

        The current Q values come from self.model; the next-step Q values
        come from self.ref_model.

        Output: dict(cost=<average cost>)
        """
        action = actions["action"]
        reward = rewards["reward"]

        values = self.model.value(inputs, states)
        next_values = self.ref_model.value(next_inputs, next_states)
        q_value = values["q_value"]
        ## NOTE(review): the next Q values are multiplied by episode_end, so
        ## a 0 entry removes the bootstrap term -- confirm the flag's
        ## polarity with the caller.
        next_q_value = next_values["q_value"] * episode_end["episode_end"]
        ## the bootstrap target is treated as a constant
        next_q_value.stop_gradient = True
        next_value = layers.reduce_max(next_q_value, dim=-1)
        assert q_value.shape[1] == next_q_value.shape[1]

        ## pick the Q value of the action that was taken
        value = comf.idx_select(input=q_value, idx=action)
        critic_value = reward + self.discount_factor * next_value
        td_error = critic_value - value

        avg_cost = layers.mean(x=layers.square(td_error))
        optimizer = fluid.optimizer.SGD(learning_rate=self.hp["lr"])
        optimizer.minimize(avg_cost)
        return dict(cost=avg_cost)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.layers import Network
import parl.framework.policy_distribution as pd
from abc import ABCMeta, abstractmethod
def check_duplicate_spec_names(model):
    """
    Check if there are two specs that have the same name.

    The input, action, state and reward specs of `model` are concatenated
    and every name must appear only once.

    Raises an AssertionError that lists the repeated names (sorted, so the
    message is deterministic) if any name appears more than once.
    """
    specs = model.get_input_specs() \
            + model.get_action_specs() \
            + model.get_state_specs() \
            + model.get_reward_specs()
    ## single pass: remember every name already met
    seen = set()
    duplicates = set()
    for name, _ in specs:
        if name in seen:
            duplicates.add(name)
        seen.add(name)
    assert not duplicates, \
        "duplicate names with different specs: " + " ".join(sorted(duplicates))
class Model(Network):
    """
    The network definition of a specific problem. A Model is owned by an
    Algorithm, which calls its spec getters and its policy()/value()
    functions.
    """
    __metaclass__ = ABCMeta

    def __init__(self):
        super(Model, self).__init__()

    @abstractmethod
    def get_input_specs(self):
        """
        Describe the inputs of the model.
        Output: list of tuples
        """

    def get_state_specs(self):
        """
        Describe the states of the model. States are optional, so the
        default is an empty list.
        Output: list of tuples
        """
        return list()

    @abstractmethod
    def get_action_specs(self):
        """
        Describe the actions of the model.
        Output: list of tuples
        """

    def get_reward_specs(self):
        """
        Describe the rewards. The default is a single scalar named
        "reward"; override this to specify a vector of rewards.
        """
        scalar_reward = ("reward", dict(shape=[1]))
        return [scalar_reward]

    def policy(self, inputs, states):
        """
        Return: action_dists: a dict of action distribution objects
                states
        An action distribution object can be created with
        PolicyDistribution().
        Optional: not every model has to implement policy()
        """
        raise NotImplementedError()

    def value(self, inputs, states):
        """
        Return: values: a dict of estimated values for the current
                        observations and states, for example "q_value"
                        and "v_value"
        Optional: not every model has to implement value()
        """
        raise NotImplementedError()
class Algorithm(object):
    """
    An Algorithm provides two functions:
    1. predict() for the forward computation
    2. learn() for the cost to be optimized

    An algorithm should be only part of a network; the rest of the network
    is implemented by the user in the Model class.
    """

    def __init__(self, model, hyperparas, gpu_id):
        assert isinstance(model, Model)
        check_duplicate_spec_names(model)
        self.model, self.hp, self.gpu_id = model, hyperparas, gpu_id

    def get_input_specs(self):
        """Forward the input specs of the model."""
        input_specs = self.model.get_input_specs()
        return input_specs

    def get_state_specs(self):
        """Forward the state specs of the model."""
        state_specs = self.model.get_state_specs()
        return state_specs

    def get_action_specs(self):
        """
        Forward the action specs of the model.
        For non-RL algorithms, this can return []
        """
        action_specs = self.model.get_action_specs()
        return action_specs

    def get_reward_specs(self):
        """
        Forward the reward specs of the model.
        For non-RL algorithms, this can return []
        """
        reward_specs = self.model.get_reward_specs()
        return reward_specs

    def before_every_batch(self):
        """
        Callback invoked before every batch of training; does nothing by
        default. See ComputationTask.learn()
        """
        return None

    def after_every_batch(self):
        """
        Callback invoked after every batch of training; does nothing by
        default. See ComputationTask.learn()
        """
        return None

    def predict(self, inputs, states):
        """
        Given the inputs and states, do the forward prediction and update
        the states. The default does nothing.
        Optional: an algorithm might not implement predict()
        """
        return None

    def learn(self, inputs, next_inputs, states, next_states, episode_end,
              actions, rewards):
        """
        Compute a learning cost to be optimized and return it.
        The default does nothing.
        Output: cost(dict)
        Optional: an algorithm might not implement learn()
        """
        return None
class RLAlgorithm(Algorithm):
    """
    An Algorithm subclass dedicated to RL problems.
    """

    def __init__(self, model, hyperparas, gpu_id):
        super(RLAlgorithm, self).__init__(model, hyperparas, gpu_id)

    def get_behavior_model(self):
        """
        Return the model that computes the actions. It may differ from the
        training model, which is common in off-policy RL algorithms.
        By default it is the training model itself; override this function
        to specify a different one.
        """
        return self.model

    def predict(self, inputs, states):
        """
        Implementation of Algorithm.predict()
        Ask the behavior model for the action distributions, then sample
        one action from each of them.
        Input: inputs(dict), states(dict)
        Output: actions(dict), states(dict)
        """
        policy_model = self.get_behavior_model()
        dists, new_states = policy_model.policy(inputs, states)
        sampled = dict()
        for name, distribution in dists.iteritems():
            is_policy_dist = isinstance(distribution, pd.PolicyDistribution)
            assert is_policy_dist, \
                "behavior_model.policy must return PolicyDist!"
            ## calling a distribution samples an action from it
            sampled[name] = distribution()
        return sampled, new_states
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model, Algorithm
def split_list(l, sizes):
    """
    Cut a list into consecutive chunks; the i-th chunk has the length
    given by sizes[i].
    """
    ## bounds[i] is where the i-th chunk starts, bounds[i + 1] where it ends
    bounds = [0]
    for size in sizes:
        bounds.append(bounds[-1] + size)
    return [l[begin:end] for begin, end in zip(bounds[:-1], bounds[1:])]
class ComputationTask(object):
    """
    A ComputationTask is responsible for the general data flow
    outside the algorithm

    A ComputationTask is created in a bottom-up way:
    a. create a Model
    b. create an Algorithm with the model as an input
    c. define a ComputationTask with the algorithm
    """

    def __init__(self, algorithm):
        """
        Build the predict/learn programs of `algorithm`, create an executor
        on CPU (gpu_id < 0) or on the given GPU, and run the default
        startup program.
        """
        assert isinstance(algorithm, Algorithm)
        self.alg = algorithm
        ## create an Fluid executor
        self._define_program()
        place = fluid.CPUPlace() if self.alg.gpu_id < 0 \
                else fluid.CUDAPlace(self.alg.gpu_id)
        self.fluid_executor = fluid.Executor(place)
        self.fluid_executor.run(fluid.default_startup_program())

    def _create_data_layers(self, specs):
        """
        Create one data layer per (name, args) spec.
        Output: dict mapping each spec name to its data layer
        """
        data_layers = {}
        for name, args in specs:
            data_layers[name] = layers.data(name, **args)
        return data_layers

    def _define_program(self):
        """
        Define the predict program and the learn program.

        The predict program is fed with the inputs and states; the learn
        program is additionally fed with their "next_"-prefixed
        counterparts, the actions, the rewards and "episode_end".
        """
        self.learn_program = fluid.Program()
        self.predict_program = fluid.Program()

        def _get_next_specs(specs):
            ## same spec arguments, name prefixed with "next_"
            return [("next_" + spec[0], spec[1]) for spec in specs]

        def _select_data(data_layer_dict, specs):
            return {name: data_layer_dict[name] for name, _ in specs}

        input_specs = self.alg.get_input_specs()
        state_specs = self.alg.get_state_specs()
        next_input_specs = _get_next_specs(input_specs)
        next_state_specs = _get_next_specs(state_specs)
        action_specs = self.alg.get_action_specs()
        reward_specs = self.alg.get_reward_specs()
        episode_end_specs = [("episode_end", dict(shape=[1]))]

        self.action_names = sorted([name for name, _ in action_specs])
        self.state_names = sorted([name for name, _ in state_specs])

        with fluid.program_guard(self.predict_program):
            data_layer_dict = {}
            for specs in (input_specs, state_specs):
                data_layer_dict.update(self._create_data_layers(specs))
            self.predict_feed_names = sorted(data_layer_dict.keys())

            inputs = _select_data(data_layer_dict, input_specs)
            states = _select_data(data_layer_dict, state_specs)

            ### call alg predict()
            pred_actions, pred_states = self.alg.predict(inputs, states)
            self.predict_fetch = [pred_actions, pred_states]

        with fluid.program_guard(self.learn_program):
            data_layer_dict = {}
            ## the data layers are created in this exact order
            for specs in (input_specs, state_specs, next_input_specs,
                          next_state_specs, action_specs, reward_specs,
                          episode_end_specs):
                data_layer_dict.update(self._create_data_layers(specs))
            self.learn_feed_names = sorted(data_layer_dict.keys())

            inputs = _select_data(data_layer_dict, input_specs)
            states = _select_data(data_layer_dict, state_specs)
            next_inputs = _select_data(data_layer_dict, next_input_specs)
            next_states = _select_data(data_layer_dict, next_state_specs)
            actions = _select_data(data_layer_dict, action_specs)
            rewards = _select_data(data_layer_dict, reward_specs)
            episode_end = _select_data(data_layer_dict, episode_end_specs)

            ## call alg learn()
            ### TODO: implement a recurrent layer to strip the sequence information
            self.cost = self.alg.learn(inputs, next_inputs, states,
                                       next_states, episode_end, actions,
                                       rewards)

    def predict(self, inputs, states=None):
        """
        ComputationTask predict API
        This function is responsible to convert Python data to Fluid tensors, and
        then convert the computational results in the reverse way.

        Input: inputs(dict), states(dict, optional; empty when omitted)
        Output: actions(dict), states(dict) holding the fetched results
        Raises an AssertionError if the fed names do not match the names
        of the data layers of the predict program.
        """
        if states is None:
            states = {}
        data = {}
        data.update(inputs)
        data.update(states)
        assert sorted(data.keys()) == self.predict_feed_names, \
            "field names mismatch: %s %s" % (data.keys(), self.predict_feed_names)
        feed = {n: data[n] for n in self.predict_feed_names}

        ### run the predict_program and fetch the computational results
        action_tensors, state_tensors = self.predict_fetch
        ## freeze the (name, tensor) order so that names and results can be
        ## matched again after the run
        action_tensors = list(action_tensors.items())
        state_tensors = list(state_tensors.items())
        result = self.fluid_executor.run(
            self.predict_program,
            feed=feed,
            fetch_list=[t for _, t in action_tensors + state_tensors])

        ## actions and states are numpy arrays
        actions, states = split_list(
            result, [len(action_tensors), len(state_tensors)])

        ## wrap the results into dictionaries for better access
        actions = dict(zip([name for name, _ in action_tensors], actions))
        states = dict(zip([name for name, _ in state_tensors], states))
        assert sorted(actions.keys()) == self.action_names
        assert sorted(states.keys()) == self.state_names
        return actions, states

    def learn(self,
              inputs,
              next_inputs,
              episode_end,
              actions,
              rewards,
              states=None,
              next_states=None):
        """
        ComputationTask learn API
        This function is responsible to convert Python data to Fluid tensors, and
        then convert the computational results in the reverse way.

        All arguments are dicts; states and next_states are optional and
        empty when omitted.
        Output: dict(cost=<fetched cost>)
        Raises an AssertionError if the fed names do not match the names
        of the data layers of the learn program.
        """
        if states is None:
            states = {}
        if next_states is None:
            next_states = {}
        data = {}
        ## same merge order as before: later dicts win on a name clash
        for part in (inputs, next_inputs, states, next_states, episode_end,
                     actions, rewards):
            data.update(part)
        ## the message now receives its two arguments; with an empty tuple
        ## the formatting itself raised a TypeError and hid the mismatch
        assert sorted(data.keys()) == self.learn_feed_names, \
            "field names mismatch: %s %s" % (data.keys(), self.learn_feed_names)
        feed = {n: data[n] for n in self.learn_feed_names}

        self.alg.before_every_batch()
        ## run the learn program and fetch the sole cost output
        result = self.fluid_executor.run(self.learn_program,
                                         feed=feed,
                                         fetch_list=[self.cost["cost"]])
        self.alg.after_every_batch()
        return dict(cost=result[0])
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import parl.layers as layers
from paddle.fluid.framework import Variable
from parl.layers import common_functions as comf
from paddle.fluid.framework import convert_np_dtype_to_dtype_
from abc import ABCMeta, abstractmethod
class PolicyDistribution(object):
    """
    Base class of the action distributions returned by a policy.

    `dist` must be a 2-D object (it is asserted that len(dist.shape) == 2).
    It is exposed read-only as `self.dist`, and its second dimension as
    `self.dim`.
    """
    __metaclass__ = ABCMeta

    def __init__(self, dist):
        assert len(dist.shape) == 2
        ## stored under private names: assigning to self.dim / self.dist
        ## would shadow the accessors of the same name below
        self._dim = dist.shape[1]
        self._dist = dist

    @abstractmethod
    def __call__(self):
        """
        Implement __call__ to sample an instance.
        """
        pass

    @property
    def dim(self):
        """
        For discrete policies, this is the number of actions.
        For continuous policies, this is the action vector length.
        For sequential policies (e.g., sentences), this is the number
        of choices at each step.
        """
        return self._dim

    @property
    def dist(self):
        """
        The object this distribution was built from.
        """
        return self._dist

    def loglikelihood(self, action):
        """
        Given an action, this function returns the log likelihood of this action under
        the current distribution.
        """
        raise NotImplementedError()
class CategoricalDistribution(PolicyDistribution):
    """
    A distribution over discrete actions, given as `dist`.
    """

    def __init__(self, dist):
        super(CategoricalDistribution, self).__init__(dist)

    def __call__(self):
        """
        Sample an action id with comf.categorical_random().
        """
        sampled = comf.categorical_random(self.dist)
        return sampled

    def loglikelihood(self, action):
        """
        The log likelihood of `action` is the negated cross entropy
        between the distribution and the action label.
        """
        cross_entropy = layers.cross_entropy(input=self.dist, label=action)
        return 0 - cross_entropy
class Deterministic(PolicyDistribution):
    """
    A deterministic "distribution": calling it returns `dist` itself.
    """

    def __init__(self, dist):
        super(Deterministic, self).__init__(dist)
        ## only continuous (float32 / float64) deterministic actions are
        ## supported
        is_float = any(dist.dtype == convert_np_dtype_to_dtype_(type_name)
                       for type_name in ("float32", "float64"))
        assert is_float

    def __call__(self):
        """
        Return the action itself; nothing is sampled.
        """
        return self.dist

    def loglikelihood(self, action):
        """
        Not available: this always fails the assertion.
        """
        assert False, "You cannot compute likelihood for a deterministic action!"
def q_categorical_distribution(q_value, exploration_rate=0.0):
    """
    Build a CategoricalDistribution from a Q value.
    The distribution starts as a one-hot vector on the argmax of the Q
    value; unless exploration_rate is the float 0, it is then mixed with a
    uniform distribution weighted by exploration_rate.
    """
    assert len(q_value.shape) == 2, "[batch_size, num_actions]"

    num_actions = q_value.shape[-1]
    greedy_action = comf.argmax_layer(q_value)
    one_hot = layers.one_hot(input=greedy_action, depth=num_actions)
    prob = layers.cast(x=one_hot, dtype="float32")

    ### exploration_rate could be a Variable, so only a float that equals 0
    ### lets us skip the mixing
    no_exploration = isinstance(exploration_rate,
                                float) and exploration_rate == 0
    if not no_exploration:
        uniform_part = exploration_rate / float(num_actions)
        prob = uniform_part + (1 - exploration_rate) * prob
    return CategoricalDistribution(prob)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model, RLAlgorithm
from parl.layers import common_functions as comf
from parl.model_zoo.simple_models import SimpleModelDeterministic
import numpy as np
from copy import deepcopy
import unittest
class TestAlgorithm(RLAlgorithm):
    """
    A minimal RLAlgorithm for the tests: no hyper-parameters, gpu_id -1.
    """

    def __init__(self, model):
        no_hyperparas = dict()
        super(TestAlgorithm, self).__init__(
            model, hyperparas=no_hyperparas, gpu_id=-1)
class TestAlgorithmParas(unittest.TestCase):
    """
    Tests for copying parameters from one algorithm's model to another.
    """

    def test_sync_paras_in_one_program(self):
        """
        Test case for copying parameters
        """

        alg1 = TestAlgorithm(model=SimpleModelDeterministic(
            dims=10, mlp_layer_confs=[dict(size=10)]))
        alg2 = deepcopy(alg1)

        batch_size = 10
        sensor = np.random.uniform(
            0, 1, [batch_size, alg1.model.dims]).astype("float32")

        program = fluid.Program()
        startup_program = fluid.Program()

        with fluid.program_guard(program, startup_program):
            x = layers.data(name='x', shape=[alg1.model.dims], dtype="float32")
            ## too early to sync before the layers are created: the call
            ## must fail. assertRaises is used so that a missing failure is
            ## reported; with a bare "except:" around assertTrue(False) the
            ## AssertionError was swallowed and this check could never fail.
            with self.assertRaises(Exception):
                alg1.model.sync_paras_to(alg2.model, alg2.gpu_id)
            ## first let the program generates the actual variables by using the
            ## layer functions (before this step the layers haven't been instantiated yet!)
            ## the call of predict() function already covers all the layers
            y0, _ = alg1.predict(inputs=dict(sensor=x), states=dict())
            y1, _ = alg2.predict(inputs=dict(sensor=x), states=dict())

        ######################
        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)

        ## y0 and y1 are two dictionaries
        fetch_list = list(y0.values()) + list(y1.values())

        outputs = exe.run(program, feed={'x': sensor}, fetch_list=fetch_list)
        ## before the copy the two models give different outputs
        self.assertNotEqual(
            np.sum(outputs[0].flatten()), np.sum(outputs[1].flatten()))

        ## do the copying
        alg1.model.sync_paras_to(alg2.model, alg2.gpu_id)

        outputs = exe.run(program, feed={'x': sensor}, fetch_list=fetch_list)
        ## after the copy the outputs must agree
        self.assertEqual(
            np.sum(outputs[0].flatten()), np.sum(outputs[1].flatten()))

    def test_sync_paras_between_programs(self):
        """
        Test case for copying parameters between two different programs
        """
        alg1 = TestAlgorithm(model=SimpleModelDeterministic(
            dims=10, mlp_layer_confs=[dict(size=10)]))
        alg2 = deepcopy(alg1)

        batch_size = 10
        sensor = np.random.uniform(
            0, 1, [batch_size, alg1.model.dims]).astype("float32")

        startup_program = fluid.Program()
        program1 = fluid.Program()
        program2 = fluid.Program()

        with fluid.program_guard(program1, startup_program):
            x1 = layers.data(
                name='x', shape=[alg1.model.dims], dtype="float32")
            y1, _ = alg1.predict(inputs=dict(sensor=x1), states=dict())

        with fluid.program_guard(program2, startup_program):
            x2 = layers.data(
                name='x', shape=[alg1.model.dims], dtype="float32")
            y2, _ = alg2.predict(inputs=dict(sensor=x2), states=dict())

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)

        alg1.model.sync_paras_to(alg2.model, alg2.gpu_id)

        outputs1 = exe.run(program1,
                           feed={'x': sensor},
                           fetch_list=list(y1.values()))
        outputs2 = exe.run(program2,
                           feed={'x': sensor},
                           fetch_list=list(y2.values()))
        self.assertEqual(
            np.sum(outputs1[0].flatten()), np.sum(outputs2[0].flatten()))
# Run the test cases of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model
from parl.framework.computation_task import ComputationTask
import parl.framework.policy_distribution as pd
from parl.layers import common_functions as comf
from parl.algorithm_zoo.simple_algorithms import SimpleAC, SimpleQ
from parl.model_zoo.simple_models import SimpleModelDeterministic, SimpleModelAC, SimpleModelQ
from test_algorithm import TestAlgorithm
import numpy as np
from copy import deepcopy
import unittest
import math
class TestModelCNN(Model):
    """
    A test model for a one-channel image input: one conv layer followed by
    an MLP with a softmax output over the actions.
    """

    def __init__(self, width, height, num_actions):
        super(TestModelCNN, self).__init__()
        self.conv = layers.conv2d(
            num_filters=1, filter_size=3, bias_attr=False)
        mlp_confs = [
            dict(size=32, act="relu", bias_attr=False),
            dict(size=16, act="relu", bias_attr=False),
            dict(size=num_actions, act="softmax", bias_attr=False),
        ]
        self.mlp = comf.MLP(mlp_confs)
        self.height = height
        self.width = width

    def get_input_specs(self):
        ## image format CHW
        image_shape = [1, self.height, self.width]
        return [("image", dict(shape=image_shape))]

    def get_action_specs(self):
        action_spec = dict(shape=[1], dtype="int64")
        return [("action", action_spec)]

    def policy(self, inputs, states):
        """
        Feed the first input through conv and mlp; the result is wrapped
        in a CategoricalDistribution. The states are returned unchanged.
        """
        image = inputs.values()[0]
        features = self.conv(input=image)
        action_dist = pd.CategoricalDistribution(self.mlp(features))
        return dict(action=action_dist), states

    def value(self, inputs, states):
        """
        Return a constant zero "v_value" with one entry per row of the
        first input.
        """
        first_input = inputs.values()[0]
        zeros = layers.fill_constant(
            shape=[first_input.shape[0], 1], dtype="float32", value=0)
        return dict(v_value=zeros)
class TestComputationTask(unittest.TestCase):
    """
    Tests of ComputationTask: prediction, parameter sharing, parameter
    syncing and training.
    """

    def test_predict(self):
        """
        Test case for AC-learning and Q-learning predictions
        """
        num_actions = 4

        def check_action_frequencies(feed, task, expect_first_only):
            ## count how often each action is predicted over 2000 calls
            counts = [0] * num_actions
            for _ in range(2000):
                actions, states = task.predict(inputs=feed)
                assert not states, "states should be empty"
                ## actions["action"] is a batch of actions
                for sampled in actions["action"]:
                    counts[sampled[0]] += 1

            for idx in range(num_actions):
                freq = counts[idx] / float(sum(counts))
                if expect_first_only:
                    ### the first action will always be chosen
                    expected = 1.0 if idx == 0 else 0.0
                else:
                    ### the actions should be uniform
                    expected = 1.0 / num_actions
                self.assertAlmostEqual(freq, expected, places=1)

        dims = 100

        ac = SimpleAC(model=SimpleModelAC(
            dims=dims,
            num_actions=num_actions,
            mlp_layer_confs=[
                dict(size=32, act="relu", bias_attr=False),
                dict(size=16, act="relu", bias_attr=False),
                dict(size=num_actions, act="softmax", bias_attr=False),
            ]))

        ac_cnn = SimpleAC(model=TestModelCNN(
            width=84, height=84, num_actions=num_actions))

        q = SimpleQ(model=SimpleModelQ(
            dims=dims,
            num_actions=num_actions,
            mlp_layer_confs=[
                dict(size=32, act="relu", bias_attr=False),
                dict(size=16, act="relu", bias_attr=False),
                dict(size=num_actions, bias_attr=False),
            ]))

        batch_size = 10
        height, width = 84, 84
        sensor = np.zeros([batch_size, dims]).astype("float32")
        image = np.zeros([batch_size, 1, height, width]).astype("float32")

        ct0 = ComputationTask(algorithm=ac)
        ct1 = ComputationTask(algorithm=q)
        ct2 = ComputationTask(algorithm=ac_cnn)

        check_action_frequencies(
            dict(sensor=sensor), ct0, expect_first_only=False)
        check_action_frequencies(
            dict(sensor=sensor), ct1, expect_first_only=True)
        check_action_frequencies(
            dict(image=image), ct2, expect_first_only=False)

    def test_ct_para_sharing(self):
        """
        Test case for two CTs sharing parameters
        """
        shared_alg = TestAlgorithm(model=SimpleModelDeterministic(
            dims=10, mlp_layer_confs=[dict(size=10)]))
        ct0 = ComputationTask(algorithm=shared_alg)
        ct1 = ComputationTask(algorithm=shared_alg)

        batch_size = 10
        sensor = np.random.uniform(
            0, 1, [batch_size, shared_alg.model.dims]).astype("float32")
        feed = dict(sensor=sensor)

        outputs0, _ = ct0.predict(inputs=feed)
        outputs1, _ = ct1.predict(inputs=feed)
        sum0 = np.sum(outputs0["continuous_action"].flatten())
        sum1 = np.sum(outputs1["continuous_action"].flatten())
        self.assertEqual(sum0, sum1)

    def test_ct_para_sync(self):
        """
        Test case for two CTs copying parameters
        """
        alg = TestAlgorithm(model=SimpleModelDeterministic(
            dims=10, mlp_layer_confs=[dict(size=10)]))

        ct0 = ComputationTask(algorithm=alg)
        ct1 = ComputationTask(algorithm=deepcopy(alg))

        batch_size = 10
        sensor = np.random.uniform(
            0, 1, [batch_size, ct0.alg.model.dims]).astype("float32")
        feed = dict(sensor=sensor)

        ## before the copy the two tasks give different outputs
        outputs0, _ = ct0.predict(inputs=feed)
        outputs1, _ = ct1.predict(inputs=feed)
        self.assertNotEqual(
            np.sum(outputs0["continuous_action"].flatten()),
            np.sum(outputs1["continuous_action"].flatten()))

        ct0.alg.model.sync_paras_to(ct1.alg.model, ct1.alg.gpu_id)

        ## after the copy the outputs must agree
        outputs0, _ = ct0.predict(inputs=feed)
        outputs1, _ = ct1.predict(inputs=feed)
        self.assertEqual(
            np.sum(outputs0["continuous_action"].flatten()),
            np.sum(outputs1["continuous_action"].flatten()))

    def test_ct_learning(self):
        """
        Test training
        """
        num_actions = 2
        dims = 100
        batch_size = 8
        sensor = np.ones(
            [batch_size, dims]).astype("float32") / dims  # normalize
        next_sensor = np.zeros([batch_size, dims]).astype("float32")

        for on_policy in [True, False]:
            if on_policy:
                alg = SimpleAC(
                    model=SimpleModelAC(
                        dims=dims,
                        num_actions=num_actions,
                        mlp_layer_confs=[
                            dict(size=64, act="relu", bias_attr=False),
                            dict(size=32, act="relu", bias_attr=False),
                            dict(size=num_actions, act="softmax"),
                        ]),
                    hyperparas=dict(lr=1e-1))
            else:
                alg = SimpleQ(
                    model=SimpleModelQ(
                        dims=dims,
                        num_actions=num_actions,
                        mlp_layer_confs=[
                            dict(size=64, act="relu", bias_attr=False),
                            dict(size=32, act="relu", bias_attr=False),
                            dict(size=num_actions),
                        ]),
                    update_ref_interval=100,
                    hyperparas=dict(lr=1e-1))
            ct = ComputationTask(algorithm=alg)

            for _ in range(1000):
                if on_policy:
                    outputs, _ = ct.predict(inputs=dict(sensor=sensor))
                    actions = outputs["action"]
                else:
                    ## randomly assemble a batch
                    actions = np.random.choice(
                        [0, 1], size=(batch_size, 1),
                        p=[0.5, 0.5]).astype("int")
                ## action 0 is rewarded with 1, action 1 with 0
                rewards = (1 - actions).astype("float32")
                end_flags = np.ones((batch_size, 1)).astype("float32")
                cost = ct.learn(
                    inputs=dict(sensor=sensor),
                    next_inputs=dict(next_sensor=next_sensor),
                    episode_end=dict(episode_end=end_flags),
                    actions=dict(action=actions),
                    rewards=dict(reward=rewards))

            print("final cost: %f" % cost["cost"])

            ### the policy should bias towards the first action
            outputs, _ = ct.predict(inputs=dict(sensor=sensor))
            for predicted in outputs["action"]:
                self.assertEqual(predicted[0], 0)
# Run the test cases of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import parl.layers as layers
from paddle.fluid.framework import Variable
class Feedforward(layers.Network):
    """
    A feedforward network can contain a sequence of components,
    where each component can be either a LayerFunc or a Feedforward.
    The purpose of this class is to create a collection of LayerFuncs that can
    be easily copied from one Network to another.
    Examples of feedforward networks can be MLP and CNN.
    """

    def __init__(self, components):
        """
        Store the i-th component as the attribute "ff%06d" % i, so that
        sorting the attribute names gives back the component order.
        """
        for i, component in enumerate(components):
            setattr(self, "ff%06d" % i, component)

    def __call__(self, input):
        """
        Apply the components to `input` one after another, in the order
        of their attribute names, and return the final output.
        """
        ## match on the "ff" prefix only: a plain substring test would also
        ## pick up any unrelated attribute whose name contains "ff"
        names = sorted(attr for attr in dir(self) if attr.startswith("ff"))
        for name in names:
            input = getattr(self, name)(input)
        return input
class MLP(Feedforward):
    """
    A Feedforward made of one layers.fc per entry of multi_fc_layers; each
    entry holds the keyword arguments of its layer.
    """

    def __init__(self, multi_fc_layers):
        fc_layers = [layers.fc(**conf) for conf in multi_fc_layers]
        super(MLP, self).__init__(fc_layers)
class CNN(Feedforward):
    """
    Image CNN: a Feedforward made of one layers.conv2d per entry of
    multi_conv_layers; each entry holds the keyword arguments of its layer.
    """

    def __init__(self, multi_conv_layers):
        conv_layers = [layers.conv2d(**conf) for conf in multi_conv_layers]
        super(CNN, self).__init__(conv_layers)
def categorical_random(prob):
    """
    Sample an id from the categorical distribution prob: draw a uniform
    random number in [0, 1) and count how many entries of the cumulative
    sum of prob lie below it.
    """
    cumulative = layers.cumsum(x=prob)
    threshold = layers.uniform_random_batch_size_like(
        input=prob, min=0., max=1., shape=[-1])
    below = layers.cast(cumulative < threshold, 'int')
    sampled = layers.reduce_sum(below, dim=-1)
    ## append a trailing dimension of size 1
    return layers.reshape(sampled, sampled.shape + (1, ))
def argmax_layer(input):
    """
    Return the id of the max value of an input vector, i.e. the index
    output of a top-1 selection.
    """
    top1 = layers.topk(input, 1)
    ## topk gives (values, indices); only the indices are needed
    return top1[1]
def inner_prod(x, y):
    """
    Return the inner product of two vectors: the elementwise product
    summed over the last dimension.
    """
    product = layers.elementwise_mul(x, y)
    return layers.reduce_sum(product, dim=-1)
def sum_to_one_norm_layer(input):
    """
    Divide ``input`` by the sum of its entries along the last dimension.

    A small epsilon is added to every entry before summing so that the
    divisor is never exactly zero.
    """
    eps = 1e-9  # avoid dividing 0
    total = layers.reduce_sum(input + eps, dim=-1)
    normalized = layers.elementwise_div(x=input, y=total, axis=0)
    return normalized
def idx_select(input, idx):
    """
    Given an input vector (Variable) and an idx (int or Variable),
    select the entry of the vector according to the idx.

    Args:
        input: a rank-2 Variable; its static shape is unpacked as
            (batch_size, num_entries).
        idx: either a Python int, which is expanded to a constant int64
            Variable of shape [batch_size, 1], or a Variable of indices.

    Returns:
        The inner product (over the last dimension) of ``input`` with the
        one-hot encoding of ``idx``, i.e. the selected entry of each row.
    """
    assert isinstance(input, Variable), "input must be a Variable"
    assert len(input.shape) == 2, "input must be a rank-2 Variable"
    batch_size, num_entries = input.shape

    if isinstance(idx, int):
        ## if idx is a constant int, then we create a variable
        idx = layers.fill_constant(
            shape=[batch_size, 1], dtype="int64", value=idx)
    else:
        assert isinstance(idx, Variable), "idx must be an int or a Variable"

    ## The one-hot mask picks exactly one entry per row.
    select = layers.cast(
        x=layers.one_hot(
            input=idx, depth=num_entries), dtype="float32")
    return inner_prod(select, input)
......@@ -15,34 +15,16 @@
Wrappers for fluid.layers so that the layers can share parameters conveniently.
"""
from paddle.fluid.executor import fetch_var
import paddle.fluid as fluid
from paddle.fluid.layers import *
from paddle.fluid.param_attr import ParamAttr
import paddle.fluid.layers as layers
import paddle.fluid.unique_name as unique_name
import warnings
from copy import deepcopy
import inspect
class LayerFunc(object):
    """
    Base class of the layer wrappers; it only remembers the parameter and
    bias attributes a layer was created with.
    """

    def __init__(self, param_attr=False, bias_attr=False):
        self.param_attr = param_attr
        self.bias_attr = bias_attr

    @property
    def param_name(self):
        """Name stored in ``param_attr``, or None if ``param_attr`` is falsy."""
        attr = self.param_attr
        return attr.name if attr else None

    @property
    def bias_name(self):
        """Name stored in ``bias_attr``, or None if ``bias_attr`` is falsy."""
        attr = self.bias_attr
        return attr.name if attr else None
def update_attr_name(name, default_name, attr, is_bias):
"""
Update the name in an attribute
......@@ -73,13 +55,131 @@ def update_attr_name(name, default_name, attr, is_bias):
return check_or_replace_name(new_name, attr)
class LayerFunc(object):
    """
    Base class of the layer wrappers. It remembers the parameter and bias
    attributes of a layer so that parameter values can be copied to another
    layer and so that a deep copy gets its own, freshly named parameters.
    """

    def __init__(self, param_attr=False, bias_attr=False):
        self.param_attr = param_attr
        self.bias_attr = bias_attr

    def sync_paras_to(self, target_layer, gpu_id):
        """
        Copy the paras from self to a target layer

        Args:
            target_layer: a LayerFunc that must have a parameter/bias exactly
                where self has one.
            gpu_id: a negative value selects the CPU place, otherwise the
                CUDA place with this id is used.
        """
        ## isinstance can handle subclass
        assert isinstance(target_layer, LayerFunc)
        src_attrs = [self.param_attr, self.bias_attr]
        target_attrs = [target_layer.param_attr, target_layer.bias_attr]

        place = fluid.CPUPlace() if gpu_id < 0 \
            else fluid.CUDAPlace(gpu_id)

        for src_attr, target_attr in zip(src_attrs, target_attrs):
            ## either both layers have this attribute or neither does
            assert bool(src_attr) == bool(target_attr)
            if not src_attr:
                continue
            src_var = fetch_var(src_attr.name)
            ## the target is fetched with return_numpy=False so that its
            ## content can be overwritten in place via set()
            target_var = fetch_var(target_attr.name, return_numpy=False)
            target_var.set(src_var, place)

    def __deepcopy__(self, memo):
        """
        Return a copy of this layer whose param_attr and bias_attr carry newly
        generated unique names, so the copy does not share parameters with
        the original.
        """
        cls = self.__class__
        ## __new__ won't init the class, we need to do that ourselves
        copied = cls.__new__(cls)
        ## record in the memo that self has been copied to avoid recursive copying
        memo[id(self)] = copied

        ## first copy all content
        ## (items() instead of the Python-2-only iteritems())
        for k, v in self.__dict__.items():
            setattr(copied, k, deepcopy(v, memo))

        ## then we need to create new para names for self.param_attr and self.bias_attr
        def create_new_para_name(attr):
            if attr:
                assert attr.name, "attr should have a name already!"
                ## remove the last number id but keep the name key; a name
                ## without any "_" is kept whole instead of becoming empty
                name_key = attr.name.rsplit("_", 1)[0]
                attr.name = unique_name.generate(name_key)

        create_new_para_name(copied.param_attr)
        create_new_para_name(copied.bias_attr)

        ## We require the user to sync the parameter values later, because
        ## this deepcopy is supposed to be called only before the startup
        ## program. This function will cause the computation graph change, so
        ## it cannot be called during the execution.
        return copied

    @property
    def param_name(self):
        """Name stored in ``param_attr``, or None if ``param_attr`` is falsy."""
        return self.param_attr.name if self.param_attr else None

    @property
    def bias_name(self):
        """Name stored in ``bias_attr``, or None if ``bias_attr`` is falsy."""
        return self.bias_attr.name if self.bias_attr else None
class Network(object):
    """
    A Network is an unordered set of LayerFuncs or Networks.
    """

    def sync_paras_to(self, target_net, gpu_id):
        """
        Copy the parameters of every LayerFunc/Network attribute of self to
        the attribute of the same name in ``target_net``.

        Attributes that ``target_net`` does not have are skipped. Attributes
        that are tuples, lists, sets or dicts are walked recursively; any
        value that is not a Network or LayerFunc is left untouched.

        Args:
            target_net: another instance of the same Network class.
            gpu_id: forwarded unchanged to the ``sync_paras_to`` of each
                contained layer or sub-network.
        """
        assert target_net is not self, "cannot copy between identical networks"
        assert isinstance(target_net, Network)
        assert self.__class__.__name__ == target_net.__class__.__name__, \
            "must be the same class for para syncing!"

        for attr in self.__dict__:
            if attr not in target_net.__dict__:
                continue
            val = getattr(self, attr)
            target_val = getattr(target_net, attr)
            assert type(val) == type(target_val)
            self._sync_value(val, target_val, gpu_id)

    @staticmethod
    def _sync_value(val, target_val, gpu_id):
        """
        Sync one attribute value, descending into containers so that plain
        values (ints, strings, ...) stored in them are ignored instead of
        failing on a missing ``sync_paras_to``.
        """
        if isinstance(val, (Network, LayerFunc)):
            val.sync_paras_to(target_val, gpu_id)
        elif isinstance(val, (tuple, list, set)):
            ## NOTE(review): elements are paired by iteration order, which is
            ## not guaranteed to correspond for sets.
            for v, tv in zip(val, target_val):
                Network._sync_value(v, tv, gpu_id)
        elif isinstance(val, dict):
            for k in val:
                assert k in target_val
                Network._sync_value(val[k], target_val[k], gpu_id)
        ## for any other type, we do not copy
def check_caller_name():
    """
    Assert that the current call originates from the ``__init__`` of a
    Network (or a subclass of it).

    Raises:
        AssertionError: if no frame on the call stack is an ``__init__``
            whose ``self`` is a Network instance.
    """
    ## we trace back to the call stack and make sure Network.__init__ is on the path
    ## (context=0 avoids loading source lines for every frame)
    called_by_init = False
    for record in inspect.stack(0):
        frame = record[0]
        if frame.f_code.co_name != "__init__":
            continue
        ## frames without a local "self" simply do not qualify
        if isinstance(frame.f_locals.get("self"), Network):
            called_by_init = True
            break

    assert called_by_init, "parl.layers can only be called in Network.__init__()!"
def fc(size,
num_flatten_dims=1,
param_attr=None,
bias_attr=None,
use_mkldnn=False,
act=None,
is_test=False,
name=None):
"""
Return a function that creates a paddle.fluid.layers.fc.
......@@ -87,17 +187,18 @@ def fc(size,
default_name = "fc"
param_attr = update_attr_name(name, default_name, param_attr, False)
bias_attr = update_attr_name(name, default_name, bias_attr, True)
check_caller_name()
class FC_(LayerFunc):
def __init__(self):
super(FC_, self).__init__(param_attr, bias_attr)
def __call__(self, input):
def __call__(self, input, is_test=False):
return layers.fc(input=input,
size=size,
num_flatten_dims=num_flatten_dims,
param_attr=param_attr,
bias_attr=bias_attr,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
use_mkldnn=use_mkldnn,
act=act,
is_test=is_test)
......@@ -116,6 +217,7 @@ def embedding(size,
Return a function that creates a paddle.fluid.layers.embedding.
"""
param_attr = update_attr_name(name, "embedding", param_attr, False)
check_caller_name()
class Embedding_(LayerFunc):
def __init__(self):
......@@ -128,7 +230,7 @@ def embedding(size,
is_sparse=is_sparse,
is_distributed=is_distributed,
padding_idx=padding_idx,
param_attr=param_attr,
param_attr=self.param_attr,
dtype=dtype)
return Embedding_()
......@@ -150,6 +252,7 @@ def dynamic_lstm(size,
default_name = "dynamic_lstm"
param_attr = update_attr_name(name, default_name, param_attr, False)
bias_attr = update_attr_name(name, default_name, bias_attr, True)
check_caller_name()
class DynamicLstm_(LayerFunc):
def __init__(self):
......@@ -159,8 +262,8 @@ def dynamic_lstm(size,
return layers.dynamic_lstm(
input=input,
size=size,
param_attr=param_attr,
bias_attr=bias_attr,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
use_peepholes=use_peepholes,
is_reverse=is_reverse,
gate_activation=gate_activation,
......@@ -189,6 +292,7 @@ def dynamic_lstmp(size,
default_name = "dynamic_lstmp"
param_attr = update_attr_name(name, default_name, param_attr, False)
bias_attr = update_attr_name(name, default_name, bias_attr, True)
check_caller_name()
class DynamicLstmp_(LayerFunc):
def __init__(self):
......@@ -199,8 +303,8 @@ def dynamic_lstmp(size,
input=input,
size=size,
proj_size=proj_size,
param_attr=param_attr,
bias_attr=bias_attr,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
use_peepholes=use_peepholes,
is_reverse=is_reverse,
gate_activation=gate_activation,
......@@ -226,6 +330,7 @@ def dynamic_gru(size,
default_name = "dynamic_gru"
param_attr = update_attr_name(name, default_name, param_attr, False)
bias_attr = update_attr_name(name, default_name, bias_attr, True)
check_caller_name()
class DynamicGru_(LayerFunc):
def __init__(self):
......@@ -235,8 +340,8 @@ def dynamic_gru(size,
return layers.dynamic_gru(
input=input,
size=size,
param_attr=param_attr,
bias_attr=bias_attr,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
is_reverse=is_reverse,
gate_activation=gate_activation,
candidate_activation=candidate_activation,
......@@ -274,6 +379,7 @@ def sequence_conv(num_filters,
default_name = "sequence_conv"
param_attr = update_attr_name(name, default_name, param_attr, False)
bias_attr = update_attr_name(name, default_name, bias_attr, True)
check_caller_name()
class SequenceConv_(LayerFunc):
def __init__(self):
......@@ -286,8 +392,8 @@ def sequence_conv(num_filters,
filter_size=filter_size,
filter_stride=filter_stride,
padding=padding,
bias_attr=bias_attr,
param_attr=param_attr,
bias_attr=self.bias_attr,
param_attr=self.param_attr,
act=act)
return SequenceConv_()
......@@ -311,6 +417,7 @@ def conv2d(num_filters,
default_name = "conv2d"
param_attr = update_attr_name(name, default_name, param_attr, False)
bias_attr = update_attr_name(name, default_name, bias_attr, True)
check_caller_name()
class Conv2D_(LayerFunc):
def __init__(self):
......@@ -325,8 +432,8 @@ def conv2d(num_filters,
padding=padding,
dilation=dilation,
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
use_cudnn=use_cudnn,
use_mkldnn=use_mkldnn,
act=act)
......@@ -351,6 +458,7 @@ def conv2d_transpose(num_filters,
default_name = "conv2d_transpose"
param_attr = update_attr_name(name, default_name, param_attr, False)
bias_attr = update_attr_name(name, default_name, bias_attr, True)
check_caller_name()
class Conv2DTranspose_(LayerFunc):
def __init__(self):
......@@ -365,8 +473,8 @@ def conv2d_transpose(num_filters,
padding=padding,
stride=stride,
dilation=dilation,
param_attr=param_attr,
bias_attr=bias_attr,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
use_cudnn=use_cudnn,
act=act)
......@@ -380,6 +488,7 @@ def lstm_unit(forget_bias=0.0, param_attr=None, bias_attr=None, name=None):
default_name = "lstm_unit"
param_attr = update_attr_name(name, default_name, param_attr, False)
bias_attr = update_attr_name(name, default_name, bias_attr, True)
check_caller_name()
class LstmUnit_(LayerFunc):
def __init__(self):
......@@ -391,8 +500,8 @@ def lstm_unit(forget_bias=0.0, param_attr=None, bias_attr=None, name=None):
hidden_t_prev=hidden_t_prev,
cell_t_prev=cell_t_prev,
forget_bias=forget_bias,
param_attr=param_attr,
bias_attr=bias_attr)
param_attr=self.param_attr,
bias_attr=self.bias_attr)
return LstmUnit_()
......@@ -406,6 +515,7 @@ def row_conv(future_context_size, param_attr=None, act=None, name=None):
Return a function that creates a paddle.fluid.layers.row_conv.
"""
param_attr = update_attr_name(name, "row_conv", param_attr, False)
check_caller_name()
class RowConv_(LayerFunc):
def __init__(self):
......@@ -415,7 +525,7 @@ def row_conv(future_context_size, param_attr=None, act=None, name=None):
return layers.row_conv(
input=input,
future_context_size=future_context_size,
param_attr=param_attr,
param_attr=self.param_attr,
act=act)
return RowConv_()
......
......@@ -14,10 +14,11 @@
import unittest
import parl.layers as layers
from parl.layers import Network
class TestParamName(unittest.TestCase):
def test_name_number(self):
class MyNetWork(Network):
def __init__(self):
self.fc1 = layers.fc(100)
self.fc2 = layers.fc(100)
self.fc3 = layers.fc(100, bias_attr=False)
......@@ -33,37 +34,36 @@ class TestParamName(unittest.TestCase):
filter_size=3,
param_attr=self.embedding.param_attr,
name="my_conv2d")
self.dynamic_grus = []
for i in range(5):
self.dynamic_grus.append(layers.dynamic_gru(50))
class TestParamName(unittest.TestCase):
def test_name_number(self):
net = MyNetWork()
## fc1 and fc2 have different parameters
self.assertEqual(self.fc1.param_name, "fc.w_0")
self.assertEqual(self.fc2.param_name, "fc.w_1")
self.assertEqual(net.fc1.param_name, "fc.w_0")
self.assertEqual(net.fc2.param_name, "fc.w_1")
## fc3 has no bias and fc4 has no param; so the names are None
self.assertEqual(self.fc3.bias_name, None)
self.assertEqual(self.fc4.param_name, None)
self.assertEqual(self.fc4.bias_name, "fc.b_3")
self.assertEqual(net.fc3.bias_name, None)
self.assertEqual(net.fc4.param_name, None)
self.assertEqual(net.fc4.bias_name, "fc.b_3")
## fc5 has a custom name without a bias
self.assertEqual(self.fc5.param_name, "fc.w_4")
self.assertEqual(self.fc5.bias_name, None)
self.assertEqual(net.fc5.param_name, "fc.w_4")
self.assertEqual(net.fc5.bias_name, None)
## embedding layer has no bias
self.assertEqual(self.embedding.param_name, "embedding.w_0")
self.assertEqual(self.embedding.bias_name, None)
self.assertEqual(net.embedding.param_name, "embedding.w_0")
self.assertEqual(net.embedding.bias_name, None)
## embedding layer with a custom name
self.assertEqual(self.embedding_custom.param_name,
self.assertEqual(net.embedding_custom.param_name,
"embedding_custom.w_0")
## conv2d shares param with embedding; has a custom bias name
self.assertEqual(self.conv2d.param_name, "embedding.w_0")
self.assertEqual(self.conv2d.bias_name, "my_conv2d.b_0")
for i, gru in enumerate(self.dynamic_grus):
self.assertEqual(gru.param_name, "dynamic_gru.w_%d" % i)
self.assertEqual(net.conv2d.param_name, "embedding.w_0")
self.assertEqual(net.conv2d.bias_name, "my_conv2d.b_0")
if __name__ == '__main__':
......
......@@ -14,45 +14,67 @@
import unittest
import parl.layers as layers
from parl.layers import Network
import paddle.fluid as fluid
import numpy as np
class TestParamSharing(unittest.TestCase):
def __init__(self, *args, **kwargs):
super(TestParamSharing, self).__init__(*args, **kwargs)
class MyNetWork(Network):
def __init__(self):
self.fc1 = layers.fc(64, bias_attr=False)
self.fc2 = layers.fc(64, bias_attr=False)
self.fc3 = layers.fc(64, name="fc")
self.fc4 = layers.fc(64, name="fc")
## we bind the paras of self.embedding to those of self.fc1
self.embedding = layers.embedding(
(100, 64), param_attr=self.fc1.param_attr)
class TestParamSharing(unittest.TestCase):
def test_param_sharing(self):
"""
Test case for parameter sharing between layers of the same type
"""
main_program = fluid.Program()
startup_program = fluid.Program()
net = MyNetWork()
## we bind the paras of embedding to those of fc1
batch_size = 10
dict_size = 100
input_cx = np.random.uniform(0, 1, [batch_size, 100]).astype("float32")
input_x = np.random.randint(
dict_size, size=(batch_size, 1)).astype("int")
#################################
with fluid.program_guard(main_program, startup_program):
main_program1 = fluid.Program()
with fluid.program_guard(main_program1):
x = layers.data(name='x', shape=[100], dtype="float32")
y1 = self.fc1(input=x)
y11 = self.fc1(input=x)
y2 = self.fc2(input=x)
y3 = self.fc3(input=x)
y4 = self.fc4(input=x)
y1 = net.fc1(input=x)
y11 = net.fc1(input=x)
y2 = net.fc2(input=x)
y3 = net.fc3(input=x)
y4 = net.fc4(input=x)
main_program2 = fluid.Program()
with fluid.program_guard(main_program2):
x_ = layers.data(name='x', shape=[1], dtype="int")
cx_ = layers.cast(
x=layers.one_hot(
input=x_, depth=dict_size), dtype="float32")
y1_ = net.fc1(input=cx_)
y2_ = net.embedding(input=x_)
x1_ = layers.data(name='x1', shape=[100], dtype="float32")
y3_ = net.fc1(input=x1_)
#### we run the startup program only once to make sure
#### only one para init across the two programs
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
batch_size = 10
input_x = np.random.uniform(0, 1, [batch_size, 100]).astype("float32")
outputs = exe.run(main_program,
feed={"x": input_x},
fetch_list=[y1, y11, y2, y3, y4])
exe.run(fluid.default_startup_program())
######################################################
outputs = exe.run(main_program1,
feed={"x": input_cx},
fetch_list=[y1, y11, y2, y3, y4])
old_y1 = outputs[0]
self.assertEqual(
np.sum(outputs[0].flatten()), np.sum(outputs[1].flatten()))
self.assertNotEqual(
......@@ -60,35 +82,17 @@ class TestParamSharing(unittest.TestCase):
self.assertNotEqual(
np.sum(outputs[3].flatten()), np.sum(outputs[4].flatten()))
def test_manual_param_sharing(self):
"""
Test case for parameter sharing between layers of different types
"""
batch_size = 10
dict_size = 100
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
x = layers.data(name='x', shape=[1], dtype="int")
cx = layers.cast(
x=layers.one_hot(
input=x, depth=dict_size), dtype="float32")
## remove bias because embedding layer does not have one
y1 = self.fc1(input=cx)
y2 = self.embedding(input=x)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
input_x = np.random.randint(
dict_size, size=(batch_size, 1)).astype("int")
outputs = exe.run(main_program,
feed={'x': input_x},
fetch_list=[y1, y2])
outputs = exe.run(main_program2,
feed={'x': input_x,
'x1': input_cx},
fetch_list=[y1_, y2_, y3_])
### test two different layers sharing the same para matrix
self.assertEqual(
np.sum(outputs[0].flatten()), np.sum(outputs[1].flatten()))
### test if the same layer can have the same parameters across two different programs
self.assertEqual(
np.sum(outputs[2].flatten()), np.sum(old_y1.flatten()))
if __name__ == "__main__":
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import parl.layers as layers
from parl.framework.algorithm import Model
import parl.framework.policy_distribution as pd
from parl.layers import common_functions as comf
class SimpleModelDeterministic(Model):
    """
    A model whose policy is the deterministic output of an MLP applied to
    its single input.
    """

    def __init__(self, dims, mlp_layer_confs):
        """
        Args:
            dims: size used for both the "sensor" input and the
                "continuous_action" output specs.
            mlp_layer_confs: list of layer configurations passed to comf.MLP.
        """
        super(SimpleModelDeterministic, self).__init__()
        self.dims = dims
        self.mlp = comf.MLP(mlp_layer_confs)

    def get_input_specs(self):
        return [("sensor", dict(shape=[self.dims]))]

    def get_action_specs(self):
        return [("continuous_action", dict(shape=[self.dims]))]

    def policy(self, inputs, states):
        """
        Return a dict with a Deterministic distribution for
        "continuous_action", together with the unchanged ``states``.
        """
        ## list(...) keeps this working on Python 3, where dict.values()
        ## returns a view that cannot be indexed
        hidden = self.mlp(list(inputs.values())[0])
        return dict(continuous_action=pd.Deterministic(hidden)), states
class SimpleModelAC(Model):
    """
    An actor-critic model: a shared MLP body feeds a softmax policy head and
    a single-unit value head.
    """

    def __init__(self, dims, num_actions, mlp_layer_confs):
        """
        Args:
            dims: size of the "sensor" input.
            num_actions: accepted for interface compatibility; not read here.
            mlp_layer_confs: list of layer configurations; all but the last
                build the shared body, and the last one (which must use the
                "softmax" activation) builds the policy head.
        """
        super(SimpleModelAC, self).__init__()
        self.dims = dims
        assert mlp_layer_confs[-1]["act"] == "softmax"
        self.mlp = comf.MLP(mlp_layer_confs[:-1])
        self.policy_mlp = comf.MLP(mlp_layer_confs[-1:])
        self.value_layer = layers.fc(size=1)

    def get_input_specs(self):
        return [("sensor", dict(shape=[self.dims]))]

    def get_action_specs(self):
        return [("action", dict(shape=[1], dtype="int64"))]

    def _perceive(self, inputs, states):
        """Apply the shared MLP body to the first (only) input."""
        ## list(...) keeps this working on Python 3, where dict.values()
        ## returns a view that cannot be indexed
        return self.mlp(list(inputs.values())[0])

    def policy(self, inputs, states):
        """
        Return a dict with a CategoricalDistribution for "action", together
        with the unchanged ``states``.
        """
        dist = pd.CategoricalDistribution(
            self.policy_mlp(self._perceive(inputs, states)))
        return dict(action=dist), states

    def value(self, inputs, states):
        """Return a dict with the value head output under "v_value"."""
        return dict(v_value=self.value_layer(self._perceive(inputs, states)))
class SimpleModelQ(Model):
    """
    A Q-value model: an MLP with a linear last layer produces the Q values,
    from which the action distribution is derived.
    """

    def __init__(self,
                 dims,
                 num_actions,
                 mlp_layer_confs,
                 estimated_total_num_batches=0):
        """
        Args:
            dims: size of the "sensor" input.
            num_actions: stored on the instance as ``num_actions``.
            mlp_layer_confs: list of layer configurations passed to comf.MLP;
                the last one must not specify an activation.
            estimated_total_num_batches: stored on the instance unchanged.
        """
        super(SimpleModelQ, self).__init__()
        self.dims = dims
        self.num_actions = num_actions
        assert "act" not in mlp_layer_confs[-1], "should be linear act"
        self.mlp = comf.MLP(mlp_layer_confs)
        self.estimated_total_num_batches = estimated_total_num_batches

    def get_input_specs(self):
        return [("sensor", dict(shape=[self.dims]))]

    def get_action_specs(self):
        return [("action", dict(shape=[1], dtype="int64"))]

    def policy(self, inputs, states):
        """
        Return a dict with the distribution built from the Q values for
        "action", together with the unchanged ``states``.
        """
        values = self.value(inputs, states)
        q_value = values["q_value"]
        return dict(action=pd.q_categorical_distribution(q_value)), states

    def value(self, inputs, states):
        """Return a dict with the MLP output under "q_value"."""
        ## list(...) keeps this working on Python 3, where dict.values()
        ## returns a view that cannot be indexed
        return dict(q_value=self.mlp(list(inputs.values())[0]))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册