init(env, agent) -->
init_agent(agent, init_env(env))
start(env, agent) --> sensation, next_action
s = env_start(env)
a = agent_start(agent, s)
return s, a
step(env, agent, next_action) --> reward, sensation, next_action
r, s = env_step(env, next_action)
a = agent_step(agent, r, s)
return r, s, a
steps(env, agent, next_action, num_steps)
--> [reward, sensation, action, ..., next_action]
episode(env, agent) --> [sensation, action, reward, ..., r_T]
s, a = start(env, agent)
list = [s, a]
while s != terminal:
r, s, a = step(env, agent, a)
list = list + [r, s, a]
return list minus last two elements
episodes(env, agent,
num_episodes
) --> [episode_1, ..., episode_num_episodes]
Sim(env, agent) --> sim
init_agent(agent, init_env(env))
return [agent, env, Null]
env(sim) --> env
agent(sim) --> agent
next_action(sim) --> next_action
next_action(sim) = a
sim_start(sim) --> sensation, action
s = env_start(
env(sim)
)
a = agent_start(
agent(sim)
, s)
next_action(sim) = a
return s, a
sim_step(sim) --> reward, sensation, action
r, s = env_step(
env(sim)
, next_action(sim))
a = agent_step(
agent(sim)
, r, s)
next_action(sim) = a
return r, s, a
sim_steps(sim,num_steps) --> [reward, sensation, action, reward, ..., action]
sim_episode(sim) --> [s_0, a_0, r_1, s_1, ..., r_T]
s, a = sim_start(sim)
list = [s, a]
while s != terminal:
r, s, a = sim_step(sim)
list = list + [r, s, a]
return list minus last two elements
sim_episodes(sim,num_episodes) --> [episode_1, ..., episode_num_episodes]