burlap.behavior.singleagent.Episode Java Examples

The following examples show how to use burlap.behavior.singleagent.Episode. They are drawn from the burlap and burlap_examples projects; the source file and license for each snippet are noted above it.
Example #1
Source File: BeliefAgent.java    From burlap with Apache License 2.0
/**
 * Causes the agent to act for some fixed number of steps. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @param maxSteps the maximum number of steps to take in the environment
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminalOrMaxSteps(int maxSteps){
	Episode ea = new Episode();
	ea.initializeInState(this.environment.currentObservation());
	int c = 0;
	while(!this.environment.isInTerminalState() && c < maxSteps){
		Action ga = this.getAction(this.curBelief);
		EnvironmentOutcome eo = environment.executeAction(ga);
		ea.transition(ga, eo.op, eo.r);

		//update our belief
		this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);
		
		c++;
		
	}
	
	return ea;
}
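For reference, a minimal sketch of how the returned episode might be inspected afterwards (assuming agent is a concrete BeliefAgent subclass already attached to an environment; the 200-step budget is arbitrary):

Episode ep = agent.actUntilTerminalOrMaxSteps(200);
for(int t = 0; t < ep.numTimeSteps() - 1; t++){
	//state(t), action(t), and reward(t+1) describe the t-th transition
	System.out.println(ep.state(t) + " " + ep.action(t) + " r=" + ep.reward(t + 1));
}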
 
Example #2
Source File: LearningAlgorithmExperimenter.java    From burlap with Apache License 2.0
/**
 * Runs a trial for an agent generated by the given factory when interpreting the trial length as a total number of steps.
 * @param agentFactory the agent factory used to generate the agent to test.
 */
protected void runStepBoundTrial(LearningAgentFactory agentFactory){
	
	//temporarily disable plotter data collection to avoid possible contamination from any actions taken during agent generation
	//(e.g., if there is pre-test training)
	this.plotter.toggleDataCollection(false);
	
	LearningAgent agent = agentFactory.generateAgent();
	
	this.plotter.toggleDataCollection(true); //turn it back on to begin
	
	this.plotter.startNewTrial();
	
	int stepsRemaining = this.trialLength;
	while(stepsRemaining > 0){
		Episode ea = agent.runLearningEpisode(this.environmentSever, stepsRemaining);
		stepsRemaining -= ea.numTimeSteps()-1; //-1  because we want to subtract the number of actions, not the number of states seen
		this.plotter.endEpisode();
		this.environmentSever.resetEnvironment();
	}
	
	this.plotter.endTrial();
	
}
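A hedged sketch of how this experimenter is typically driven from user code (assuming domain, hashingFactory, and env are already set up; the QLearning constructor arguments follow the usual BURLAP tutorial pattern and are only illustrative):

LearningAgentFactory qFactory = new LearningAgentFactory() {
	public String getAgentName() { return "Q-learning"; }
	public LearningAgent generateAgent() {
		return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
	}
};

//10 trials; by default the trial length is a number of episodes, and a toggle on the
//experimenter switches to the total-step interpretation handled by runStepBoundTrial above
LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qFactory);
exp.startExperiment();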
 
Example #3
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License
public static void IPSS(){

		InvertedPendulum ip = new InvertedPendulum();
		ip.physParams.actionNoise = 0.;
		RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
		TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
		ip.setRf(rf);
		ip.setTf(tf);
		SADomain domain = ip.generateDomain();

		State initialState = new InvertedPendulumState();

		SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
		ss.setForgetPreviousPlanResults(true);
		ss.toggleDebugPrinting(false);
		Policy p = new GreedyQPolicy(ss);

		Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
		System.out.println("Num steps: " + e.maxTimeStep());
		Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(e));

	}
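If the rollout is worth keeping, the tutorial version of this method also serializes it to disk; a one-line sketch, assuming Episode's write/read helpers behave as in the standard BURLAP tutorials:

e.write("ipss");  //saves the episode to a file with this base name; a matching Episode.read call can load it back later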
 
Example #4
Source File: LSPI.java    From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

	Episode ea = maxSteps != -1 ? PolicyUtils.rollout(this.learningPolicy, env, maxSteps) : PolicyUtils.rollout(this.learningPolicy, env);

	this.updateDatasetWithLearningEpisode(ea);

	if(this.shouldRereunPolicyIteration(ea)){
		this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);
		this.numStepsSinceLastLearningPI = 0;
	}
	else{
		this.numStepsSinceLastLearningPI += ea.numTimeSteps()-1;
	}

	if(episodeHistory.size() >= numEpisodesToStore){
		episodeHistory.poll();
	}
	episodeHistory.offer(ea);

	return ea;
}
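As with any BURLAP LearningAgent, this method is normally driven in a simple loop; a sketch, assuming lspi has already been constructed with an appropriate state-action feature basis and env is the learning Environment:

List<Episode> episodes = new ArrayList<Episode>();
for(int i = 0; i < 50; i++){
	episodes.add(lspi.runLearningEpisode(env, 5000)); //bound each episode to 5000 steps
	env.resetEnvironment();                           //start the next episode from the initial state
}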
 
Example #5
Source File: MLIRL.java    From burlap with Apache License 2.0
/**
 * Computes and returns the log-likelihood of the given trajectory under the current reward function parameters, weighted by the given weight.
 * @param ea the trajectory
 * @param weight the weight to assign the trajectory
 * @return the log-likelihood of the given trajectory under the current reward function parameters, weighted by the given weight.
 */
public double logLikelihoodOfTrajectory(Episode ea, double weight){
	double logLike = 0.;
	Policy p = new BoltzmannQPolicy((QProvider)this.request.getPlanner(), 1./this.request.getBoltzmannBeta());
	for(int i = 0; i < ea.numTimeSteps()-1; i++){
		this.request.getPlanner().planFromState(ea.state(i));
		double actProb = p.actionProb(ea.state(i), ea.action(i));
		logLike += Math.log(actProb);
	}
	logLike *= weight;
	return logLike;
}
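In symbols, the method computes w * sum_t log pi_B(a_t | s_t), where pi_B is a Boltzmann (softmax) policy over the planner's Q-values with temperature 1/beta and w is the supplied trajectory weight.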
 
Example #6
Source File: MLIRL.java    From burlap with Apache License 2.0
/**
 * Computes and returns the gradient of the log-likelihood of all trajectories
 * @return the gradient of the log-likelihood of all trajectories
 */
public FunctionGradient logLikelihoodGradient(){
	HashedAggregator<Integer> gradientSum = new HashedAggregator<Integer>();

	double [] weights = this.request.getEpisodeWeights();
	List<Episode> exampleTrajectories = this.request.getExpertEpisodes();

	for(int i = 0; i < exampleTrajectories.size(); i++){
		Episode ea = exampleTrajectories.get(i);
		double weight = weights[i];
		for(int t = 0; t < ea.numTimeSteps()-1; t++){
			this.request.getPlanner().planFromState(ea.state(t));
			FunctionGradient policyGrad = this.logPolicyGrad(ea.state(t), ea.action(t));
			//weigh it by trajectory strength
			for(FunctionGradient.PartialDerivative pd : policyGrad.getNonZeroPartialDerivatives()){
				double newVal = pd.value * weight;
				gradientSum.add(pd.parameterId, newVal);
			}

		}
	}

	FunctionGradient gradient = new FunctionGradient.SparseGradient(gradientSum.size());
	for(Map.Entry<Integer, Double> e : gradientSum.entrySet()){
		gradient.put(e.getKey(), e.getValue());
	}

	return gradient;
}
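In symbols, the accumulated gradient is grad_theta log L = sum_i w_i * sum_t grad_theta log pi_theta(a_t^i | s_t^i): each expert trajectory contributes its per-step log-policy gradients, scaled by that trajectory's weight.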
 
Example #7
Source File: MultipleIntentionsMLIRLRequest.java    From burlap with Apache License 2.0
/**
 * Initializes
 * @param domain the domain of the problem
 * @param plannerFactory A {@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.QGradientPlannerFactory} that produces {@link DifferentiableQFunction} objects.
 * @param expertEpisodes the expert trajectories
 * @param rf the {@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.DifferentiableRF} model to use.
 * @param k the number of clusters
 */
public MultipleIntentionsMLIRLRequest(SADomain domain, QGradientPlannerFactory plannerFactory, List<Episode> expertEpisodes, DifferentiableRF rf, int k) {
	super(domain, null, expertEpisodes, rf);
	this.plannerFactory = plannerFactory;
	this.k = k;
	if(this.plannerFactory != null) {
		this.setPlanner((Planner) plannerFactory.generateDifferentiablePlannerForRequest(this));
	}
}
 
Example #8
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Test
public void testBFS() {
	GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));

	DeterministicPlanner planner = new BFS(this.domain, this.goalCondition, this.hashingFactory);
	planner.planFromState(initialState);
	Policy p = new SDPlannerPolicy(planner);
	Episode analysis = rollout(p, initialState, domain.getModel());
	this.evaluateEpisode(analysis, true);
}
 
Example #9
Source File: MacroAction.java    From burlap with Apache License 2.0
@Override
public double probabilityOfTermination(State s, Episode history) {
	if(history.actionSequence.size() >= actionSequence.size()){
		return 1.;
	}
	return 0.;
}
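For context, a macro action wraps a fixed action sequence, so termination becomes certain once the executed history is at least as long as that sequence. A construction sketch (the MacroAction constructor signature and the SimpleAction/GridWorldDomain names here are from memory and should be checked against the BURLAP version in use):

List<Action> seq = Arrays.<Action>asList(
		new SimpleAction(GridWorldDomain.ACTION_NORTH),
		new SimpleAction(GridWorldDomain.ACTION_NORTH),
		new SimpleAction(GridWorldDomain.ACTION_EAST));
MacroAction northNorthEast = new MacroAction("nne", seq);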
 
Example #10
Source File: TestPlanning.java    From burlap with Apache License 2.0
public void evaluateEpisode(Episode analysis, Boolean expectOptimal) {
	if (expectOptimal) {
		Assert.assertEquals(this.gw.getHeight() + this.gw.getWidth() - 1, analysis.stateSequence.size());
		Assert.assertEquals(analysis.stateSequence.size()-1, analysis.actionSequence.size());
		Assert.assertEquals(analysis.actionSequence.size(), analysis.rewardSequence.size());
		Assert.assertEquals(-analysis.actionSequence.size(), analysis.discountedReturn(1.0), TestPlanning.delta);
	}

	Assert.assertEquals(true, domain.getModel().terminal(analysis.stateSequence.get(analysis.stateSequence.size()-1)));
	Assert.assertEquals(true, this.goalCondition.satisfies(analysis.stateSequence.get(analysis.stateSequence.size()-1)));
}
 
Example #11
Source File: PolicyUtils.java    From burlap with Apache License 2.0
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment}. The policy will stop being followed once a terminal state
 * in the environment is reached or when the provided number of steps has been taken.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @param numSteps the maximum number of steps to take in the environment.
 * @return An {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env, int numSteps){

	Episode ea = new Episode(env.currentObservation());

	int nSteps;
	do{
		followAndRecordPolicy(p, env, ea);
		nSteps = ea.numTimeSteps();
	}while(!env.isInTerminalState() && nSteps < numSteps);

	return ea;
}
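A minimal usage sketch, assuming a domain, initial state, and policy p are already in hand (SimulatedEnvironment is the same helper used in the tutorial examples elsewhere on this page):

SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);
Episode ep = PolicyUtils.rollout(p, env, 200);   //at most 200 steps, or until a terminal state
System.out.println("steps taken: " + (ep.numTimeSteps() - 1));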
 
Example #12
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Test
public void testAStar() {
	GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));
	
	Heuristic mdistHeuristic = new Heuristic() {
		
		@Override
		public double h(State s) {

			GridAgent agent = ((GridWorldState)s).agent;
			GridLocation location = ((GridWorldState)s).locations.get(0);

			//get agent position
			int ax = agent.x;
			int ay = agent.y;
			
			//get location position
			int lx = location.x;
			int ly = location.y;
			
			//compute Manhattan distance
			double mdist = Math.abs(ax-lx) + Math.abs(ay-ly);
			
			return -mdist;
		}
	};
	
	//provide A* the heuristic as well as the reward function so that it can keep
	//track of the actual cost
	DeterministicPlanner planner = new AStar(domain, goalCondition,
		hashingFactory, mdistHeuristic);
	planner.planFromState(initialState);
	Policy p = new SDPlannerPolicy(planner);
	
	Episode analysis = PolicyUtils.rollout(p, initialState, domain.getModel());
	this.evaluateEpisode(analysis, true);
}
 
Example #13
Source File: LSPI.java    From burlap with Apache License 2.0
/**
 * Updates this object's {@link SARSData} to include the results of a learning episode.
 * @param ea the learning episode as an {@link Episode} object.
 */
protected void updateDatasetWithLearningEpisode(Episode ea){
	if(this.dataset == null){
		this.dataset = new SARSData(ea.numTimeSteps()-1);
	}
	for(int i = 0; i < ea.numTimeSteps()-1; i++){
		this.dataset.add(ea.state(i), ea.action(i), ea.reward(i+1), ea.state(i+1));
	}
}
 
Example #14
Source File: RTDP.java    From burlap with Apache License 2.0
/**
 * Performs Bellman updates only after a rollout is complete and in reverse order
 * @param initialState the initial state from which to plan
 */
protected void batchRTDP(State initialState){
	
	int totalStates = 0;
	
	int consecutiveSmallDeltas = 0;
	for(int i = 0; i < numRollouts; i++){
		
		Episode ea = PolicyUtils.rollout(rollOutPolicy, initialState, model, maxDepth);
		LinkedList <HashableState> orderedStates = new LinkedList<HashableState>();
		for(State s : ea.stateSequence){
			orderedStates.addFirst(this.stateHash(s));
		}
		
		double delta = this.performOrderedBellmanUpdates(orderedStates);
		totalStates += orderedStates.size();
		DPrint.cl(debugCode, "Pass: " + i + "; Num states: " + orderedStates.size() + " (total: " + totalStates + ")");
		
		if(delta < this.maxDelta){
			consecutiveSmallDeltas++;
			if(consecutiveSmallDeltas >= this.minNumRolloutsWithSmallValueChange){
				break;
			}
		}
		else{
			consecutiveSmallDeltas = 0;
		}
	}
	
	
}
 
Example #15
Source File: Option.java    From burlap with Apache License 2.0
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
	Random rand = RandomFactory.getMapped(0);
	State initial = env.currentObservation();
	State cur = initial;

	Episode episode = new Episode(cur);
	Episode history = new Episode(cur);
	double roll;
	double pT;
	int nsteps = 0;
	double r = 0.;
	double cd = 1.;
	do{
		Action a = o.policy(cur, history);
		EnvironmentOutcome eo = env.executeAction(a);
		nsteps++;
		r += cd*eo.r;
		cur = eo.op;
		cd *= discount;


		history.transition(a, eo.op, eo.r);

		AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
		episode.transition(annotatedAction, eo.op, r);


		pT = o.probabilityOfTermination(eo.op, history);
		roll = rand.nextDouble();

	}while(roll > pT && !env.isInTerminalState());

	EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

	return eoo;

}
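Note how the loop accumulates the option's discounted return: r += cd*eo.r together with cd *= discount computes r = r_1 + discount*r_2 + discount^2*r_3 + ..., which is the cumulative reward that the resulting EnvironmentOptionOutcome reports for the whole option execution.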
 
Example #16
Source File: QLTutorial.java    From burlap_examples with MIT License
public static void main(String[] args) {

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setMapToFourRooms();
		gwd.setProbSucceedTransitionDynamics(0.8);
		gwd.setTf(new GridWorldTerminalFunction(10, 10));

		SADomain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = new GridWorldState(new GridAgent(0, 0));

		//create environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

		//create Q-learning
		QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ConstantValueFunction(), 0.1, 0.1);

		//run Q-learning and store results in a list
		List<Episode> episodes = new ArrayList<Episode>(1000);
		for(int i = 0; i < 1000; i++){
			episodes.add(agent.runLearningEpisode(env));
			env.resetEnvironment();
		}

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, episodes);

	}
 
Example #17
Source File: QLTutorial.java    From burlap_examples with MIT License
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
	//initialize our episode object with the initial state of the environment
	Episode e = new Episode(env.currentObservation());

	//behave until a terminal state or max steps is reached
	State curState = env.currentObservation();
	int steps = 0;
	while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

		//select an action
		Action a = this.learningPolicy.action(curState);

		//take the action and observe outcome
		EnvironmentOutcome eo = env.executeAction(a);

		//record result
		e.transition(eo);

		//get the max Q value of the resulting state if it's not terminal, 0 otherwise
		double maxQ = eo.terminated ? 0. : this.value(eo.op);

		//update the old Q-value
		QValue oldQ = this.storedQ(curState, a);
		oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);


		//update state pointer to next environment state observed
		curState = eo.op;
		steps++;

	}

	return e;
}
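The oldQ update above implements the standard one-step Q-learning rule Q(s,a) <- Q(s,a) + alpha*(r + gamma*max_a' Q(s',a') - Q(s,a)), with alpha = this.learningRate, gamma = this.gamma, and the max term replaced by 0 when the outcome state is terminal.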
 
Example #18
Source File: SubgoalOption.java    From burlap with Apache License 2.0
@Override
public double probabilityOfTermination(State s, Episode history) {
	if(terminationStates.satisfies(s) || !policy.definedFor(s)){
		return 1.;
	}
	return 0.;
}
 
Example #19
Source File: VITutorial.java    From burlap_examples with MIT License
public static void main(String [] args){

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setTf(new GridWorldTerminalFunction(10, 10));
		gwd.setMapToFourRooms();

		//only go in intended direction 80% of the time
		gwd.setProbSucceedTransitionDynamics(0.8);

		SADomain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = new GridWorldState(new GridAgent(0, 0));

		//setup vi with 0.99 discount factor, a value
		//function initialization that initializes all states to value 0, and which will
		//run for 30 iterations over the state space
		VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ConstantValueFunction(0.0), 30);

		//run planning from our initial state
		Policy p = vi.planFromState(s);

		//evaluate the policy with one rollout and visualize the trajectory
		Episode ea = PolicyUtils.rollout(p, s, domain.getModel());

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));

	}
 
Example #20
Source File: MLIRL.java    From burlap with Apache License 2.0
/**
 * Computes and returns the log-likelihood of all expert trajectories under the current reward function parameters.
 * @return the log-likelihood of all expert trajectories under the current reward function parameters.
 */
public double logLikelihood(){

	double [] weights = this.request.getEpisodeWeights();
	List<Episode> exampleTrajectories = this.request.getExpertEpisodes();

	double sum = 0.;
	for(int i = 0; i < exampleTrajectories.size(); i++){
		sum += this.logLikelihoodOfTrajectory(exampleTrajectories.get(i), weights[i]);
	}

	return sum;

}
 
Example #21
Source File: SubgoalOption.java    From burlap with Apache License 2.0
@Override
public List<ActionProb> policyDistribution(State s, Episode history) {
	if(!(policy instanceof EnumerablePolicy)){
		throw new RuntimeException("SubgoalOption cannot return policy distribution because underlying policy is not an EnumerablePolicy");
	}
	return ((EnumerablePolicy)policy).policyDistribution(s);
}
 
Example #22
Source File: BeliefAgent.java    From burlap with Apache License 2.0
/**
 * Causes the agent to act until the environment reaches a termination condition. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminal(){
	Episode ea = new Episode();
	ea.initializeInState(this.environment.currentObservation());
	while(!this.environment.isInTerminalState()){
		Action ga = this.getAction(this.curBelief);
		EnvironmentOutcome eo = environment.executeAction(ga);
		ea.transition(ga, eo.op, eo.r);
		
		//update our belief
		this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);
	}
	
	return ea;
}
 
Example #23
Source File: LSPI.java    From burlap with Apache License 2.0
/**
 * Returns whether LSPI should be rerun given the latest learning episode results. Default behavior is to return true
 * if the number of learning episode steps plus the number of steps since the last run is greater than the {@link #minNewStepsForLearningPI} threshold.
 * @param ea the most recent learning episode
 * @return true if LSPI should be rerun; false otherwise.
 */
protected boolean shouldRereunPolicyIteration(Episode ea){
	if(this.numStepsSinceLastLearningPI+ea.numTimeSteps()-1 > this.minNewStepsForLearningPI){
		return true;
	}
	return false;
}
 
Example #24
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Test
public void testDFS() {
	GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));
	
	DeterministicPlanner planner = new DFS(this.domain, this.goalCondition, this.hashingFactory, -1 , true);
	planner.planFromState(initialState);
	Policy p = new SDPlannerPolicy(planner);
	Episode analysis = rollout(p, initialState, domain.getModel());
	this.evaluateEpisode(analysis);
}
 
Example #25
Source File: PotentialShapedRMax.java    From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env) {
	return this.runLearningEpisode(env, -1);
}
 
Example #26
Source File: LSPI.java    From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env) {
	return this.runLearningEpisode(env, -1);
}
 
Example #27
Source File: ApproximateQLearning.java    From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env) {
	return this.runLearningEpisode(env, -1);
}
 
Example #28
Source File: ApprenticeshipLearning.java    From burlap with Apache License 2.0
/**
 * Returns the initial state of a randomly chosen expert episode
 * @param episodes the expert demonstrations
 * @return a random episode's initial state
 */
public static State getInitialState(List<Episode> episodes) {
	Random rando = new Random();
	Episode randomEpisode = episodes.get(rando.nextInt(episodes.size()));
	return randomEpisode.state(0);
}
 
Example #29
Source File: TestBlockDude.java    From burlap with Apache License 2.0
public void testDude(State s) {
	TerminalFunction tf = new BlockDudeTF();
	StateConditionTest sc = new TFGoalCondition(tf);

	AStar astar = new AStar(domain, sc, new SimpleHashableStateFactory(), new NullHeuristic());
	astar.toggleDebugPrinting(false);
	astar.planFromState(s);

	Policy p = new SDPlannerPolicy(astar);
	Episode ea = PolicyUtils.rollout(p, s, domain.getModel(), 100);

	State lastState = ea.stateSequence.get(ea.stateSequence.size() - 1);
	Assert.assertEquals(true, tf.isTerminal(lastState));
	Assert.assertEquals(true, sc.satisfies(lastState));
	Assert.assertEquals(-94.0, ea.discountedReturn(1.0), 0.001);

	/*
	BlockDude constructor = new BlockDude();
	Domain d = constructor.generateDomain();

	List<Integer> px = new ArrayList<Integer>();
	List <Integer> ph = new ArrayList<Integer>();

	ph.add(15);
	ph.add(3);
	ph.add(3);
	ph.add(3);
	ph.add(0);
	ph.add(0);
	ph.add(0);
	ph.add(1);
	ph.add(2);
	ph.add(0);
	ph.add(2);
	ph.add(3);
	ph.add(2);
	ph.add(2);
	ph.add(3);
	ph.add(3);
	ph.add(15);
	
	State o = BlockDude.getCleanState(d, px, ph, 6);
	o = BlockDude.setAgent(o, 9, 3, 1, 0);
	o = BlockDude.setExit(o, 1, 0);
	
	o = BlockDude.setBlock(o, 0, 5, 1);
	o = BlockDude.setBlock(o, 1, 6, 1);
	o = BlockDude.setBlock(o, 2, 14, 3);
	o = BlockDude.setBlock(o, 3, 16, 4);
	o = BlockDude.setBlock(o, 4, 17, 4);
	o = BlockDude.setBlock(o, 5, 17, 5);
	
	TerminalFunction tf = new SinglePFTF(d.getPropFunction(BlockDude.PFATEXIT));
	StateConditionTest sc = new SinglePFSCT(d.getPropFunction(BlockDude.PFATEXIT));

	RewardFunction rf = new UniformCostRF();

	AStar astar = new AStar(d, rf, sc, new DiscreteStateHashFactory(), new NullHeuristic());
	astar.toggleDebugPrinting(false);
	astar.planFromState(o);

	Policy p = new SDPlannerPolicy(astar);
	EpisodeAnalysis ea = p.evaluateBehavior(o, rf, tf, 100);

	State lastState = ea.stateSequence.get(ea.stateSequence.size() - 1);
	Assert.assertEquals(true, tf.isTerminal(lastState));
	Assert.assertEquals(true, sc.satisfies(lastState));
	Assert.assertEquals(-94.0, ea.getDiscountedReturn(1.0), 0.001);
	*/
}
 
Example #30
Source File: PotentialShapedRMax.java    From burlap with Apache License 2.0
public List<Episode> getAllStoredLearningEpisodes() {
	return episodeHistory;
}