burlap.mdp.auxiliary.stateconditiontest.TFGoalCondition Java Examples

The following examples show how to use burlap.mdp.auxiliary.stateconditiontest.TFGoalCondition. Each example notes its source file and the project it comes from above the code.
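
All of the examples follow the same basic pattern: build a TerminalFunction for the task, wrap it in a TFGoalCondition, and use the result wherever a StateConditionTest (goal test) is expected, such as in planners or goal-based reward functions. A minimal sketch of that pattern, assuming the four-rooms grid world used in the examples below (the grid size, goal cell, and variable names are illustrative, not taken from any single example):

GridWorldDomain gwdg = new GridWorldDomain(11, 11);
gwdg.setMapToFourRooms();

TerminalFunction tf = new GridWorldTerminalFunction(10, 10); // episode ends when the agent reaches (10, 10)
gwdg.setTf(tf);
OOSADomain domain = gwdg.generateDomain();

// TFGoalCondition adapts the TerminalFunction into a StateConditionTest,
// so terminal states can be treated as goal states by planners and reward functions.
StateConditionTest goalCondition = new TFGoalCondition(tf);
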
Example #1
Source File: GridWorldDQN.java    From burlap_caffe with Apache License 2.0
public GridWorldDQN(String solverFile, double gamma) {

        //create the domain
        gwdg = new GridWorldDomain(11, 11);
        gwdg.setMapToFourRooms();
        rf = new UniformCostRF();
        tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
        gwdg.setRf(rf);
        gwdg.setTf(tf);
        domain = gwdg.generateDomain();

        goalCondition = new TFGoalCondition(tf);

        //set up the initial state of the task
        initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

        //set up the state hashing system for tabular algorithms
        hashingFactory = new SimpleHashableStateFactory();

        //set up the environment for learning algorithms
        env = new SimulatedEnvironment(domain, initialState);

        dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma);
    }
 
Example #2
Source File: BasicBehavior.java    From burlap_examples with MIT License
public BasicBehavior(){
		gwdg = new GridWorldDomain(11, 11);
		gwdg.setMapToFourRooms();
		tf = new GridWorldTerminalFunction(10, 10);
		gwdg.setTf(tf);
		goalCondition = new TFGoalCondition(tf);
		domain = gwdg.generateDomain();

		initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
		hashingFactory = new SimpleHashableStateFactory();

		env = new SimulatedEnvironment(domain, initialState);


//		VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
//		observer.initGUI();
//		env.addObservers(observer);
	}
 
Example #3
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Before
public void setup() {
	this.gw = new GridWorldDomain(11, 11);
	this.gw.setMapToFourRooms();
	this.gw.setRf(new UniformCostRF());
	TerminalFunction tf = new SinglePFTF(PropositionalFunction.findPF(gw.generatePfs(), PF_AT_LOCATION));
	this.gw.setTf(tf);
	this.domain = this.gw.generateDomain();
	this.goalCondition = new TFGoalCondition(tf);
	this.hashingFactory = new SimpleHashableStateFactory();
}
 
Example #4
Source File: TestBlockDude.java    From burlap with Apache License 2.0
public void testDude(State s) {
	TerminalFunction tf = new BlockDudeTF();
	StateConditionTest sc = new TFGoalCondition(tf);

	AStar astar = new AStar(domain, sc, new SimpleHashableStateFactory(), new NullHeuristic());
	astar.toggleDebugPrinting(false);
	astar.planFromState(s);

	Policy p = new SDPlannerPolicy(astar);
	Episode ea = PolicyUtils.rollout(p, s, domain.getModel(), 100);

	State lastState = ea.stateSequence.get(ea.stateSequence.size() - 1);
	Assert.assertEquals(true, tf.isTerminal(lastState));
	Assert.assertEquals(true, sc.satisfies(lastState));
	Assert.assertEquals(-94.0, ea.discountedReturn(1.0), 0.001);

	/*
	BlockDude constructor = new BlockDude();
	Domain d = constructor.generateDomain();

	List<Integer> px = new ArrayList<Integer>();
	List <Integer> ph = new ArrayList<Integer>();

	ph.add(15);
	ph.add(3);
	ph.add(3);
	ph.add(3);
	ph.add(0);
	ph.add(0);
	ph.add(0);
	ph.add(1);
	ph.add(2);
	ph.add(0);
	ph.add(2);
	ph.add(3);
	ph.add(2);
	ph.add(2);
	ph.add(3);
	ph.add(3);
	ph.add(15);
	
	State o = BlockDude.getCleanState(d, px, ph, 6);
	o = BlockDude.setAgent(o, 9, 3, 1, 0);
	o = BlockDude.setExit(o, 1, 0);
	
	o = BlockDude.setBlock(o, 0, 5, 1);
	o = BlockDude.setBlock(o, 1, 6, 1);
	o = BlockDude.setBlock(o, 2, 14, 3);
	o = BlockDude.setBlock(o, 3, 16, 4);
	o = BlockDude.setBlock(o, 4, 17, 4);
	o = BlockDude.setBlock(o, 5, 17, 5);
	
	TerminalFunction tf = new SinglePFTF(d.getPropFunction(BlockDude.PFATEXIT));
	StateConditionTest sc = new SinglePFSCT(d.getPropFunction(BlockDude.PFATEXIT));

	RewardFunction rf = new UniformCostRF();

	AStar astar = new AStar(d, rf, sc, new DiscreteStateHashFactory(), new NullHeuristic());
	astar.toggleDebugPrinting(false);
	astar.planFromState(o);

	Policy p = new SDPlannerPolicy(astar);
	EpisodeAnalysis ea = p.evaluateBehavior(o, rf, tf, 100);

	State lastState = ea.stateSequence.get(ea.stateSequence.size() - 1);
	Assert.assertEquals(true, tf.isTerminal(lastState));
	Assert.assertEquals(true, sc.satisfies(lastState));
	Assert.assertEquals(-94.0, ea.getDiscountedReturn(1.0), 0.001);
	*/
}
 
Example #5
Source File: PlotTest.java    From burlap_examples with MIT License
public static void main(String [] args){

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

		//ends when the agent reaches a location
		final TerminalFunction tf = new SinglePFTF(
				PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

		//reward function definition
		final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

		gw.setTf(tf);
		gw.setRf(rf);


		final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

		//set up the initial state
		GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));



		//initial state generator
		final ConstantStateGenerator sg = new ConstantStateGenerator(s);


		//set up the state hashing system for looking up states
		final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();


		/**
		 * Create factory for Q-learning agent
		 */
		LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

			public String getAgentName() {
				return "Q-learning";
			}

			public LearningAgent generateAgent() {
				return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
			}
		};

		//define learning environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

		//define experiment
		LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env,
				10, 100, qLearningFactory);

		exp.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
				PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);


		//start experiment
		exp.startExperiment();


	}
 
Example #6
Source File: GoalBasedRF.java    From burlap with Apache License 2.0
/**
 * Initializes with transitions to goal states, indicated by the terminal function, returning a reward of 1 and all others returning 0.
 * @param tf {@link TerminalFunction} object that specifies goal states. 
 */
public GoalBasedRF(TerminalFunction tf) {
	this(new TFGoalCondition(tf));
}
 
Example #7
Source File: GoalBasedRF.java    From burlap with Apache License 2.0
/**
 * Initializes with transitions to goal states, indicated by the terminal function, returning the given reward and all others returning 0.
 * @param tf {@link TerminalFunction} object that specifies goal states.
 * @param goalReward the reward returned for transitions to goal states.
 */
public GoalBasedRF(TerminalFunction tf, double goalReward) {
	this(new TFGoalCondition(tf), goalReward);
}
 
Example #8
Source File: GoalBasedRF.java    From burlap with Apache License 2.0
/**
 * Initializes with transitions to goal states, indicated by the terminal function, returning the given reward and all others returning 0.
 * @param tf {@link TerminalFunction} object that specifies goal states.
 * @param goalReward the reward returned for transitions to goal states.
 * @param defaultReward the default reward returned for all non-goal state transitions.
 */
public GoalBasedRF(TerminalFunction tf, double goalReward, double defaultReward) {
	this(new TFGoalCondition(tf), goalReward, defaultReward);
}
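
Because each of these convenience constructors simply wraps the TerminalFunction in a TFGoalCondition, passing the terminal function directly is equivalent to building the goal condition yourself. A small sketch, reusing the reward values from Example #5 for illustration:

// These two calls produce equivalent reward functions:
RewardFunction rf1 = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1); // explicit goal condition (Example #5)
RewardFunction rf2 = new GoalBasedRF(tf, 5., -0.1);                      // convenience constructor (Example #8)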