burlap.domain.singleagent.gridworld.GridWorldDomain Java Examples

The following examples show how to use burlap.domain.singleagent.gridworld.GridWorldDomain. Each example is drawn from an open-source project; the source file, originating project, and license are noted above it.
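Most of the examples below share one setup pattern: construct a GridWorldDomain with a width and height, optionally configure the map, transition dynamics, reward function, and terminal function, and then call generateDomain() to produce the domain object that planners and learners consume. Distilled from the examples that follow:

GridWorldDomain gw = new GridWorldDomain(11, 11);  //11x11 grid
gw.setMapToFourRooms();                            //four-rooms layout
gw.setProbSucceedTransitionDynamics(0.8);          //intended move succeeds 80% of the time
SADomain domain = gw.generateDomain();             //domain consumed by planners/learners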
Example #1
Source File: GridWorldDQN.java    From burlap_caffe with Apache License 2.0
public GridWorldDQN(String solverFile, double gamma) {

        //create the domain
        gwdg = new GridWorldDomain(11, 11);
        gwdg.setMapToFourRooms();
        rf = new UniformCostRF();
        tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
        gwdg.setRf(rf);
        gwdg.setTf(tf);
        domain = gwdg.generateDomain();

        goalCondition = new TFGoalCondition(tf);

        //set up the initial state of the task
        initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

        //set up the state hashing system for tabular algorithms
        hashingFactory = new SimpleHashableStateFactory();

        //set up the environment for learning algorithms
        env = new SimulatedEnvironment(domain, initialState);

        dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma);
    }
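This constructor assigns to fields declared elsewhere in GridWorldDQN, so the snippet is not self-contained. A sketch of the declarations it assumes, with types inferred from the BURLAP classes used above (actionSet is defined elsewhere in the class and is listed here only as an assumption):

GridWorldDomain gwdg;
OOSADomain domain;
RewardFunction rf;
TerminalFunction tf;
StateConditionTest goalCondition;    //TFGoalCondition implements this
State initialState;
HashableStateFactory hashingFactory;
SimulatedEnvironment env;
DQN dqn;
ActionSet actionSet;                 //assumption: defined elsewhere in GridWorldDQN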
 
Example #2
Source File: HelloGridWorld.java    From burlap_examples with MIT License
public static void main(String[] args) {

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate
		SADomain domain = gw.generateDomain(); //generate the grid world domain

		//setup initial state
		State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

		//create visualizer and explorer
		Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap());
		VisualExplorer exp = new VisualExplorer(domain, v, s);

		//set control keys to use w-s-a-d
		exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
		exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
		exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");
		exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");

		exp.initGUI();

	}
 
Example #3
Source File: BasicBehavior.java    From burlap_examples with MIT License
public BasicBehavior(){
		gwdg = new GridWorldDomain(11, 11);
		gwdg.setMapToFourRooms();
		tf = new GridWorldTerminalFunction(10, 10);
		gwdg.setTf(tf);
		goalCondition = new TFGoalCondition(tf);
		domain = gwdg.generateDomain();

		initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
		hashingFactory = new SimpleHashableStateFactory();

		env = new SimulatedEnvironment(domain, initialState);


//		VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
//		observer.initGUI();
//		env.addObservers(observer);
	}
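With this fixture in place, the tutorial's other methods can hand the domain, goal condition, and hashing factory to any BURLAP planner or learner. A minimal sketch of one such use, running value iteration from the initial state (the tolerance and iteration cap are illustrative):

Planner planner = new ValueIteration(domain, 0.99, hashingFactory, 0.001, 100);
Policy p = planner.planFromState(initialState);

//roll the resulting policy out against the domain's model and print the action sequence
Episode e = PolicyUtils.rollout(p, initialState, domain.getModel());
System.out.println(e.actionString());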
 
Example #4
Source File: TestHashing.java    From burlap with Apache License 2.0
public void testSimpleHashFactoryLargeState(HashableStateFactory factory, int width, int numRandomStates, boolean moveLocObjects) {
	GridWorldDomain gw = new GridWorldDomain(width, width);
	SADomain domain = (SADomain)gw.generateDomain();
	State startState = this.generateLargeGW(domain, width);
	Set<HashableState> hashedStates = this.generateRandomStates(domain, startState, factory, width, numRandomStates, moveLocObjects);
	Set<Integer> hashes = new HashSet<Integer>();
	for (HashableState hs : hashedStates) {
		hashes.add(hs.hashCode());
	}
	System.out.println("Hashed states: " + hashedStates.size() + ", hashes: " + hashes.size());
}
 
Example #5
Source File: Main.java    From cs7641-assignment4 with MIT License
/**
 * This method takes care of visualizing the grid, rewards, and a specific policy in a nice
 * BURLAP-predefined GUI. I found this very useful for understanding how the algorithm was working.
 */
private static void visualize(Problem map, ValueFunction valueFunction, Policy policy, State initialState, SADomain domain, HashableStateFactory hashingFactory, String title) {
	List<State> states = StateReachability.getReachableStates(initialState, domain, hashingFactory);
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(states, map.getWidth(), map.getWidth(), valueFunction, policy);
	gui.setTitle(title);
	gui.setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
	gui.initGUI();
}
 
Example #6
Source File: TestGridWorld.java    From burlap with Apache License 2.0
public void assertPFs(State s, boolean[] expectedValues) {
	OOState os = (OOState)s;
	PropositionalFunction atLocation = domain.propFunction(GridWorldDomain.PF_AT_LOCATION);
	List<GroundedProp> gpAt = atLocation.allGroundings(os);
	Assert.assertEquals(1, gpAt.size());
	Assert.assertEquals(expectedValues[0], gpAt.get(0).isTrue((OOState)s));
	
	PropositionalFunction pfWallNorth = domain.propFunction(GridWorldDomain.PF_WALL_NORTH);
	List<GroundedProp> gpWallNorth = pfWallNorth.allGroundings(os);
	Assert.assertEquals(1, gpWallNorth.size());
	Assert.assertEquals(expectedValues[1], gpWallNorth.get(0).isTrue((OOState)s));
	
	
	PropositionalFunction pfWallSouth = domain.propFunction(GridWorldDomain.PF_WALL_SOUTH);
	List<GroundedProp> gpWallSouth = pfWallSouth.allGroundings(os);
	Assert.assertEquals(1, gpWallSouth.size());
	Assert.assertEquals(expectedValues[2], gpWallSouth.get(0).isTrue((OOState)s));
	
	
	PropositionalFunction pfWallEast = domain.propFunction(GridWorldDomain.PF_WALL_EAST);
	List<GroundedProp> gpWallEast = pfWallEast.allGroundings(os);
	Assert.assertEquals(1, gpWallEast.size());
	Assert.assertEquals(expectedValues[3], gpWallEast.get(0).isTrue((OOState)s));
	
	
	PropositionalFunction pfWallWest = domain.propFunction(GridWorldDomain.PF_WALL_WEST);
	List<GroundedProp> gpWallWest = pfWallWest.allGroundings(os);
	Assert.assertEquals(1, gpWallWest.size());
	Assert.assertEquals(expectedValues[4], gpWallWest.get(0).isTrue((OOState)s));
	

}
 
Example #7
Source File: TestGridWorld.java    From burlap with Apache License 2.0
@Before
public void setup() {
	this.gw = new GridWorldDomain(11,11);
	gw.setMapToFourRooms();
	gw.setProbSucceedTransitionDynamics(1.0);
	this.domain = gw.generateDomain(); //generate the grid world domain
	
}
 
Example #8
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Before
public void setup() {
	this.gw = new GridWorldDomain(11, 11);
	this.gw.setMapToFourRooms();
	this.gw.setRf(new UniformCostRF());
	TerminalFunction tf = new SinglePFTF(PropositionalFunction.findPF(gw.generatePfs(), PF_AT_LOCATION));
	this.gw.setTf(tf);
	this.domain = this.gw.generateDomain();
	this.goalCondition = new TFGoalCondition(tf);
	this.hashingFactory = new SimpleHashableStateFactory();
}
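The tests built on this fixture hand the goal condition and hashing factory to BURLAP's planners. As a hedged sketch of one such test body (the start state mirrors the other examples on this page):

State initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
DeterministicPlanner planner = new BFS(this.domain, this.goalCondition, this.hashingFactory);
Policy p = planner.planFromState(initialState);
Episode e = PolicyUtils.rollout(p, initialState, domain.getModel());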
 
Example #9
Source File: AnalysisRunner.java    From omscs-cs7641-machine-learning-assignment-4 with GNU Lesser General Public License v3.0
public void simpleValueFunctionVis(ValueFunction valueFunction, Policy p, 
		State initialState, Domain domain, HashableStateFactory hashingFactory, String title){

	List<State> allStates = StateReachability.getReachableStates(initialState,
			(SADomain)domain, hashingFactory);
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(
			allStates, valueFunction, p);
	gui.setTitle(title);
	gui.initGUI();

}
 
Example #10
Source File: QLTutorial.java    From burlap_examples with MIT License
public static void main(String[] args) {

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setMapToFourRooms();
		gwd.setProbSucceedTransitionDynamics(0.8);
		gwd.setTf(new GridWorldTerminalFunction(10, 10));

		SADomain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = new GridWorldState(new GridAgent(0, 0));

		//create environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

		//create Q-learning
		QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ConstantValueFunction(), 0.1, 0.1);

		//run Q-learning and store results in a list
		List<Episode> episodes = new ArrayList<Episode>(1000);
		for(int i = 0; i < 1000; i++){
			episodes.add(agent.runLearningEpisode(env));
			env.resetEnvironment();
		}

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, episodes);

	}
 
Example #11
Source File: VITutorial.java    From burlap_examples with MIT License
public static void main(String [] args){

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setTf(new GridWorldTerminalFunction(10, 10));
		gwd.setMapToFourRooms();

		//only go in the intended direction 80% of the time
		gwd.setProbSucceedTransitionDynamics(0.8);

		SADomain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = new GridWorldState(new GridAgent(0, 0));

		//set up VI with a 0.99 discount factor, a value function
		//initialization that initializes all states to value 0, and a run of
		//30 iterations over the state space
		VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ConstantValueFunction(0.0), 30);

		//run planning from our initial state
		Policy p = vi.planFromState(s);

		//evaluate the policy with one rollout and visualize the trajectory
		Episode ea = PolicyUtils.rollout(p, s, domain.getModel());

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));

	}
 
Example #12
Source File: IRLExample.java    From burlap_examples with MIT License
/**
 * Creates a visual explorer that you can use to record trajectories. Use the "`" key to reset to a random initial state.
 * Use the w, a, s, and d keys to move north, west, south, and east, respectively. To enable recording,
 * first open up the shell and type "rec -b" (you only need to type this once). Then you can move in the explorer as normal.
 * Each demonstration begins after an environment reset.
 * After each demonstration that you want to keep, go back to the shell and type "rec -r".
 * If you reset the environment before you type that, the episode will be discarded. To temporarily view the episodes
 * you've created, type "episode -v" in the shell. To actually record your episodes to file, type
 * "rec -w path/to/save/directory base_file_name", for example "rec -w irl_demos demo".
 * A recommendation for examples is to record two demonstrations that both go to the pink cell while avoiding blue ones,
 * and to do so from two different start locations on the left (if you keep resetting the environment, it will change
 * where the agent starts).
 */
public void launchExplorer(){
	SimulatedEnvironment env = new SimulatedEnvironment(this.domain, this.sg);
	VisualExplorer exp = new VisualExplorer(this.domain, env, this.v, 800, 800);
	exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
	exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
	exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
	exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");

	//exp.enableEpisodeRecording("r", "f", "irlDemo");

	exp.initGUI();
}
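Once demonstrations have been written with "rec -w", they can be loaded back for IRL with the same call that runIRL (Example #17) uses; the directory name here is just the one suggested in the comment above:

List<Episode> demos = Episode.readEpisodes("irl_demos");
System.out.println("recorded demonstrations: " + demos.size());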
 
Example #13
Source File: IRLExample.java    From burlap_examples with MIT License
public IRLExample(){

		this.gwd = new GridWorldDomain(5 ,5);
		this.gwd.setNumberOfLocationTypes(5);
		gwd.makeEmptyMap();
		this.domain = gwd.generateDomain();
		State bs = this.basicState();
		this.sg = new LeftSideGen(5, bs);
		this.v = GridWorldVisualizer.getVisualizer(this.gwd.getMap());

	}
 
Example #14
Source File: GridWorldDQN.java    From burlap_caffe with Apache License 2.0
@Override
public void vectorizeState(State state, FloatPointer input) {
    GridWorldState gwState = (GridWorldState) state;

    int width = gwdg.getWidth();

    //zero the input buffer, then one-hot encode the agent's grid cell
    input.fill(0);

    ObjectInstance agent = gwState.object(GridWorldDomain.CLASS_AGENT);
    int x = (Integer)agent.get(GridWorldDomain.VAR_X);
    int y = (Integer)agent.get(GridWorldDomain.VAR_Y);

    //flatten (x, y) to a single index in row-major order
    input.put((long)(y*width + x), 1);
}
 
Example #15
Source File: BasicBehavior.java    From burlap_examples with MIT License
public void manualValueFunctionVis(ValueFunction valueFunction, Policy p){

		List<State> allStates = StateReachability.getReachableStates(initialState, domain, hashingFactory);

		//define color function
		LandmarkColorBlendInterpolation rb = new LandmarkColorBlendInterpolation();
		rb.addNextLandMark(0., Color.RED);
		rb.addNextLandMark(1., Color.BLUE);

		//define a 2D painter of state values, specifying which attributes correspond to the x and y coordinates of the canvas
		StateValuePainter2D svp = new StateValuePainter2D(rb);
		svp.setXYKeys("agent:x", "agent:y", new VariableDomain(0, 11), new VariableDomain(0, 11), 1, 1);

		//create our ValueFunctionVisualizer that paints for all states
		//using the ValueFunction source and the state value painter we defined
		ValueFunctionVisualizerGUI gui = new ValueFunctionVisualizerGUI(allStates, svp, valueFunction);

		//define a policy painter that uses arrow glyphs for each of the grid world actions
		PolicyGlyphPainter2D spp = new PolicyGlyphPainter2D();
		spp.setXYKeys("agent:x", "agent:y", new VariableDomain(0, 11), new VariableDomain(0, 11), 1, 1);

		spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_NORTH, new ArrowActionGlyph(0));
		spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_SOUTH, new ArrowActionGlyph(1));
		spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_EAST, new ArrowActionGlyph(2));
		spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_WEST, new ArrowActionGlyph(3));
		spp.setRenderStyle(PolicyGlyphPainter2D.PolicyGlyphRenderStyle.DISTSCALED);


		//add our policy renderer to it
		gui.setSpp(spp);
		gui.setPolicy(p);

		//set the background color for places where states are not rendered to grey
		gui.setBgColor(Color.GRAY);

		//start it
		gui.initGUI();



	}
 
Example #16
Source File: IRLExample.java    From burlap_examples with MIT License
public LocationFeatures(OODomain domain, int numLocations){
	this.numLocations = numLocations;
	this.inLocationPF = domain.propFunction(GridWorldDomain.PF_AT_LOCATION);
}
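This constructor is only a fragment of the LocationFeatures inner class. A plausible completion of its feature method, assuming it implements BURLAP's DenseStateFeatures and one-hot encodes the type of any location the agent currently occupies (the loop body is an assumption, not copied from the source file):

@Override
public double[] features(State s) {
	double[] fv = new double[this.numLocations];
	OOState os = (OOState)s;
	//check each (agent, location) grounding of the atLocation PF; when one is true,
	//mark the feature for that location's type
	for(GroundedProp gp : this.inLocationPF.allGroundings(os)){
		if(gp.isTrue(os)){
			ObjectInstance loc = os.object(gp.params[1]);
			int type = (Integer)loc.get(GridWorldDomain.VAR_TYPE);
			fv[type] = 1.;
		}
	}
	return fv;
}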
 
Example #17
Source File: IRLExample.java    From burlap_examples with MIT License
/**
 * Runs MLIRL on the trajectories stored in the directory given by pathToEpisodes and then visualizes the learned reward function.
 */
public void runIRL(String pathToEpisodes){

	//create reward function features to use
	LocationFeatures features = new LocationFeatures(this.domain, 5);

	//create a reward function that is linear with respect to those features and has small random
	//parameter values to start
	LinearStateDifferentiableRF rf = new LinearStateDifferentiableRF(features, 5);
	for(int i = 0; i < rf.numParameters(); i++){
		rf.setParameter(i, RandomFactory.getMapped(0).nextDouble()*0.2 - 0.1);
	}

	//load our saved demonstrations from disk
	List<Episode> episodes = Episode.readEpisodes(pathToEpisodes);

	//use either DifferentiableVI or DifferentiableSparseSampling for planning. The latter enables receding horizon IRL,
	//but you will probably want to use a fairly large horizon for this kind of reward function.
	double beta = 10;
	//DifferentiableVI dplanner = new DifferentiableVI(this.domain, rf, 0.99, beta, new SimpleHashableStateFactory(), 0.01, 100);
	DifferentiableSparseSampling dplanner = new DifferentiableSparseSampling(this.domain, rf, 0.99, new SimpleHashableStateFactory(), 10, -1, beta);

	dplanner.toggleDebugPrinting(false);

	//define the IRL problem
	MLIRLRequest request = new MLIRLRequest(domain, dplanner, episodes, rf);
	request.setBoltzmannBeta(beta);

	//run MLIRL on it
	MLIRL irl = new MLIRL(request, 0.1, 0.1, 10);
	irl.performIRL();

	//get all states in the domain so we can visualize the learned reward function for them
	List<State> allStates = StateReachability.getReachableStates(basicState(), this.domain, new SimpleHashableStateFactory());

	//get a standard grid world value function visualizer, but give it a RewardValueProjection, which returns the
	//reward received upon reaching each state; this lets us render the reward function that is
	//learned rather than the value function for it
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(
			allStates,
			5,
			5,
			new RewardValueProjection(rf),
			new GreedyQPolicy((QProvider) request.getPlanner())
	);

	gui.initGUI();


}
 
Example #18
Source File: Episode.java    From burlap with Apache License 2.0
public static void main(String[] args) {
	GridWorldDomain gwd = new GridWorldDomain(11, 11);
	SADomain domain = gwd.generateDomain();
	State s = new GridWorldState(new GridAgent(1, 3));

	Policy p = new RandomPolicy(domain);
	Episode ea = PolicyUtils.rollout(p, s, domain.getModel(), 30);

	String yamlOut = ea.serialize();

	System.out.println(yamlOut);

	System.out.println("\n\n");

	Episode read = Episode.parseEpisode(yamlOut);

	System.out.println(read.actionString());
	System.out.println(read.state(0).toString());
	System.out.println(read.actionSequence.size());
	System.out.println(read.stateSequence.size());

}
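The same round trip works against the file system; Episode also provides write and read for single files (the path below is illustrative, and BURLAP manages the .episode extension):

ea.write("output/ep");                           //serialize the episode to disk
Episode fromDisk = Episode.read("output/ep.episode");
System.out.println(fromDisk.actionString());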
 
Example #19
Source File: GridAgent.java    From burlap with Apache License 2.0
@Override
public String className() {
	return GridWorldDomain.CLASS_AGENT;
}
 
Example #20
Source File: GridLocation.java    From burlap with Apache License 2.0
@Override
public String className() {
	return GridWorldDomain.CLASS_LOCATION;
}
 
Example #21
Source File: TestGridWorld.java    From burlap with Apache License 2.0
public void testGridWorld(State s) {
	ActionType northActionType = domain.getAction(GridWorldDomain.ACTION_NORTH);
	ActionType eastActionType = domain.getAction(GridWorldDomain.ACTION_EAST);
	ActionType southActionType = domain.getAction(GridWorldDomain.ACTION_SOUTH);
	ActionType westActionType = domain.getAction(GridWorldDomain.ACTION_WEST);
	
	List<Action> northActions = northActionType.allApplicableActions(s);
	Assert.assertEquals(1, northActions.size());
	
	List<Action> eastActions = eastActionType.allApplicableActions(s);
	Assert.assertEquals(1, eastActions.size());
	
	List<Action> southActions = southActionType.allApplicableActions(s);
	Assert.assertEquals(1, southActions.size());
	
	List<Action> westActions = westActionType.allApplicableActions(s);
	Assert.assertEquals(1, westActions.size());

	Action north = northActions.get(0);
	Action south = southActions.get(0);
	Action east = eastActions.get(0);
	Action west = westActions.get(0);
	
	// AtLocation, WallNorth, WallSouth, WallEast, WallWest
	this.assertPFs(s, new boolean[] {false, false, true, false, true});
	s = domain.getModel().sample(s, north).op;
	this.assertPFs(s, new boolean[] {false, false, false, false, true});
	s = domain.getModel().sample(s, east).op;
	this.assertPFs(s, new boolean[] {false, false, false, false, false});
	s = domain.getModel().sample(s, north).op;
	s = domain.getModel().sample(s, north).op;
	s = domain.getModel().sample(s, north).op;
	s = domain.getModel().sample(s, north).op;
	this.assertPFs(s, new boolean[] {false, false, false, true, true});
	s = domain.getModel().sample(s, north).op;
	s = domain.getModel().sample(s, east).op;
	s = domain.getModel().sample(s, east).op;
	s = domain.getModel().sample(s, east).op;
	this.assertPFs(s, new boolean[] {false, false, true, true, false});
	s = domain.getModel().sample(s, north).op;
	s = domain.getModel().sample(s, north).op;
	s = domain.getModel().sample(s, east).op;
	this.assertPFs(s, new boolean[] {false, true, true, false, false});
	s = domain.getModel().sample(s, east).op;
	s = domain.getModel().sample(s, north).op;
	s = domain.getModel().sample(s, north).op;
	this.assertPFs(s, new boolean[] {false, true, false, false, true});
	s = domain.getModel().sample(s, east).op;
	s = domain.getModel().sample(s, south).op;
	s = domain.getModel().sample(s, north).op;
	s = domain.getModel().sample(s, west).op;
	this.assertPFs(s, new boolean[] {false, true, false, false, true});
	s = domain.getModel().sample(s, east).op;
	s = domain.getModel().sample(s, east).op;
	s = domain.getModel().sample(s, east).op;
	s = domain.getModel().sample(s, east).op;
	this.assertPFs(s, new boolean[] {true, true, false, true, false});
}
 
Example #22
Source File: BasicBehavior.java    From burlap_examples with MIT License
public void simpleValueFunctionVis(ValueFunction valueFunction, Policy p){

		List<State> allStates = StateReachability.getReachableStates(initialState, domain, hashingFactory);
		ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(allStates, 11, 11, valueFunction, p);
		gui.initGUI();

	}
 
Example #23
Source File: OptionsExample.java    From burlap_examples with MIT License
public static void testOptions(){

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setMapToFourRooms();
		SADomain domain = gwd.generateDomain();

		Option swToNorth = createRoomOption("swToNorth", domain, 1, 5, 0, 0, 4, 4);
		Option swToEast = createRoomOption("swToEast", domain, 5, 1, 0, 0, 4, 4);

		Option seToWest = createRoomOption("seToWest", domain, 5, 1, 6, 0, 10, 3);
		Option seToNorth = createRoomOption("seToNorth", domain, 8, 4, 6, 0, 10, 3);

		Option neToSouth = createRoomOption("neToSouth", domain, 8, 4, 6, 5, 10, 10);
		Option neToWest = createRoomOption("neToWest", domain, 5, 8, 6, 5, 10, 10);

		Option nwToEast = createRoomOption("nwToEast", domain, 5, 8, 0, 6, 4, 10);
		Option nwToSouth = createRoomOption("nwToSouth", domain, 1, 5, 0, 6, 4, 10);

		List<Episode> episodes = new ArrayList<Episode>();

		episodes.add(optionExecuteResult(domain, swToNorth, new GridWorldState(0, 0)));
		episodes.add(optionExecuteResult(domain, swToEast, new GridWorldState(0, 0)));

		episodes.add(optionExecuteResult(domain, seToWest, new GridWorldState(10, 0)));
		episodes.add(optionExecuteResult(domain, seToNorth, new GridWorldState(10, 0)));

		episodes.add(optionExecuteResult(domain, neToSouth, new GridWorldState(10, 10)));
		episodes.add(optionExecuteResult(domain, neToWest, new GridWorldState(10, 10)));

		episodes.add(optionExecuteResult(domain, nwToEast, new GridWorldState(0, 10)));
		episodes.add(optionExecuteResult(domain, nwToSouth, new GridWorldState(0, 10)));


		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		EpisodeSequenceVisualizer evis = new EpisodeSequenceVisualizer(v, domain, episodes);


	}
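The createRoomOption and optionExecuteResult helpers are defined elsewhere in OptionsExample. A hedged sketch of the latter under BURLAP's Option API, rolling an option out in a fresh simulated environment (the discount value is arbitrary here):

protected static Episode optionExecuteResult(SADomain domain, Option o, State s){
	SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
	EnvironmentOptionOutcome eo = o.control(env, 0.99);
	return eo.episode;
}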
 
Example #24
Source File: PlotTest.java    From burlap_examples with MIT License
public static void main(String [] args){

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

		//ends when the agent reaches a location
		final TerminalFunction tf = new SinglePFTF(
				PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

		//reward function definition
		final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

		gw.setTf(tf);
		gw.setRf(rf);


		final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

		//setup initial state
		GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));



		//initial state generator
		final ConstantStateGenerator sg = new ConstantStateGenerator(s);


		//set up the state hashing system for looking up states
		final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();


		/**
		 * Create factory for Q-learning agent
		 */
		LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

			public String getAgentName() {
				return "Q-learning";
			}

			public LearningAgent generateAgent() {
				return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
			}
		};

		//define learning environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

		//define experiment
		LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env,
				10, 100, qLearningFactory);

		exp.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
				PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);


		//start experiment
		exp.startExperiment();


	}