burlap.mdp.singleagent.environment.SimulatedEnvironment Java Examples

The following examples show how to use burlap.mdp.singleagent.environment.SimulatedEnvironment. Each example is drawn from an open-source project; the source file and project are noted above each snippet.
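
At its core, SimulatedEnvironment implements BURLAP's Environment interface by sampling state transitions from a domain's model, which is why it appears in almost every learning and exploration example below. The following is a minimal sketch of the basic interaction loop in a four-rooms grid world like the one used throughout these examples; the RandomPolicy action selection and the class name SimulatedEnvironmentSketch are assumptions made for illustration, not part of any example on this page.

import burlap.behavior.policy.Policy;
import burlap.behavior.policy.RandomPolicy;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.action.Action;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;

public class SimulatedEnvironmentSketch {

	public static void main(String[] args) {

		//build a simple four-rooms grid world domain
		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setMapToFourRooms();
		SADomain domain = gwd.generateDomain();

		//environment rooted at a fixed initial state
		SimulatedEnvironment env = new SimulatedEnvironment(domain,
				new GridWorldState(new GridAgent(0, 0)));

		//step the environment with randomly selected actions (RandomPolicy used purely for illustration)
		Policy p = new RandomPolicy(domain);
		for(int t = 0; t < 10 && !env.isInTerminalState(); t++){
			Action a = p.action(env.currentObservation());
			EnvironmentOutcome eo = env.executeAction(a);
			System.out.println(a.actionName() + " -> reward " + eo.r);
		}

		//return the environment to its initial state
		env.resetEnvironment();
	}
}
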
Example #1
Source File: Main.java    From cs7641-assignment4 with MIT License
/**
 * Runs a learning experiment and shows some cool charts. Apparently, this is only useful for
 * Q-Learning, so I only call this method when Q-Learning is selected and the appropriate flag
 * is enabled.
 */
private static void learningExperimenter(Problem problem, LearningAgent agent, SimulatedEnvironment simulatedEnvironment) {
	LearningAlgorithmExperimenter experimenter = new LearningAlgorithmExperimenter(simulatedEnvironment, 10, problem.getNumberOfIterations(Algorithm.QLearning), new LearningAgentFactory() {

		public String getAgentName() {
			return Algorithm.QLearning.getTitle();
		}

		public LearningAgent generateAgent() {
			return agent;
		}
	});

	/*
	 * Try different PerformanceMetric values below to display different charts.
	 */
	experimenter.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE, PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);
	experimenter.startExperiment();
}
 
Example #2
Source File: GridWorldDQN.java    From burlap_caffe with Apache License 2.0
public GridWorldDQN(String solverFile, double gamma) {

        //create the domain
        gwdg = new GridWorldDomain(11, 11);
        gwdg.setMapToFourRooms();
        rf = new UniformCostRF();
        tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
        gwdg.setRf(rf);
        gwdg.setTf(tf);
        domain = gwdg.generateDomain();

        goalCondition = new TFGoalCondition(tf);

        //set up the initial state of the task
        initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

        //set up the state hashing system for tabular algorithms
        hashingFactory = new SimpleHashableStateFactory();

        //set up the environment for learning algorithms
        env = new SimulatedEnvironment(domain, initialState);

        dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma);
    }
 
Example #3
Source File: GradientDescentSarsaLam.java    From burlap with Apache License 2.0
/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

	if(this.model == null){
		throw new RuntimeException("Planning requires a model, but none is provided.");
	}

	SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

	int eCount = 0;
	do{
		this.runLearningEpisode(env);
		eCount++;
	}while(eCount < numEpisodesForPlanning && maxWeightChangeInLastEpisode > maxWeightChangeForPlanningTermination);

	return new GreedyQPolicy(this);

}
 
Example #4
Source File: QLearning.java    From burlap with Apache License 2.0
/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

	if(this.model == null){
		throw new RuntimeException("QLearning (and its subclasses) cannot execute planFromState because a model is not specified.");
	}

	SimulatedEnvironment env = new SimulatedEnvironment(this.domain, initialState);

	int eCount = 0;
	do{
		this.runLearningEpisode(env, this.maxEpisodeSize);
		eCount++;
	}while(eCount < numEpisodesForPlanning && maxQChangeInLastEpisode > maxQChangeForPlanningTermination);


	return new GreedyQPolicy(this);

}
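
In Examples #3 and #4 above, planFromState builds a SimulatedEnvironment internally and keeps running learning episodes against it until the episode count or value-change threshold is reached. Below is a minimal sketch of how the returned GreedyQPolicy might then be used, assuming a domain, hashingFactory, and initialState set up as in the other examples; the constructor parameters and step limit are placeholder values.

QLearning ql = new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
GreedyQPolicy policy = ql.planFromState(initialState);

//roll out the greedy policy against the domain's model for at most 200 steps
Episode e = PolicyUtils.rollout(policy, initialState, domain.getModel(), 200);
System.out.println("steps taken: " + e.maxTimeStep());
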
 
Example #5
Source File: BasicBehavior.java    From burlap_examples with MIT License
public BasicBehavior(){
		gwdg = new GridWorldDomain(11, 11);
		gwdg.setMapToFourRooms();
		tf = new GridWorldTerminalFunction(10, 10);
		gwdg.setTf(tf);
		goalCondition = new TFGoalCondition(tf);
		domain = gwdg.generateDomain();

		initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
		hashingFactory = new SimpleHashableStateFactory();

		env = new SimulatedEnvironment(domain, initialState);


//		VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
//		observer.initGUI();
//		env.addObservers(observer);
	}
 
Example #6
Source File: ExampleGridWorld.java    From burlap_examples with MIT License
public static void main(String [] args){

		ExampleGridWorld gen = new ExampleGridWorld();
		gen.setGoalLocation(10, 10);
		SADomain domain = gen.generateDomain();
		State initialState = new EXGridState(0, 0);
		SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

		Visualizer v = gen.getVisualizer();
		VisualExplorer exp = new VisualExplorer(domain, env, v);

		exp.addKeyAction("w", ACTION_NORTH, "");
		exp.addKeyAction("s", ACTION_SOUTH, "");
		exp.addKeyAction("d", ACTION_EAST, "");
		exp.addKeyAction("a", ACTION_WEST, "");

		exp.initGUI();


	}
 
Example #7
Source File: Main.java    From cs7641-assignment4 with MIT License
/**
 * Here is where the magic happens. This method loops through the specified number of episodes
 * (iterations) and runs the selected algorithm. To keep things nice and clean, I use this one
 * method to run all three algorithms; the algorithm-specific details are supplied through the
 * PlannerFactory interface.
 * 
 * This method collects all the information from the algorithm and packs it in an Analysis
 * instance that later gets dumped on the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory, State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
	ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
	SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
	Planner planner = null;
	Policy policy = null;
	for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
		long startTime = System.nanoTime();
		planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
		policy = planner.planFromState(initialState);

		/*
		 * If we haven't converged, following the policy will leave the agent wandering around,
		 * and it might never reach the goal. To avoid this, we need to set the maximum number
		 * of steps to take before terminating the policy rollout. I decided to set this maximum
		 * at the number of grid locations in our map (width * width). This should give the
		 * agent plenty of room to wander around.
		 * 
		 * The smaller this number is, the faster the algorithm will run.
		 */
		int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

		Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
		analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
	}

	if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
		learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
	}

	if (SHOW_VISUALIZATION && planner != null && policy != null) {
		visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
	}
}
 
Example #8
Source File: IRLExample.java    From burlap_examples with MIT License
/**
 * Creates a visual explorer that you can use to record trajectories. Use the "`" key to reset to a random initial state.
 * Use the w, s, d, and a keys to move north, south, east, and west, respectively. To enable recording,
 * first open up the shell and type "rec -b" (you only need to type this once). Then you can move in the explorer as normal.
 * Each demonstration begins after an environment reset.
 * After each demonstration that you want to keep, go back to the shell and type "rec -r".
 * If you reset the environment before you type that, the episode will be discarded.
 * To temporarily view the episodes you've created, type "episode -v" in the shell. To actually record your
 * episodes to file, type "rec -w path/to/save/directory base_file_name", for example "rec -w irl_demos demo".
 * A good exercise is to record two demonstrations that both go to the pink cell while avoiding blue ones,
 * from two different start locations on the left (if you keep resetting the environment, it will change where the agent starts).
 */
public void launchExplorer(){
	SimulatedEnvironment env = new SimulatedEnvironment(this.domain, this.sg);
	VisualExplorer exp = new VisualExplorer(this.domain, env, this.v, 800, 800);
	exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
	exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
	exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
	exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");

	//exp.enableEpisodeRecording("r", "f", "irlDemo");

	exp.initGUI();
}
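
Once demonstrations have been written with "rec -w irl_demos demo", they can be read back in for the IRL step. A short sketch follows, assuming the episodes were saved to an irl_demos directory and that Episode.readEpisodes is the reader used; treat the directory name and that call as assumptions rather than part of this example.

//load the recorded demonstrations back from disk for use by an IRL algorithm
List<Episode> demonstrations = Episode.readEpisodes("irl_demos");
System.out.println("loaded " + demonstrations.size() + " demonstrations");
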
 
Example #9
Source File: MCVideo.java    From burlap_examples with MIT License
public static void main(String[] args) {

		MountainCar mcGen = new MountainCar();
		SADomain domain = mcGen.generateDomain();

		StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
		SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
		SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

		NormalizedVariableFeatures features = new NormalizedVariableFeatures()
				.variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
				.variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
		FourierBasis fb = new FourierBasis(features, 4);

		LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
		Policy p = lspi.runPolicyIteration(30, 1e-6);

		Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
		VisualActionObserver vob = new VisualActionObserver(v);
		vob.initGUI();

		SimulatedEnvironment env = new SimulatedEnvironment(domain,
				new MCState(mcGen.physParams.valleyPos(), 0));
		EnvironmentServer envServ = new EnvironmentServer(env, vob);

		for(int i = 0; i < 100; i++){
			PolicyUtils.rollout(p, envServ);
			envServ.resetEnvironment();
		}

		System.out.println("Finished");

	}
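
Examples #15 and #17 below attach the VisualActionObserver directly to the environment with addObservers instead of wrapping the environment in an EnvironmentServer; either way, the observer is notified of every interaction. The equivalent hookup for the mountain car setup above would be the following sketch, reusing the variables defined in this example.

SimulatedEnvironment env = new SimulatedEnvironment(domain,
		new MCState(mcGen.physParams.valleyPos(), 0));
env.addObservers(vob);

for(int i = 0; i < 100; i++){
	PolicyUtils.rollout(p, env);
	env.resetEnvironment();
}
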
 
Example #10
Source File: QLTutorial.java    From burlap_examples with MIT License
public static void main(String[] args) {

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setMapToFourRooms();
		gwd.setProbSucceedTransitionDynamics(0.8);
		gwd.setTf(new GridWorldTerminalFunction(10, 10));

		SADomain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = new GridWorldState(new GridAgent(0, 0));

		//create environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

		//create Q-learning
		QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ConstantValueFunction(), 0.1, 0.1);

		//run Q-learning and store results in a list
		List<Episode> episodes = new ArrayList<Episode>(1000);
		for(int i = 0; i < 1000; i++){
			episodes.add(agent.runLearningEpisode(env));
			env.resetEnvironment();
		}

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, episodes);

	}
 
Example #11
Source File: ActorCritic.java    From burlap with Apache License 2.0
public void planFromState(State initialState) {

		if(this.model == null){
			throw new RuntimeException("Planning requires a model, but none is provided.");
		}

		SimulatedEnvironment env = new SimulatedEnvironment(this.model, initialState);

		for(int i = 0; i < numEpisodesForPlanning; i++){
			this.runLearningEpisode(env, this.maxEpisodeSize);
		}
	}
 
Example #12
Source File: BFSMarkovOptionModel.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome sample(State s, Action a) {
	if(!(a instanceof Option)){
		return model.sample(s, a);
	}

	Option o = (Option)a;

	SimulatedEnvironment env = new SimulatedEnvironment(model, s);
	return o.control(env, discount);
}
 
Example #13
Source File: TigerDomain.java    From burlap with Apache License 2.0
/**
 * Main method for interacting with the tiger domain via an {@link EnvironmentShell}.
 * By default, the shell interacts with the partially observable environment ({@link burlap.mdp.singleagent.pomdp.SimulatedPOEnvironment}),
 * which means you only get to see the observations that the agent would. However, if you set the first command-line argument
 * to be "h", then the explorer will explore the underlying fully observable MDP states.
 * @param args either empty or ["h"]; provide "h" to explore the underlying fully observable tiger MDP.
 */
public static void main(String [] args){


	TigerDomain dgen = new TigerDomain(false);
	PODomain domain = (PODomain)dgen.generateDomain();

	StateGenerator tigerGenerator = TigerDomain.randomSideStateGenerator(0.5);

	Environment observableEnv = new SimulatedEnvironment(domain, tigerGenerator);
	Environment poEnv = new SimulatedPOEnvironment(domain, tigerGenerator);

	Environment envTouse = poEnv;
	if(args.length > 0 && args[0].equals("h")){
	    envTouse = observableEnv;
	}

	EnvironmentShell shell = new EnvironmentShell(domain, envTouse);
	shell.start();

	
	
}
 
Example #14
Source File: ExampleOOGridWorld.java    From burlap_examples with MIT License
public static void main(String [] args){

		ExampleOOGridWorld gen = new ExampleOOGridWorld();
		OOSADomain domain = gen.generateDomain();
		State initialState = new GenericOOState(new ExGridAgent(0, 0), new EXGridLocation(10, 10, "loc0"));
		SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

		Visualizer v = gen.getVisualizer();
		VisualExplorer exp = new VisualExplorer(domain, env, v);

		exp.addKeyAction("w", ACTION_NORTH, "");
		exp.addKeyAction("s", ACTION_SOUTH, "");
		exp.addKeyAction("d", ACTION_EAST, "");
		exp.addKeyAction("a", ACTION_WEST, "");

		exp.initGUI();


	}
 
Example #15
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License
public static void MCLSPIRBF(){

		MountainCar mcGen = new MountainCar();
		SADomain domain = mcGen.generateDomain();
		MCState s = new MCState(mcGen.physParams.valleyPos(), 0.);

		NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
				.variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
				.variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));

		StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
		SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
		SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

		RBFFeatures rbf = new RBFFeatures(inputFeatures, true);
		FlatStateGridder gridder = new FlatStateGridder()
				.gridDimension("x", mcGen.physParams.xmin, mcGen.physParams.xmax, 5)
				.gridDimension("v", mcGen.physParams.vmin, mcGen.physParams.vmax, 5);

		List<State> griddedStates = gridder.gridState(s);
		DistanceMetric metric = new EuclideanDistance();
		for(State g : griddedStates){
			rbf.addRBF(new GaussianRBF(inputFeatures.features(g), metric, 0.2));
		}

		LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(rbf, 3), dataset);
		Policy p = lspi.runPolicyIteration(30, 1e-6);

		Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
		VisualActionObserver vob = new VisualActionObserver(v);
		vob.initGUI();


		SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
		env.addObservers(vob);

		for(int i = 0; i < 5; i++){
			PolicyUtils.rollout(p, env);
			env.resetEnvironment();
		}

		System.out.println("Finished");


	}
 
Example #16
Source File: OptionsExample.java    From burlap_examples with MIT License
public static Episode optionExecuteResult(SADomain domain, Option o, State s){
	SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
	EnvironmentOptionOutcome eo = o.control(env, 0.99);
	return eo.episode;
}
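
The returned Episode records every primitive step taken while the option executed. A short sketch of inspecting it is shown below; domain, option, and startState are placeholder variables.

Episode e = optionExecuteResult(domain, option, startState);
System.out.println("steps: " + e.numTimeSteps());
System.out.println("actions: " + e.actionSequence);
System.out.println("rewards: " + e.rewardSequence);
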
 
Example #17
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License
public static void MCLSPIFB(){

		MountainCar mcGen = new MountainCar();
		SADomain domain = mcGen.generateDomain();

		StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
		SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
		SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

		NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
				.variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
				.variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));

		FourierBasis fb = new FourierBasis(inputFeatures, 4);

		LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
		Policy p = lspi.runPolicyIteration(30, 1e-6);

		Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
		VisualActionObserver vob = new VisualActionObserver(v);
		vob.initGUI();

		SimulatedEnvironment env = new SimulatedEnvironment(domain, new MCState(mcGen.physParams.valleyPos(), 0.));
		env.addObservers(vob);

		for(int i = 0; i < 5; i++){
			PolicyUtils.rollout(p, env);
			env.resetEnvironment();
		}

		System.out.println("Finished");


	}
 
Example #18
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License
public static void LLSARSA(){

		LunarLanderDomain lld = new LunarLanderDomain();
		OOSADomain domain = lld.generateDomain();

		LLState s = new LLState(new LLAgent(5, 0, 0), new LLBlock.LLPad(75, 95, 0, 10, "pad"));

		ConcatenatedObjectFeatures inputFeatures = new ConcatenatedObjectFeatures()
				.addObjectVectorizion(LunarLanderDomain.CLASS_AGENT, new NumericVariableFeatures());

		int nTilings = 5;
		double resolution = 10.;

		double xWidth = (lld.getXmax() - lld.getXmin()) / resolution;
		double yWidth = (lld.getYmax() - lld.getYmin()) / resolution;
		double velocityWidth = 2 * lld.getVmax() / resolution;
		double angleWidth = 2 * lld.getAngmax() / resolution;



		TileCodingFeatures tilecoding = new TileCodingFeatures(inputFeatures);
		tilecoding.addTilingsForAllDimensionsWithWidths(
				new double []{xWidth, yWidth, velocityWidth, velocityWidth, angleWidth},
				nTilings,
				TilingArrangement.RANDOM_JITTER);




		double defaultQ = 0.5;
		DifferentiableStateActionValue vfa = tilecoding.generateVFA(defaultQ/nTilings);
		GradientDescentSarsaLam agent = new GradientDescentSarsaLam(domain, 0.99, vfa, 0.02, 0.5);

		SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
		List<Episode> episodes = new ArrayList<Episode>();
		for(int i = 0; i < 5000; i++){
			Episode ea = agent.runLearningEpisode(env);
			episodes.add(ea);
			System.out.println(i + ": " + ea.maxTimeStep());
			env.resetEnvironment();
		}

		Visualizer v = LLVisualizer.getVisualizer(lld.getPhysParams());
		new EpisodeSequenceVisualizer(v, domain, episodes);

	}
 
Example #19
Source File: PolicyUtils.java    From burlap with Apache License 2.0
/**
 * This method will return the episode that results from following the given policy from state s. The episode will terminate
 * when the policy reaches a terminal state.
 * @param p the {@link Policy} to roll out
 * @param s the state from which to roll out the policy
 * @param model the model from which to sample
 * @return an {@link Episode} object that records the events from following the policy.
 */
public static Episode rollout(Policy p, State s, SampleModel model){
	return rollout(p, new SimulatedEnvironment(model, s));
}
 
Example #20
Source File: PolicyUtils.java    From burlap with Apache License 2.0
/**
 * This method will return the episode that results from following the given policy from state s. The episode will terminate
 * when the policy reaches a terminal state or when the number of steps surpasses maxSteps.
 * @param p the {@link Policy} to roll out
 * @param s the state from which to roll out the policy
 * @param model the model from which to sample state transitions
 * @param maxSteps the maximum number of steps to take before terminating the policy rollout.
 * @return an {@link Episode} object that records the events from following the policy.
 */
public static Episode rollout(Policy p, State s, SampleModel model, int maxSteps){
	return rollout(p, new SimulatedEnvironment(model, s), maxSteps);
}
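
Both overloads build a fresh SimulatedEnvironment per call, so every rollout starts from s. When you already have a live environment whose state you want to reuse between rollouts, you can roll out against it directly, as Examples #9, #15, and #17 do; the sketch below assumes the Environment-based overloads mirror the model-based ones above, and model, s, and p are placeholders.

SimulatedEnvironment env = new SimulatedEnvironment(model, s);
Episode e1 = PolicyUtils.rollout(p, env);      //run until a terminal state is reached
env.resetEnvironment();
Episode e2 = PolicyUtils.rollout(p, env, 200); //or until at most 200 steps have been taken
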
 
Example #21
Source File: PlotTest.java    From burlap_examples with MIT License
public static void main(String [] args){

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

		//ends when the agent reaches a location
		final TerminalFunction tf = new SinglePFTF(
				PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

		//reward function definition
		final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

		gw.setTf(tf);
		gw.setRf(rf);


		final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

		//setup initial state
		GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));



		//initial state generator
		final ConstantStateGenerator sg = new ConstantStateGenerator(s);


		//set up the state hashing system for looking up states
		final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();


		/**
		 * Create factory for Q-learning agent
		 */
		LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

			public String getAgentName() {
				return "Q-learning";
			}

			public LearningAgent generateAgent() {
				return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
			}
		};

		//define learning environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

		//define experiment
		LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env,
				10, 100, qLearningFactory);

		exp.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
				PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);


		//start experiment
		exp.startExperiment();


	}
 
Example #22
Source File: VisualExplorer.java    From burlap with Apache License 2.0
/**
 * Initializes with a domain and initial state, automatically creating a {@link burlap.mdp.singleagent.environment.SimulatedEnvironment}
 * as the environment with which to interact. The created {@link burlap.mdp.singleagent.environment.SimulatedEnvironment} will
 * have a {@link burlap.mdp.singleagent.common.NullRewardFunction} and a {@link burlap.mdp.auxiliary.common.NullTermination} set.
 * @param domain the domain to explore
 * @param painter the 2D state visualizer
 * @param baseState the initial state from which to explore
 */
public VisualExplorer(SADomain domain, Visualizer painter, State baseState){
	Environment env = new SimulatedEnvironment(domain, baseState);
	this.init(domain, env, painter, 800, 800);
}
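
A typical call site combines this constructor with the grid world visualizer and key bindings seen in the other examples. The sketch below assumes gwd is a configured GridWorldDomain and initialState its start state.

VisualExplorer exp = new VisualExplorer(domain,
		GridWorldVisualizer.getVisualizer(gwd.getMap()), initialState);
exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");
exp.initGUI();
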
 
Example #23
Source File: EnvironmentShell.java    From burlap with Apache License 2.0
/**
 * Creates a shell for a {@link SimulatedEnvironment} rooted at the input state using std in and std out.
 * @param domain the BURLAP domain
 * @param s the initial state for the simulated environment that will be created.
 */
public EnvironmentShell(SADomain domain, State s){
	this(domain, new SimulatedEnvironment(domain, s), System.in, System.out);
}
 
Example #24
Source File: PlannerFactory.java    From cs7641-assignment4 with MIT License
Planner createPlanner(int episodeIndex, SADomain domain, HashableStateFactory hashingFactory, SimulatedEnvironment simulatedEnvironment);
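
The interface above carries no implementation, so here is a minimal sketch of what one might look like; ValueIteration is a standard BURLAP planner, and using episodeIndex as the iteration limit is just one illustrative choice.

public class ValueIterationPlannerFactory implements PlannerFactory {

	@Override
	public Planner createPlanner(int episodeIndex, SADomain domain, HashableStateFactory hashingFactory,
			SimulatedEnvironment simulatedEnvironment) {
		//value iteration with a 0.99 discount, 0.001 max delta, and at most episodeIndex sweeps
		return new ValueIteration(domain, 0.99, hashingFactory, 0.001, episodeIndex);
	}
}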