burlap.mdp.auxiliary.common.ConstantStateGenerator Java Examples

The following examples show how to use burlap.mdp.auxiliary.common.ConstantStateGenerator. You can go to the original project or source file by following the links above each example.
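As a quick orientation before the examples: ConstantStateGenerator simply wraps a single State and hands back that same state every time one is requested. A minimal sketch of that contract (assuming a pre-built initialState and the standard StateGenerator interface it implements):

// Minimal sketch: ConstantStateGenerator always returns the state it was constructed with.
// initialState is assumed to be built elsewhere; generateState() is the StateGenerator method.
StateGenerator sg = new ConstantStateGenerator(initialState);
State first = sg.generateState();
State second = sg.generateState(); // same state as first, every time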
Example #1
Source File: LSPI.java    From burlap with Apache License 2.0
/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

	if(this.model == null){
		throw new RuntimeException("LSPI cannot execute planFromState because the reward function and/or terminal function for planning have not been set. Use the initializeForPlanning method to set them.");
	}

	if(planningCollector == null){
		this.planningCollector = new SARSCollector.UniformRandomSARSCollector(this.actionTypes);
	}
	this.dataset = this.planningCollector.collectNInstances(new ConstantStateGenerator(initialState), this.model, this.numSamplesForPlanning, Integer.MAX_VALUE, this.dataset);
	return this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);


}
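The returned GreedyQPolicy can then be rolled out against the domain's model, much as Example #2 below does (a sketch only; the lspi planner, domain, and initialState are assumed to be configured elsewhere):

// Hypothetical usage sketch: lspi, domain, and initialState are assumed to exist already.
GreedyQPolicy policy = lspi.planFromState(initialState);
Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), 200);
System.out.println("steps: " + episode.numTimeSteps());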
 
Example #2
Source File: Main.java    From cs7641-assignment4 with MIT License
/**
 * Here is where the magic happens. In this method I loop through the specified number of
 * episodes (iterations) and run the specified algorithm. To keep things nice and clean, I use
 * this method to run all three algorithms; the algorithm-specific details are supplied through
 * the PlannerFactory interface.
 * 
 * This method collects all the information from the algorithm and packs it into an Analysis
 * instance that later gets dumped to the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory, State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
	ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
	SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
	Planner planner = null;
	Policy policy = null;
	for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
		long startTime = System.nanoTime();
		planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
		policy = planner.planFromState(initialState);

		/*
		 * If we haven't converged, following the policy may leave the agent wandering around
		 * and it might never reach the goal. To avoid this, we need to set the maximum number
		 * of steps to take before terminating the policy rollout. I decided to set this maximum
		 * to the number of grid locations in our map (width * width). This should give the
		 * agent plenty of room to wander around.
		 * 
		 * The smaller this number is, the faster the algorithm will run.
		 */
		int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

		Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
		analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
	}

	if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
		learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
	}

	if (SHOW_VISUALIZATION && planner != null && policy != null) {
		visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
	}
}
 
Example #3
Source File: SimulatedEnvironment.java    From burlap with Apache License 2.0
public SimulatedEnvironment(SADomain domain, State initialState) {

	this.stateGenerator = new ConstantStateGenerator(initialState);
	this.curState = initialState;
	if(domain.getModel() == null){
		throw new RuntimeException("SimulatedEnvironment requires a Domain with a model, but the input domain does not have one.");
	}
	this.model = domain.getModel();
}
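This constructor is a convenience over the StateGenerator-based one; the same environment can be set up by wrapping the state yourself, as Example #7 below does (a sketch, with domain and initialState assumed to exist):

// Equivalent setup (sketch): construct the ConstantStateGenerator explicitly
// and pass it to the StateGenerator-based constructor.
StateGenerator sg = new ConstantStateGenerator(initialState);
SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);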
 
Example #4
Source File: SimulatedEnvironment.java    From burlap with Apache License 2.0
@Override
public void setCurStateTo(State s) {
	if(this.stateGenerator == null){
		this.stateGenerator = new ConstantStateGenerator(s);
	}
	this.curState = s;
}
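Note that the ConstantStateGenerator is only installed here when no state generator has been set yet; in that case the given state also becomes the state the environment resets to. A sketch of that effect (assuming the standard Environment.resetEnvironment() and currentObservation() methods, and an env that had no generator set beforehand):

// Sketch under the assumptions above: s becomes both the current state and the reset state.
env.setCurStateTo(s);
env.resetEnvironment();                     // regenerates from the lazily created ConstantStateGenerator
State observed = env.currentObservation();  // expected to equal s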
 
Example #5
Source File: SimulatedEnvironment.java    From burlap with Apache License 2.0
public SimulatedEnvironment(SampleModel model, State initialState) {

	this.stateGenerator = new ConstantStateGenerator(initialState);
	this.curState = initialState;
	this.model = model;
}
 
Example #6
Source File: GameEpisode.java    From burlap with Apache License 2.0
public static void main(String[] args) {

	GridGame gg = new GridGame();
	OOSGDomain domain = gg.generateDomain();
	State s = GridGame.getTurkeyInitialState();

	JointRewardFunction jr = new GridGame.GGJointRewardFunction(domain);
	TerminalFunction tf = new GridGame.GGTerminalFunction(domain);
	World world = new World(domain, jr, tf, new ConstantStateGenerator(s));
	DPrint.toggleCode(world.getDebugId(), false);

	SGAgent ragent1 = new RandomSGAgent();
	SGAgent ragent2 = new RandomSGAgent();

	SGAgentType type = new SGAgentType("agent", domain.getActionTypes());

	world.join(ragent1);
	world.join(ragent2);

	GameEpisode ga = world.runGame(20);
	System.out.println(ga.maxTimeStep());

	String serialized = ga.serialize();
	System.out.println(serialized);

	GameEpisode read = GameEpisode.parse(serialized);
	System.out.println(read.maxTimeStep());
	System.out.println(read.state(0).toString());
}
 
Example #7
Source File: PlotTest.java    From burlap_examples with MIT License
public static void main(String [] args){

	GridWorldDomain gw = new GridWorldDomain(11, 11); //11x11 grid world
	gw.setMapToFourRooms(); //four rooms layout
	gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

	//ends when the agent reaches a location
	final TerminalFunction tf = new SinglePFTF(
			PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

	//reward function definition
	final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

	gw.setTf(tf);
	gw.setRf(rf);

	final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

	//set up the initial state
	GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

	//initial state generator
	final ConstantStateGenerator sg = new ConstantStateGenerator(s);

	//set up the state hashing system for looking up states
	final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();

	//create factory for Q-learning agent
	LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

		public String getAgentName() {
			return "Q-learning";
		}

		public LearningAgent generateAgent() {
			return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
		}
	};

	//define learning environment
	SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

	//define experiment
	LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env,
			10, 100, qLearningFactory);

	exp.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
			PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);

	//start experiment
	exp.startExperiment();
}
 
Example #8
Source File: World.java    From burlap with Apache License 2.0
/**
 * Initializes the world.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param initialState the initial state of the world every time a new game starts
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, State initialState){
	this.init(domain, domain.getJointActionModel(), jr, tf, new ConstantStateGenerator(initialState), new IdentityStateMapping());
}
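In other words, this convenience constructor is equivalent to passing a ConstantStateGenerator explicitly, as Example #6 above does (a sketch; domain, jr, tf, and initialState are assumed to be defined elsewhere):

// Equivalent explicit form (sketch): wrap the initial state yourself.
World world = new World(domain, jr, tf, new ConstantStateGenerator(initialState));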