burlap.behavior.singleagent.planning.stochastic.valueiteration.ValueIteration Java Examples

The following examples show how to use burlap.behavior.singleagent.planning.stochastic.valueiteration.ValueIteration. Each example is drawn from an open-source project; its source file, originating project, and license are listed above the code.
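Before the project-specific examples, here is a minimal, self-contained sketch of the current BURLAP 3 ValueIteration API on the stock four-rooms grid world. The domain setup and parameter values below are illustrative choices for this sketch, not taken from the projects listed here.

import burlap.behavior.policy.Policy;
import burlap.behavior.singleagent.planning.stochastic.valueiteration.ValueIteration;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.GridWorldTerminalFunction;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.statehashing.simple.SimpleHashableStateFactory;

public class ValueIterationSketch {

	public static void main(String[] args) {
		// Stock 11x11 four-rooms grid world with stochastic (80% success) transitions.
		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setMapToFourRooms();
		gwd.setProbSucceedTransitionDynamics(0.8);
		gwd.setTf(new GridWorldTerminalFunction(10, 10)); // terminate in the top-right corner
		SADomain domain = gwd.generateDomain();

		// Agent starts in the bottom-left corner.
		State initialState = new GridWorldState(new GridAgent(0, 0));

		// Discount 0.99, value-change threshold 0.001, at most 100 iterations.
		ValueIteration vi = new ValueIteration(domain, 0.99,
				new SimpleHashableStateFactory(), 0.001, 100);

		// Run value iteration from the initial state and keep the greedy policy it produces.
		Policy p = vi.planFromState(initialState);
	}
}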
Example #1
Source File: Main.java    From cs7641-assignment4 with MIT License
private static Problem createProblem2() {
	/*
	 * A larger maze-style surface; see the legend in createProblem1 (Example #3 below) for the
	 * meaning of each character (X, 0, 1, G, S, M, L).
	 */
	String[] map = new String[] {
			"111111111111111111111",
			"X00010001000100000101",
			"101110101L1010S110101",
			"100010101000100010101",
			"11101010101111S110101",
			"100010100000100000001",
			"1011101S1010101110101",
			"100010101010001000101",
			"101010101011111010111",
			"101000001000100010001",
			"1110101M111010M110101",
			"100010100010100000101",
			"101110101010101111S01",
			"100010001010001010001",
			"111011101010111010111",
			"101010001010001000101",
			"10101011101L001011101",
			"1000001S0000101010001",
			"101011110110101010101",
			"10100000001000100010G",
			"111111111111111111111",
	};

	HashMap<Algorithm, Integer> numIterationsHashMap = new HashMap<Algorithm, Integer>();
	numIterationsHashMap.put(Algorithm.ValueIteration, 100);
	numIterationsHashMap.put(Algorithm.PolicyIteration, 20);
	numIterationsHashMap.put(Algorithm.QLearning, 1000);
	
	HashMap<HazardType, Double> hazardRewardsHashMap = new HashMap<HazardType, Double>();
	hazardRewardsHashMap.put(HazardType.SMALL, -1.0);
	hazardRewardsHashMap.put(HazardType.MEDIUM, -2.0);
	hazardRewardsHashMap.put(HazardType.LARGE, -3.0);

	return new Problem(map, numIterationsHashMap, -0.1, 10, hazardRewardsHashMap);
}
 
Example #2
Source File: AnalysisRunner.java    From omscs-cs7641-machine-learning-assignment-4 with GNU Lesser General Public License v3.0
private static List<State> getAllStates(Domain domain,
		RewardFunction rf, TerminalFunction tf, State initialState){
	// Value iteration is used here only for its state reachability analysis:
	// discount 0.99, a loose 0.5 convergence threshold, and at most 100 iterations.
	ValueIteration vi = new ValueIteration(
			domain,
			rf,
			tf,
			0.99,
			new SimpleHashableStateFactory(),
			0.5, 100);
	vi.planFromState(initialState);

	// Return every state the planner discovered from the initial state.
	return vi.getAllStates();
}
 
Example #3
Source File: Main.java    From cs7641-assignment4 with MIT License
private static Problem createProblem1() {
	/*
	 * The surface can be described as follows:
	 * 
	 * X — The starting point of the agent.
	 * 0 — Represents a safe cell where the agent can move.
	 * 1 — Represents a wall. The agent can't move to this cell.
	 * G — Represents the goal that the agent wants to achieve.
	 * S — Represents a small hazard. The agent will be penalized.
	 * M — Represents a medium hazard. The agent will be penalized.
	 * L — Represents a large hazard. The agent will be penalized.
	 */
	String[] map = new String[] {
			"X0011110",
			"01000S10",
			"010M110S",
			"0M0000M1",
			"01111010",
			"00L010S0",
			"0S001000",
			"000000SG",
	};

	/*
	 * Specify the number of iterations for each algorithm. If an algorithm is missing from this
	 * map, 100 is used as a default, which may not be a sensible choice in every case.
	 */
	HashMap<Algorithm, Integer> numIterationsHashMap = new HashMap<Algorithm, Integer>();
	numIterationsHashMap.put(Algorithm.ValueIteration, 50);
	numIterationsHashMap.put(Algorithm.PolicyIteration, 10);
	numIterationsHashMap.put(Algorithm.QLearning, 500);

	/*
	 * These are the specific rewards for each one of the hazards. Here you can be creative and
	 * play with different values as you see fit.
	 */
	HashMap<HazardType, Double> hazardRewardsHashMap = new HashMap<HazardType, Double>();
	hazardRewardsHashMap.put(HazardType.SMALL, -1.0);
	hazardRewardsHashMap.put(HazardType.MEDIUM, -2.0);
	hazardRewardsHashMap.put(HazardType.LARGE, -3.0);

	/*
	 * Below, the constructor also receives the default reward for empty cells (-0.1, a small
	 * penalty that encourages the agent to find the goal quickly) and the reward for the goal
	 * cell (10, large enough to make reaching it worthwhile).
	 */
	return new Problem(map, numIterationsHashMap, -0.1, 10, hazardRewardsHashMap);
}
 
Example #4
Source File: BasicBehavior.java    From burlap_examples with MIT License
public void valueIterationExample(String outputPath){

	// Discount 0.99, value-change threshold 0.001, at most 100 iterations.
	Planner planner = new ValueIteration(domain, 0.99, hashingFactory, 0.001, 100);
	Policy p = planner.planFromState(initialState);

	// Record a rollout of the resulting policy and write the episode to disk.
	PolicyUtils.rollout(p, initialState, domain.getModel()).write(outputPath + "vi");

	// Visualize the computed value function and policy.
	simpleValueFunctionVis((ValueFunction)planner, p);
	//manualValueFunctionVis((ValueFunction)planner, p);

}
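This method comes from the BasicBehavior tutorial class, whose constructor sets up the domain, hashingFactory, and initialState fields used above. A minimal driver for it might look like the sketch below; it assumes the class also provides the tutorial's visualize(String) helper for replaying the recorded episode.

	public static void main(String[] args) {
		BasicBehavior example = new BasicBehavior();
		String outputPath = "output/"; // directory where the recorded episode is written

		// Plan with value iteration and record a rollout of the resulting policy.
		example.valueIterationExample(outputPath);

		// Replay the recorded episode in the episode visualizer (assumed tutorial helper).
		example.visualize(outputPath);
	}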
 
Example #5
Source File: MinecraftSolver.java    From burlapcraft with GNU Lesser General Public License v3.0
public static void stocasticPlan(double gamma){

	// Generate a BURLAP domain from the current Minecraft dungeon.
	MinecraftDomainGenerator simdg = new MinecraftDomainGenerator();
	SADomain domain = simdg.generateDomain();

	// Read the agent's current state out of the running Minecraft world.
	State initialState = MinecraftStateGeneratorHelper.getCurrentState(BurlapCraft.currentDungeon);

	// Plan with value iteration: the supplied discount, identifier-dependent state hashing,
	// a 0.001 value-change threshold, and at most 1000 iterations.
	Planner planner = new ValueIteration(domain, gamma, new SimpleHashableStateFactory(false), 0.001, 1000);
	Policy p = planner.planFromState(initialState);

	// Execute the resulting policy in the live Minecraft environment.
	MinecraftEnvironment me = new MinecraftEnvironment();
	PolicyUtils.rollout(p, me);
}
 
Example #6
Source File: QMDP.java    From burlap with Apache License 2.0
/**
 * Initializes and creates a {@link burlap.behavior.singleagent.planning.stochastic.valueiteration.ValueIteration} planner
 * to solve the underlying MDP. You should call the {@link #forceMDPPlanningFromAllStates()} method after construction
 * to have the constructed {@link burlap.behavior.singleagent.planning.stochastic.valueiteration.ValueIteration} instance
 * perform planning.
 * @param domain the POMDP domain
 * @param rf the POMDP hidden state reward function
 * @param tf the POMDP hidden state terminal function
 * @param discount the discount factor
 * @param hashingFactory the {@link burlap.statehashing.HashableStateFactory} for the {@link burlap.behavior.singleagent.planning.stochastic.valueiteration.ValueIteration} instance to use
 * @param maxDelta the maximum value function change threshold that will cause planning to terminate
 * @param maxIterations the maximum number of value iteration iterations
 */
public QMDP(PODomain domain, RewardFunction rf, TerminalFunction tf, double discount, HashableStateFactory hashingFactory, double maxDelta, int maxIterations){
	this.domain = domain;
	ValueIteration vi = new ValueIteration(domain, discount, hashingFactory, maxDelta, maxIterations);
	this.mdpQSource = vi;
	this.solverInit(domain, discount, hashingFactory);
}
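A short usage sketch for this constructor, in the style of the snippets above. It assumes a PODomain, reward function, and terminal function are already defined for your POMDP, that QMDP's Q-value estimates can be consumed by a GreedyQPolicy, and that the usual BURLAP imports are in scope; the helper method itself is hypothetical, not part of BURLAP.

	// Hypothetical helper: builds a QMDP policy for an already-constructed POMDP.
	public static GreedyQPolicy qmdpPolicy(PODomain domain, RewardFunction rf, TerminalFunction tf){
		// Illustrative parameters: discount 0.99, 0.001 value-change threshold, at most 100 iterations.
		QMDP qmdp = new QMDP(domain, rf, tf, 0.99, new SimpleHashableStateFactory(), 0.001, 100);

		// As the Javadoc advises, run the underlying ValueIteration over the hidden-state MDP.
		qmdp.forceMDPPlanningFromAllStates();

		// Act greedily with respect to the QMDP Q-value estimates over belief states.
		return new GreedyQPolicy(qmdp);
	}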