burlap.behavior.policy.Policy Java Examples

The following examples show how to use burlap.behavior.policy.Policy. Each example is taken from an open-source project; the source file, originating project, and license are noted above its code.
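Every example below ultimately works through the same small Policy contract: ask the policy for an action in a state, ask for the probability of an action, or roll it out to produce an Episode. The following is a minimal sketch of that pattern, reusing only classes that appear in the examples on this page (snippet style, imports omitted as elsewhere; the grid world setup is purely illustrative):

GridWorldDomain gwd = new GridWorldDomain(11, 11);
SADomain domain = gwd.generateDomain();
State s = new GridWorldState(new GridAgent(0, 0));

Policy p = new RandomPolicy(domain);                          // any Policy implementation works here
Action a = p.action(s);                                       // select an action for state s
double prob = p.actionProb(s, a);                             // probability of a in s under p
Episode e = PolicyUtils.rollout(p, s, domain.getModel(), 30); // roll the policy out for at most 30 steps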
Example #1
Source File: QLearning.java    From burlap with Apache License 2.0
/**
 * Initializes the algorithm. By default the agent will only save the last learning episode and a call to the {@link #planFromState(State)} method
 * will cause the valueFunction to use only one episode for planning; this should probably be changed to a much larger value if you plan on using this
 * algorithm as a planning algorithm.
 * @param domain the domain in which to learn
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use for Q-lookups
 * @param qInitFunction a {@link burlap.behavior.valuefunction.QFunction} object that can be used to initialize the Q-values.
 * @param learningRate the learning rate
 * @param learningPolicy the learning policy to follow during a learning episode.
 * @param maxEpisodeSize the maximum number of steps the agent will take in a learning episode before the episode is terminated.
 */
protected void QLInit(SADomain domain, double gamma, HashableStateFactory hashingFactory,
					  QFunction qInitFunction, double learningRate, Policy learningPolicy, int maxEpisodeSize){
	
	this.solverInit(domain, gamma, hashingFactory);
	this.qFunction = new HashMap<HashableState, QLearningStateNode>();
	this.learningRate = new ConstantLR(learningRate);
	this.learningPolicy = learningPolicy;
	this.maxEpisodeSize = maxEpisodeSize;
	this.qInitFunction = qInitFunction;
	
	numEpisodesForPlanning = 1;
	maxQChangeForPlanningTermination = 0.;

	
}
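For context, here is a hedged sketch of how a learning Policy typically reaches this initializer in practice. The five-argument QLearning constructor, the setLearningPolicy method, and the EpsilonGreedy (QProvider, epsilon) constructor are assumptions based on common BURLAP usage, not taken from the snippet above; domain and initialState are assumed to be set up as in the other examples.

// assumed public constructor: (domain, gamma, hashingFactory, qInit, learningRate)
QLearning agent = new QLearning(domain, 0.99, new SimpleHashableStateFactory(), 0., 1.);
agent.setLearningPolicy(new EpsilonGreedy(agent, 0.1));   // replace the default learning policy

SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);
for(int i = 0; i < 50; i++){
	Episode e = agent.runLearningEpisode(env);            // each episode is capped by maxEpisodeSize
	env.resetEnvironment();
}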
 
Example #2
Source File: GradientDescentSarsaLam.java    From burlap with Apache License 2.0
/**
 * Initializes SARSA(\lambda). By default the agent will only save the last learning episode and a call to the {@link #planFromState(State)} method
 * will cause the valueFunction to use only one episode for planning; this should probably be changed to a much larger value if you plan on using this
 * algorithm as a planning algorithm.
 * @param domain the domain in which to learn
 * @param gamma the discount factor
 * @param vfa the value function approximation method to use to estimate Q-values
 * @param learningRate the learning rate
 * @param learningPolicy the learning policy to follow during a learning episode.
 * @param maxEpisodeSize the maximum number of steps the agent will take in an episode before terminating
 * @param lambda specifies the strength of eligibility traces (0 for one step, 1 for full propagation)
 */
protected void GDSLInit(SADomain domain, double gamma, DifferentiableStateActionValue vfa,
						double learningRate, Policy learningPolicy, int maxEpisodeSize, double lambda){
	
	this.solverInit(domain, gamma, null);
	this.vfa = vfa;
	this.learningRate = new ConstantLR(learningRate);
	this.learningPolicy = learningPolicy;
	this.maxEpisodeSize = maxEpisodeSize;
	this.lambda = lambda;

	
	numEpisodesForPlanning = 1;
	maxWeightChangeForPlanningTermination = 0.;

	
}
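For reference, lambda plays its usual eligibility-trace role here. Stated from general reinforcement-learning background (not from the snippet above, and without claiming which trace variant this class defaults to), the textbook gradient-descent SARSA(\lambda) update with accumulating traces adjusts the approximator weights \theta as

e \leftarrow \gamma \lambda e + \nabla_\theta Q_\theta(s_t, a_t), \qquad \theta \leftarrow \theta + \alpha \delta_t e, \qquad \delta_t = r_{t+1} + \gamma Q_\theta(s_{t+1}, a_{t+1}) - Q_\theta(s_t, a_t),

so lambda = 0 credits only the current step while lambda = 1 propagates credit along the whole trajectory.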
 
Example #3
Source File: DeepQLearner.java    From burlap_caffe with Apache License 2.0
public DeepQLearner(SADomain domain, double gamma, int replayStartSize, Policy policy, DQN vfa, StateMapping stateMapping) {
    super(domain, gamma, vfa, stateMapping);

    if (replayStartSize > 0) {
        System.out.println(String.format("Starting with random policy for %d frames", replayStartSize));

        this.replayStartSize = replayStartSize;
        this.trainingPolicy = policy;
        setLearningPolicy(new RandomPolicy(domain));
        runningRandomPolicy = true;
    } else {
        setLearningPolicy(policy);

        runningRandomPolicy = false;
    }
}
 
Example #4
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License
public static void IPSS(){

		InvertedPendulum ip = new InvertedPendulum();
		ip.physParams.actionNoise = 0.;
		RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
		TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
		ip.setRf(rf);
		ip.setTf(tf);
		SADomain domain = ip.generateDomain();

		State initialState = new InvertedPendulumState();

		SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
		ss.setForgetPreviousPlanResults(true);
		ss.toggleDebugPrinting(false);
		Policy p = new GreedyQPolicy(ss);

		Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
		System.out.println("Num steps: " + e.maxTimeStep());
		Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(e));

	}
 
Example #5
Source File: ValueFunctionVisualizerGUI.java    From burlap with Apache License 2.0
/**
 * A method for creating a common 2D arrow-glyphed value function and policy visualization. The value of states
 * will be represented by colored cells from red (lowest value) to blue (highest value). North-south-east-west
 * actions will be rendered with arrows using {@link burlap.behavior.singleagent.auxiliary.valuefunctionvis.common.ArrowActionGlyph}
 * objects. The GUI will not be launched by default; call {@link #initGUI()} on the returned object to start it.
 * @param states the states whose value should be rendered.
 * @param valueFunction the valueFunction that can return the state values.
 * @param p the policy to render
 * @param xVar the variable key for the x variable
 * @param yVar the variable key for the y variable
 * @param xRange the range of the x variable
 * @param yRange the range of the y variable
 * @param xWidth the width of each rendered state within the x domain
 * @param yWidth the width of each rendered state within the y domain
 * @param northActionName the name of the north action
 * @param southActionName the name of the south action
 * @param eastActionName the name of the east action
 * @param westActionName the name of the west action
 * @return a {@link burlap.behavior.singleagent.auxiliary.valuefunctionvis.ValueFunctionVisualizerGUI}
 */
public static ValueFunctionVisualizerGUI createGridWorldBasedValueFunctionVisualizerGUI(List <State> states, ValueFunction valueFunction, Policy p,
															 Object xVar, Object yVar,
															 VariableDomain xRange,
															 VariableDomain yRange,
															 double xWidth,
															 double yWidth,
															 String northActionName,
															 String southActionName,
															 String eastActionName,
															 String westActionName){


	StateValuePainter2D svp = new StateValuePainter2D();
	svp.setXYKeys(xVar, yVar, xRange, yRange, xWidth, yWidth);


	PolicyGlyphPainter2D spp = ArrowActionGlyph.getNSEWPolicyGlyphPainter(xVar, yVar, xRange, yRange, xWidth, yWidth,
			northActionName, southActionName, eastActionName, westActionName);

	ValueFunctionVisualizerGUI gui = new ValueFunctionVisualizerGUI(states, svp, valueFunction);
	gui.setSpp(spp);
	gui.setPolicy(p);
	gui.setBgColor(Color.GRAY);


	return gui;

}
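A hedged sketch of calling this factory method for an 11x11 grid world follows. The variable keys ("agent:x", "agent:y") and the action names are illustrative assumptions about the state representation, and states, valueFunction, p, initialState, domain, and hashingFactory are assumed to come from a planner as in the other examples on this page.

List<State> states = StateReachability.getReachableStates(initialState, domain, hashingFactory);
ValueFunctionVisualizerGUI gui = ValueFunctionVisualizerGUI.createGridWorldBasedValueFunctionVisualizerGUI(
		states, valueFunction, p,
		"agent:x", "agent:y",                          // hypothetical x/y variable keys
		new VariableDomain(0, 11), new VariableDomain(0, 11),
		1, 1,                                          // rendered cell width in each dimension
		"north", "south", "east", "west");             // hypothetical action names
gui.initGUI();                                         // the GUI is not shown until this call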
 
Example #6
Source File: AnalysisRunner.java    From omscs-cs7641-machine-learning-assignment-4 with GNU Lesser General Public License v3.0
public void simpleValueFunctionVis(ValueFunction valueFunction, Policy p, 
		State initialState, Domain domain, HashableStateFactory hashingFactory, String title){

	List<State> allStates = StateReachability.getReachableStates(initialState,
			(SADomain)domain, hashingFactory);
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(
			allStates, valueFunction, p);
	gui.setTitle(title);
	gui.initGUI();

}
 
Example #7
Source File: AnalysisRunner.java    From omscs-cs7641-machine-learning-assignment-4 with GNU Lesser General Public License v3.0
public void runPolicyIteration(BasicGridWorld gen, Domain domain,
			State initialState, RewardFunction rf, TerminalFunction tf, boolean showPolicyMap) {
		System.out.println("//Policy Iteration Analysis//");
		PolicyIteration pi = null;
		Policy p = null;
		EpisodeAnalysis ea = null;
		int increment = MAX_ITERATIONS/NUM_INTERVALS;
		for(int numIterations = increment; numIterations <= MAX_ITERATIONS; numIterations += increment){
			long startTime = System.nanoTime();
			pi = new PolicyIteration(
					domain,
					rf,
					tf,
					0.99,
					hashingFactory,
					-1, 1, numIterations);
	
			// run planning from our initial state
			p = pi.planFromState(initialState);
			AnalysisAggregator.addMillisecondsToFinishPolicyIteration((int) ((System.nanoTime()-startTime)/1000000));

			// evaluate the policy with one rollout and visualize the trajectory
			ea = p.evaluateBehavior(initialState, rf, tf);
			AnalysisAggregator.addPolicyIterationReward(calcRewardInEpisode(ea));
			AnalysisAggregator.addStepsToFinishPolicyIteration(ea.numTimeSteps());
		}

//		Visualizer v = gen.getVisualizer();
//		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));
		AnalysisAggregator.printPolicyIterationResults();

		MapPrinter.printPolicyMap(getAllStates(domain,rf,tf,initialState), p, gen.getMap());
		System.out.println("\n\n");

		//visualize the value function and policy.
		if(showPolicyMap){
			simpleValueFunctionVis(pi, p, initialState, domain, hashingFactory, "Policy Iteration");
		}
	}
 
Example #8
Source File: SubgoalOption.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param name the name of the option
 * @param p the option's policy
 * @param init the initiation states of the option
 * @param terminationStates the deterministic termination states of the option.
 */
public SubgoalOption(String name, Policy p, StateConditionTest init, StateConditionTest terminationStates){
	this.name = name;
	this.policy = p;
	this.initiationTest = init;
	this.terminationStates = terminationStates;
	
}
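A hedged usage sketch: the initiation and termination conditions are just StateConditionTest implementations (the same interface used for goal conditions elsewhere on this page). The grid world state access and the pre-computed roomPolicy below are illustrative assumptions, e.g. a GreedyQPolicy from a planner restricted to the room.

StateConditionTest inRoom = new StateConditionTest() {
	public boolean satisfies(State s) {
		GridAgent a = ((GridWorldState)s).agent;
		return a.x < 5 && a.y < 5;                 // hypothetical initiation region
	}
};
StateConditionTest atDoorway = new StateConditionTest() {
	public boolean satisfies(State s) {
		GridAgent a = ((GridWorldState)s).agent;
		return a.x == 5 && a.y == 1;               // hypothetical subgoal / termination state
	}
};
SubgoalOption toDoorway = new SubgoalOption("toDoorway", roomPolicy, inRoom, atDoorway);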
 
Example #9
Source File: MLIRL.java    From burlap with Apache License 2.0
/**
 * Computes and returns the gradient of the Boltzmann policy for the given state and action.
 * @param s the state in which the policy is queried
 * @param ga the action for which the policy is queried.
 * @return the gradient of the Boltzmann policy for the given state and action.
 */
public FunctionGradient logPolicyGrad(State s, Action ga){

	Policy p = new BoltzmannQPolicy((QProvider)this.request.getPlanner(), 1./this.request.getBoltzmannBeta());
	double invActProb = 1./p.actionProb(s, ga);
	FunctionGradient gradient = BoltzmannPolicyGradient.computeBoltzmannPolicyGradient(s, ga, (DifferentiableQFunction)this.request.getPlanner(), this.request.getBoltzmannBeta());

	for(FunctionGradient.PartialDerivative pd : gradient.getNonZeroPartialDerivatives()){
		double newVal = pd.value * invActProb;
		gradient.put(pd.parameterId, newVal);
	}

	return gradient;

}
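The loop above is just the chain rule applied to the log: dividing the policy gradient by the action probability converts it into the gradient of the log-policy,

\nabla_\theta \log \pi_\theta(a \mid s) = \frac{\nabla_\theta \pi_\theta(a \mid s)}{\pi_\theta(a \mid s)},

which is why every partial derivative is scaled by invActProb before being written back into the gradient object.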
 
Example #10
Source File: MLIRL.java    From burlap with Apache License 2.0
/**
 * Computes and returns the log-likelihood of the given trajectory under the current reward function parameters and weights it by the given weight.
 * @param ea the trajectory
 * @param weight the weight to assign the trajectory
 * @return the log-likelihood of the given trajectory under the current reward function parameters, weighted by the given weight.
 */
public double logLikelihoodOfTrajectory(Episode ea, double weight){
	double logLike = 0.;
	Policy p = new BoltzmannQPolicy((QProvider)this.request.getPlanner(), 1./this.request.getBoltzmannBeta());
	for(int i = 0; i < ea.numTimeSteps()-1; i++){
		this.request.getPlanner().planFromState(ea.state(i));
		double actProb = p.actionProb(ea.state(i), ea.action(i));
		logLike += Math.log(actProb);
	}
	logLike *= weight;
	return logLike;
}
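In equation form, the method computes the weighted trajectory log-likelihood

\log L(\xi) = w \sum_{t=0}^{T-1} \log \pi(a_t \mid s_t),

where \pi is the Boltzmann Q-policy re-planned from each state s_t of the trajectory \xi and w is the supplied weight.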
 
Example #11
Source File: ApprenticeshipLearning.java    From burlap with Apache License 2.0
/**
 * Computes a policy that models the expert trajectories included in the request object.
 * @param request the IRL problem description
 * @return the computed {@link Policy}
 */
public static Policy getLearnedPolicy(ApprenticeshipLearningRequest request) {
	if (!request.isValid()) {
		return null;
	}
	if (request.getUsingMaxMargin()) {
		return ApprenticeshipLearning.maxMarginMethod(request);
	}
	return ApprenticeshipLearning.projectionMethod(request);
}
 
Example #12
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Test
public void testBFS() {
	GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));

	DeterministicPlanner planner = new BFS(this.domain, this.goalCondition, this.hashingFactory);
	planner.planFromState(initialState);
	Policy p = new SDPlannerPolicy(planner);
	Episode analysis = rollout(p, initialState, domain.getModel());
	this.evaluateEpisode(analysis, true);
}
 
Example #13
Source File: MCVideo.java    From burlap_examples with MIT License
public static void main(String[] args) {

		MountainCar mcGen = new MountainCar();
		SADomain domain = mcGen.generateDomain();

		StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
		SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
		SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

		NormalizedVariableFeatures features = new NormalizedVariableFeatures()
				.variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
				.variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
		FourierBasis fb = new FourierBasis(features, 4);

		LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
		Policy p = lspi.runPolicyIteration(30, 1e-6);

		Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
		VisualActionObserver vob = new VisualActionObserver(v);
		vob.initGUI();

		SimulatedEnvironment env = new SimulatedEnvironment(domain,
				new MCState(mcGen.physParams.valleyPos(), 0));
		EnvironmentServer envServ = new EnvironmentServer(env, vob);

		for(int i = 0; i < 100; i++){
			PolicyUtils.rollout(p, envServ);
			envServ.resetEnvironment();
		}

		System.out.println("Finished");

	}
 
Example #14
Source File: VITutorial.java    From burlap_examples with MIT License
public static void main(String [] args){

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setTf(new GridWorldTerminalFunction(10, 10));
		gwd.setMapToFourRooms();

		//only go in intended direction 80% of the time
		gwd.setProbSucceedTransitionDynamics(0.8);

		SADomain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = new GridWorldState(new GridAgent(0, 0));

		//setup vi with 0.99 discount factor, a value
		//function initialization that initializes all states to value 0, and which will
		//run for 30 iterations over the state space
		VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ConstantValueFunction(0.0), 30);

		//run planning from our initial state
		Policy p = vi.planFromState(s);

		//evaluate the policy with one rollout and visualize the trajectory
		Episode ea = PolicyUtils.rollout(p, s, domain.getModel());

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));

	}
 
Example #15
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Test
public void testDFS() {
	GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));
	
	DeterministicPlanner planner = new DFS(this.domain, this.goalCondition, this.hashingFactory, -1 , true);
	planner.planFromState(initialState);
	Policy p = new SDPlannerPolicy(planner);
	Episode analysis = rollout(p, initialState, domain.getModel());
	this.evaluateEpisode(analysis);
}
 
Example #16
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Test
public void testAStar() {
	GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));
	
	Heuristic mdistHeuristic = new Heuristic() {
		
		@Override
		public double h(State s) {

			GridAgent agent = ((GridWorldState)s).agent;
			GridLocation location = ((GridWorldState)s).locations.get(0);

			//get agent position
			int ax = agent.x;
			int ay = agent.y;
			
			//get location position
			int lx = location.x;
			int ly = location.y;
			
			//compute Manhattan distance
			double mdist = Math.abs(ax-lx) + Math.abs(ay-ly);
			
			return -mdist;
		}
	};
	
	//provide A* the heuristic as well as the reward function so that it can keep
	//track of the actual cost
	DeterministicPlanner planner = new AStar(domain, goalCondition,
		hashingFactory, mdistHeuristic);
	planner.planFromState(initialState);
	Policy p = new SDPlannerPolicy(planner);
	
	Episode analysis = PolicyUtils.rollout(p, initialState, domain.getModel());
	this.evaluateEpisode(analysis, true);
}
 
Example #17
Source File: Main.java    From cs7641-assignment4 with MIT License
/**
 * Here is where the magic happens. This is where I loop through the specified number
 * of episodes (iterations) and run the specified algorithm. To keep things nice and clean, I use
 * this method to run all three algorithms. The specific details are specified through the
 * PlannerFactory interface.
 * 
 * This method collects all the information from the algorithm and packs it in an Analysis
 * instance that later gets dumped on the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory, State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
	ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
	SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
	Planner planner = null;
	Policy policy = null;
	for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
		long startTime = System.nanoTime();
		planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
		policy = planner.planFromState(initialState);

		/*
	 * If we haven't converged, following the policy can leave the agent wandering around
		 * and it might never reach the goal. To avoid this, we need to set the maximum number
		 * of steps to take before terminating the policy rollout. I decided to set this maximum
		 * at the number of grid locations in our map (width * width). This should give the
		 * agent plenty of room to wander around.
		 * 
		 * The smaller this number is, the faster the algorithm will run.
		 */
		int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

		Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
		analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
	}

	if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
		learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
	}

	if (SHOW_VISUALIZATION && planner != null && policy != null) {
		visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
	}
}
 
Example #18
Source File: Main.java    From cs7641-assignment4 with MIT License
/**
 * This method takes care of visualizing the grid, rewards, and specific policy on a nice
 * BURLAP-predefined GUI. I found this very useful to understand how the algorithm was working.
 */
private static void visualize(Problem map, ValueFunction valueFunction, Policy policy, State initialState, SADomain domain, HashableStateFactory hashingFactory, String title) {
	List<State> states = StateReachability.getReachableStates(initialState, domain, hashingFactory);
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(states, map.getWidth(), map.getWidth(), valueFunction, policy);
	gui.setTitle(title);
	gui.setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
	gui.initGUI();
}
 
Example #19
Source File: DeepQTester.java    From burlap_caffe with Apache License 2.0
public DeepQTester(Policy policy, ExperienceMemory memory, StateMapping stateMapping) {
    this.policy = policy;
    this.memory = memory;
    this.stateMapping = stateMapping;
}
 
Example #20
Source File: ApprenticeshipLearning.java    From burlap with Apache License 2.0
public static Policy generateRandomPolicy(SADomain domain) {
	return new burlap.behavior.policy.RandomPolicy(domain);
}
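A brief usage sketch: the returned random policy behaves like any other Policy, so it can be rolled out directly. The domain and initial state are assumed to be set up as in the other examples on this page.

Policy rand = ApprenticeshipLearning.generateRandomPolicy(domain);
Episode e = PolicyUtils.rollout(rand, initialState, domain.getModel(), 100);  // cap the rollout at 100 steps
System.out.println("Random rollout took " + e.maxTimeStep() + " steps");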
 
Example #21
Source File: BasicBehavior.java    From burlap_examples with MIT License
public void AStarExample(String outputPath){

		Heuristic mdistHeuristic = new Heuristic() {

			public double h(State s) {
				GridAgent a = ((GridWorldState)s).agent;
				double mdist = Math.abs(a.x-10) + Math.abs(a.y-10);

				return -mdist;
			}
		};

		DeterministicPlanner planner = new AStar(domain, goalCondition, hashingFactory, mdistHeuristic);
		Policy p = planner.planFromState(initialState);

		PolicyUtils.rollout(p, initialState, domain.getModel()).write(outputPath + "astar");

	}
 
Example #22
Source File: TestBlockDude.java    From burlap with Apache License 2.0
public void testDude(State s) {
	TerminalFunction tf = new BlockDudeTF();
	StateConditionTest sc = new TFGoalCondition(tf);

	AStar astar = new AStar(domain, sc, new SimpleHashableStateFactory(), new NullHeuristic());
	astar.toggleDebugPrinting(false);
	astar.planFromState(s);

	Policy p = new SDPlannerPolicy(astar);
	Episode ea = PolicyUtils.rollout(p, s, domain.getModel(), 100);

	State lastState = ea.stateSequence.get(ea.stateSequence.size() - 1);
	Assert.assertEquals(true, tf.isTerminal(lastState));
	Assert.assertEquals(true, sc.satisfies(lastState));
	Assert.assertEquals(-94.0, ea.discountedReturn(1.0), 0.001);

	/*
	BlockDude constructor = new BlockDude();
	Domain d = constructor.generateDomain();

	List<Integer> px = new ArrayList<Integer>();
	List <Integer> ph = new ArrayList<Integer>();

	ph.add(15);
	ph.add(3);
	ph.add(3);
	ph.add(3);
	ph.add(0);
	ph.add(0);
	ph.add(0);
	ph.add(1);
	ph.add(2);
	ph.add(0);
	ph.add(2);
	ph.add(3);
	ph.add(2);
	ph.add(2);
	ph.add(3);
	ph.add(3);
	ph.add(15);
	
	State o = BlockDude.getCleanState(d, px, ph, 6);
	o = BlockDude.setAgent(o, 9, 3, 1, 0);
	o = BlockDude.setExit(o, 1, 0);
	
	o = BlockDude.setBlock(o, 0, 5, 1);
	o = BlockDude.setBlock(o, 1, 6, 1);
	o = BlockDude.setBlock(o, 2, 14, 3);
	o = BlockDude.setBlock(o, 3, 16, 4);
	o = BlockDude.setBlock(o, 4, 17, 4);
	o = BlockDude.setBlock(o, 5, 17, 5);
	
	TerminalFunction tf = new SinglePFTF(d.getPropFunction(BlockDude.PFATEXIT));
	StateConditionTest sc = new SinglePFSCT(d.getPropFunction(BlockDude.PFATEXIT));

	RewardFunction rf = new UniformCostRF();

	AStar astar = new AStar(d, rf, sc, new DiscreteStateHashFactory(), new NullHeuristic());
	astar.toggleDebugPrinting(false);
	astar.planFromState(o);

	Policy p = new SDPlannerPolicy(astar);
	EpisodeAnalysis ea = p.evaluateBehavior(o, rf, tf, 100);

	State lastState = ea.stateSequence.get(ea.stateSequence.size() - 1);
	Assert.assertEquals(true, tf.isTerminal(lastState));
	Assert.assertEquals(true, sc.satisfies(lastState));
	Assert.assertEquals(-94.0, ea.getDiscountedReturn(1.0), 0.001);
	*/
}
 
Example #23
Source File: Episode.java    From burlap with Apache License 2.0
public static void main(String[] args) {
	GridWorldDomain gwd = new GridWorldDomain(11, 11);
	SADomain domain = gwd.generateDomain();
	State s = new GridWorldState(new GridAgent(1, 3));

	Policy p = new RandomPolicy(domain);
	Episode ea = PolicyUtils.rollout(p, s, domain.getModel(), 30);

	String yamlOut = ea.serialize();

	System.out.println(yamlOut);

	System.out.println("\n\n");

	Episode read = Episode.parseEpisode(yamlOut);

	System.out.println(read.actionString());
	System.out.println(read.state(0).toString());
	System.out.println(read.actionSequence.size());
	System.out.println(read.stateSequence.size());

}
 
Example #24
Source File: VIModelLearningPlanner.java    From burlap with Apache License 2.0
@Override
public Policy modelPlannedPolicy() {
	return modelPolicy;
}
 
Example #25
Source File: UnmodeledFavoredPolicy.java    From burlap with Apache License 2.0
public UnmodeledFavoredPolicy(Policy sourcePolicy, KWIKModel model, List <ActionType> actionTypes){
	this.sourcePolicy = sourcePolicy;
	this.model = model;
	this.allActionTypes = actionTypes;
}
 
Example #26
Source File: PotentialShapedRMax.java    From burlap with Apache License 2.0
protected Policy createUnmodeledFavoredPolicy(){
	return new UnmodeledFavoredPolicy(
			this.modelPlanner.modelPlannedPolicy(),
			this.model,
			this.getActionTypes());
}
 
Example #27
Source File: ARTDP.java    From burlap with Apache License 2.0
/**
 * Sets the policy to the provided one. Should be a policy that operates on a {@link QProvider}. Will automatically set its
 * Q-source to this object.
 * @param policy the policy to use.
 */
public void setPolicy(SolverDerivedPolicy policy){
	this.policy = (Policy)policy;
	policy.setSolver(this);
	
}
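A hedged sketch of wiring in a solver-derived policy follows. The four-argument ARTDP constructor shown here and GreedyQPolicy's no-argument constructor are assumptions about the API, not taken from the snippet above; domain is assumed to be set up as in the other examples.

// assumed constructor: (domain, gamma, hashingFactory, initial value-function estimate)
ARTDP artdp = new ARTDP(domain, 0.99, new SimpleHashableStateFactory(), 0.);
artdp.setPolicy(new GreedyQPolicy());   // setPolicy wires this ARTDP in as the policy's Q-source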
 
Example #28
Source File: PolicyRenderLayer.java    From burlap with Apache License 2.0
public PolicyRenderLayer(Collection <State> states, StatePolicyPainter spp, Policy policy){
	this.statesToVisualize = states;
	this.spp = spp;
	this.policy = policy;
}
 
Example #29
Source File: BeliefSparseSampling.java    From burlap with Apache License 2.0
@Override
public Policy planFromState(State initialState){
	this.mdpPlanner.planFromState(initialState);
	return new GreedyQPolicy(this);
}
 
Example #30
Source File: QMDP.java    From burlap with Apache License 2.0
@Override
public Policy planFromState(State initialState) {
	this.forceMDPPlanningFromAllStates();
	return new GreedyQPolicy(this);
}
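 
Both planFromState implementations above hand back a GreedyQPolicy tied to the planner's Q-values. A short hedged sketch of consuming such a returned policy (planner stands for an already-constructed BeliefSparseSampling or QMDP instance, and initialState for its starting state):

Policy p = planner.planFromState(initialState);
Action a = p.action(initialState);             // greedy action recommended for the initial state
double prob = p.actionProb(initialState, a);   // probability under the greedy policy (ties, if any, split probability)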