burlap.statehashing.HashableStateFactory Java Examples

The following examples show how to use burlap.statehashing.HashableStateFactory. Each example notes the project and source file it was taken from.
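For orientation, here is a minimal sketch (assuming BURLAP 3 package layout and the GridWorldDomain example domain) of the basic pattern: construct a concrete factory such as SimpleHashableStateFactory and use it to wrap states so they can serve as keys in hash-backed collections.

import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.statehashing.HashableState;
import burlap.statehashing.HashableStateFactory;
import burlap.statehashing.simple.SimpleHashableStateFactory;

public class HashableStateFactoryDemo {
	public static void main(String[] args) {
		// build a small grid world and an initial state
		SADomain domain = new GridWorldDomain(11, 11).generateDomain();
		State s = new GridWorldState(new GridAgent(0, 0));

		// wrap the state; hashCode() and equals() are now well defined for use as map/set keys
		HashableStateFactory factory = new SimpleHashableStateFactory();
		HashableState hs = factory.hashState(s);
		System.out.println(hs.hashCode());
	}
}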
Example #1
Source File: TestHashing.java    From burlap with Apache License 2.0
@Test
public void testSimpleHashFactoryIdentifierDependent() {
	SADomain domain = (SADomain)this.gridWorldTest.getDomain();
	State startState = this.gridWorldTest.generateState();
	HashableStateFactory factory = new SimpleHashableStateFactory(false);
	Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
	assert(hashedStates.size() == 104);
	
	Set<HashableState> renamedStates = new HashSet<HashableState>();
	for (HashableState state : hashedStates) {
		State source = state.s();
		State renamed = this.renameObjects((GridWorldState)source.copy());
		HashableState renamedHashed = factory.hashState(renamed);
		renamedStates.add(renamedHashed);
	}
	hashedStates.addAll(renamedStates);
	assert(hashedStates.size() == 208);
}
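The test above uses identifier-dependent hashing (new SimpleHashableStateFactory(false)), so renaming the objects yields 104 additional distinct hashed states. For contrast, a sketch of the identifier-independent case (mirroring the companion test in TestHashing and assuming the same helper methods) would leave the set at its original size, because object names are ignored when hashing and comparing:

@Test
public void testSimpleHashFactoryIdentifierIndependent() {
	SADomain domain = (SADomain)this.gridWorldTest.getDomain();
	State startState = this.gridWorldTest.generateState();
	// true -> identifier independent: renamed objects hash/equate to the originals
	HashableStateFactory factory = new SimpleHashableStateFactory(true);
	Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
	assert(hashedStates.size() == 104);

	Set<HashableState> renamedStates = new HashSet<HashableState>();
	for (HashableState state : hashedStates) {
		State renamed = this.renameObjects((GridWorldState)state.s().copy());
		renamedStates.add(factory.hashState(renamed));
	}
	hashedStates.addAll(renamedStates);
	assert(hashedStates.size() == 104); // no growth: object names do not matter
}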
 
Example #2
Source File: QLearning.java    From burlap with Apache License 2.0
/**
 * Initializes the algorithm. By default the agent will only save the last learning episode and a call to the {@link #planFromState(State)} method
 * will cause the valueFunction to use only one episode for planning; this should probably be changed to a much larger value if you plan on using this
 * algorithm as a planning algorithm.
 * @param domain the domain in which to learn
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use for Q-lookups
 * @param qInitFunction a {@link burlap.behavior.valuefunction.QFunction} object that can be used to initialize the Q-values.
 * @param learningRate the learning rate
 * @param learningPolicy the learning policy to follow during a learning episode.
 * @param maxEpisodeSize the maximum number of steps the agent will take in a learning episode before the agent stops trying.
 */
protected void QLInit(SADomain domain, double gamma, HashableStateFactory hashingFactory,
					  QFunction qInitFunction, double learningRate, Policy learningPolicy, int maxEpisodeSize){
	
	this.solverInit(domain, gamma, hashingFactory);
	this.qFunction = new HashMap<HashableState, QLearningStateNode>();
	this.learningRate = new ConstantLR(learningRate);
	this.learningPolicy = learningPolicy;
	this.maxEpisodeSize = maxEpisodeSize;
	this.qInitFunction = qInitFunction;
	
	numEpisodesForPlanning = 1;
	maxQChangeForPlanningTermination = 0.;

	
}
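QLInit is typically reached through QLearning's public constructors. A minimal learning loop might look like the sketch below (hypothetical setup; assumes a GridWorld-style SADomain named domain, an initial State named initialState, and BURLAP's SimulatedEnvironment):

QLearning agent = new QLearning(domain, 0.99, new SimpleHashableStateFactory(), 0., 0.1);
SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);
for (int i = 0; i < 100; i++) {
	agent.runLearningEpisode(env);   // one learning episode
	env.resetEnvironment();          // back to the initial state for the next episode
}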
 
Example #3
Source File: MADPPlannerFactory.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the domain in which to perform planning
 * @param agentDefinitions the definitions of the agent types
 * @param jointModel the joint action model
 * @param jointRewardFunction the joint reward function
 * @param terminalFunction the terminal state function
 * @param discount the discount
 * @param hashingFactory the hashing factory to use for storing states
 * @param qInit the q-value initialization function to use.
 * @param backupOperator the backup operator that defines the solution concept being solved
 * @param maxDelta the threshold that causes VI to terminate when the max Q-value change is less than it
 * @param maxIterations the maximum number of iterations allowed
 */
public MAVIPlannerFactory(SGDomain domain, List<SGAgentType> agentDefinitions, JointModel jointModel, JointRewardFunction jointRewardFunction, TerminalFunction terminalFunction,
						  double discount, HashableStateFactory hashingFactory, QFunction qInit, SGBackupOperator backupOperator, double maxDelta, int maxIterations){
	
	this.domain = domain;
	this.agentDefinitions = agentDefinitions;
	this.jointModel = jointModel;
	this.jointRewardFunction = jointRewardFunction;
	this.terminalFunction = terminalFunction;
	this.discount = discount;
	this.hashingFactory = hashingFactory;
	this.qInit = qInit;
	this.backupOperator = backupOperator;
	this.maxDelta = maxDelta;
	this.maxIterations = maxIterations;
	
}
 
Example #4
Source File: MADPPlannerFactory.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the domain in which to perform planning
 * @param jointModel the joint action model
 * @param jointRewardFunction the joint reward function
 * @param terminalFunction the terminal state function
 * @param discount the discount
 * @param hashingFactory the hashing factory to use for storing states
 * @param qInit the q-value initialization function to use.
 * @param backupOperator the backup operator that defines the solution concept being solved
 * @param maxDelta the threshold that causes VI to terminate when the max Q-value change is less than it
 * @param maxIterations the maximum number of iterations allowed
 */
public MAVIPlannerFactory(SGDomain domain, JointModel jointModel, JointRewardFunction jointRewardFunction, TerminalFunction terminalFunction,
						  double discount, HashableStateFactory hashingFactory, QFunction qInit, SGBackupOperator backupOperator, double maxDelta, int maxIterations){
	
	this.domain = domain;
	this.jointModel = jointModel;
	this.jointRewardFunction = jointRewardFunction;
	this.terminalFunction = terminalFunction;
	this.discount = discount;
	this.hashingFactory = hashingFactory;
	this.qInit = qInit;
	this.backupOperator = backupOperator;
	this.maxDelta = maxDelta;
	this.maxIterations = maxIterations;
	
}
 
Example #5
Source File: ExponentialDecayLR.java    From burlap with Apache License 2.0
/**
 * Initializes with an initial learning rate and decay rate for a state or state-action (or state feature-action) dependent learning rate that will decay to a value no smaller than minimumLearningRate.
 * If this learning rate function is to be used for state features, rather than states,
 * then the hashing factory can be null.
 * @param initialLearningRate the initial learning rate for each state or state-action
 * @param decayRate the exponential base by which the learning rate is decayed
 * @param minimumLearningRate the smallest value to which the learning rate will decay
 * @param hashingFactory how to hash and compare states
 * @param useSeparateLRPerStateAction whether to have an independent learning rate for each state-action pair, rather than just each state
 */
public ExponentialDecayLR(double initialLearningRate, double decayRate, double minimumLearningRate, HashableStateFactory hashingFactory, boolean useSeparateLRPerStateAction){
	if(decayRate > 1 || decayRate < 0){
		throw new RuntimeException("Decay rate must be <= 1 and >= 0");
	}
	this.initialLearningRate = initialLearningRate;
	this.decayRate = decayRate;
	this.minimumLR = minimumLearningRate;
	
	this.useStateWise = true;
	this.useStateActionWise = useSeparateLRPerStateAction;
	this.hashingFactory = hashingFactory;
	this.stateWiseMap = new HashMap<HashableState, ExponentialDecayLR.StateWiseLearningRate>();
	this.featureWiseMap = new HashMap<Integer, ExponentialDecayLR.StateWiseLearningRate>();
	
}
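For intuition, the decayed rate behaves like a geometric schedule floored at the minimum. A simplified illustration (not the class's exact internal bookkeeping, which tracks pollings per state or state-action) is:

// lr after t lookups of the same state (or state-action): initial * decayRate^t, never below the minimum
double decayedLearningRate(int t, double initial, double decayRate, double minimum) {
	return Math.max(initial * Math.pow(decayRate, t), minimum);
}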
 
Example #6
Source File: MAQLFactory.java    From burlap with Apache License 2.0
/**
 * Initializes. The policy defaults to an epsilon-greedy max welfare policy.
 * @param d the domain in which to perform learning
 * @param discount the discount factor
 * @param learningRate the constant learning rate
 * @param hashFactory the hashing factory used to index states and Q-values
 * @param qInit the default Q-value to which all initial Q-values will be initialized
 * @param backupOperator the backup operator to use that defines the solution concept being learned
 * @param queryOtherAgentsForTheirQValues if true, then the agent uses the Q-values for other agents that are stored by them; if false, then the agent stores a Q-value for each other agent in the world.
 */
public MAQLFactory(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues){
	this.domain = d;
	this.discount = discount;
	this.learningRate = new ConstantLR(learningRate);
	this.hashingFactory = hashFactory;
	this.qInit = new ConstantValueFunction(qInit);
	this.backupOperator = backupOperator;
	this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
	this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(0.1));
}
 
Example #7
Source File: ExponentialDecayLR.java    From burlap with Apache License 2.0
/**
 * Initializes with an initial learning rate and decay rate for a state or state-action (or state feature-action) dependent learning rate. 
 * Minimum learning rate that can be returned will be Double.MIN_NORMAL. If this learning rate function is to be used for state features, rather than states,
 * then the hashing factory can be null.
 * @param initialLearningRate the initial learning rate for each state or state-action
 * @param decayRate the exponential base by which the learning rate is decayed
 * @param hashingFactory how to hash and compare states
 * @param useSeparateLRPerStateAction whether to have an independent learning rate for each state-action pair, rather than just each state
 */
public ExponentialDecayLR(double initialLearningRate, double decayRate, HashableStateFactory hashingFactory, boolean useSeparateLRPerStateAction){
	if(decayRate > 1 || decayRate < 0){
		throw new RuntimeException("Decay rate must be <= 1 and >= 0");
	}
	this.initialLearningRate = initialLearningRate;
	this.decayRate = decayRate;
	
	this.useStateWise = true;
	this.useStateActionWise = useSeparateLRPerStateAction;
	this.hashingFactory = hashingFactory;
	this.stateWiseMap = new HashMap<HashableState, ExponentialDecayLR.StateWiseLearningRate>();
	this.featureWiseMap = new HashMap<Integer, ExponentialDecayLR.StateWiseLearningRate>();
	
}
 
Example #8
Source File: TDLambda.java    From burlap with Apache License 2.0
/**
 * Initializes the algorithm.
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use for hashing states and performing equality checks. 
 * @param learningRate the learning rate that affects how quickly the estimated value function is adjusted.
 * @param vinit a method of initializing the value function for previously unvisited states.
 * @param lambda indicates the strength of eligibility traces. Use 1 for Monte-Carlo-like traces and 0 for single-step backups
 */
public TDLambda(double gamma, HashableStateFactory hashingFactory, double learningRate, ValueFunction vinit, double lambda) {
	this.gamma = gamma;
	this.hashingFactory = hashingFactory;
	
	this.learningRate = new ConstantLR(learningRate);
	vInitFunction = vinit;
	this.lambda = lambda;
	
	
	vIndex = new HashMap<HashableState, VValue>();
}
 
Example #9
Source File: PolicyIteration.java    From burlap with Apache License 2.0
/**
 * Initializes the valueFunction.
 * @param domain the domain in which to plan
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use
 * @param maxDelta when the maximum change in the value function is smaller than this value, policy evaluation will terminate. Similarly, when the maximum value function change between policy iterations is smaller than this value, planning will terminate.
 * @param maxEvaluationIterations when the number of iterations of value iteration used to evaluate a policy exceeds this value, policy evaluation will terminate.
 * @param maxPolicyIterations when the number of policy iterations passes this value, planning will terminate.
 */
public PolicyIteration(SADomain domain, double gamma, HashableStateFactory hashingFactory, double maxDelta, int maxEvaluationIterations, int maxPolicyIterations){
	this.DPPInit(domain, gamma, hashingFactory);
	
	this.maxEvalDelta = maxDelta;
	this.maxPIDelta = maxDelta;
	this.maxIterations = maxEvaluationIterations;
	this.maxPolicyIterations = maxPolicyIterations;
	
	this.evaluativePolicy = new GreedyQPolicy(this.getCopyOfValueFunction());
}
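A typical use of this constructor (a hypothetical sketch assuming a GridWorld-style SADomain named domain and an initial State named initialState) plans from a start state and retrieves the resulting policy:

PolicyIteration pi = new PolicyIteration(domain, 0.99, new SimpleHashableStateFactory(), 0.001, 100, 100);
Policy policy = pi.planFromState(initialState);  // greedy policy w.r.t. the converged value function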
 
Example #10
Source File: TestHashing.java    From burlap with Apache License 2.0
public Set<HashableState> generateRandomStates(SADomain domain, State state, HashableStateFactory factory, int width, int numStates, boolean moveLocations) {
	Set<HashableState> hashedStates = new HashSet<HashableState>();
	Random random = new Random();
	int misses = 0;
	int prevSize = 0;
	while (hashedStates.size() < numStates) {
		if (hashedStates.size() == prevSize) {
			misses++;
		}
		if (misses > 100) {
			break;
		}
		prevSize = hashedStates.size();
		if (prevSize > 0 && prevSize % 10000 == 0) {
			System.out.println("\t" + prevSize);
		}
		GridWorldState copy = (GridWorldState)state.copy();
		copy.touchAgent().x = random.nextInt(width);
		copy.agent.y = random.nextInt(width);

		
		if (moveLocations) {
			List<GridLocation> locations = copy.deepTouchLocations();
			for(GridLocation loc : locations){
				loc.x = random.nextInt(width);
				loc.y = random.nextInt(width);
			}
		}
		hashedStates.add(factory.hashState(copy));
	}
	return hashedStates;
}
 
Example #11
Source File: StateReachability.java    From burlap with Apache License 2.0
/**
 * Returns the list of {@link State} objects that are reachable from a source state.
 * @param from the source state
 * @param inDomain the domain of the state
 * @param usingHashFactory the state hashing factory to use for indexing states and testing equality.
 * @return the list of {@link State} objects that are reachable from a source state.
 */
public static List <State> getReachableStates(State from, SADomain inDomain, HashableStateFactory usingHashFactory){
	Set<HashableState> hashed = getReachableHashedStates(from, inDomain, usingHashFactory);
	List<State> states = new ArrayList<State>(hashed.size());
	for(HashableState sh : hashed){
		states.add(sh.s());
	}
	return states;

}
 
Example #12
Source File: BeliefSparseSampling.java    From burlap with Apache License 2.0
/**
 * Initializes the planner.
 * @param domain the POMDP domain
 * @param discount the discount factor
 * @param hashingFactory the Belief MDP {@link burlap.statehashing.HashableStateFactory} that {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use.
 * @param h the height of the {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} tree.
 * @param c the number of samples {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use. Set to -1 to use the full BeliefMDP transition dynamics.
 */
public BeliefSparseSampling(PODomain domain, double discount, HashableStateFactory hashingFactory, int h, int c){

	this.solverInit(domain, discount, hashingFactory);
	BeliefMDPGenerator bdgen = new BeliefMDPGenerator(domain);
	this.beliefMDP = (SADomain)bdgen.generateDomain();
	
	this.mdpPlanner = new SparseSampling(this.beliefMDP, discount, hashingFactory, h, Math.max(1, c));
	if(c < 1){
		this.mdpPlanner.setComputeExactValueFunction(true);
	}
	
}
 
Example #13
Source File: StateReachability.java    From burlap with Apache License 2.0
/**
 * Finds the set of states that are reachable under a policy from a source state. Reachability under a source policy means
 * that the space of actions considered is those that have non-zero probability of being selected by the
 * policy, and all possible outcomes of those actions are considered.
 * @param domain the domain containing the model to use for evaluating reachable states
 * @param p the policy that must be followed
 * @param from the source {@link State} from which the policy would be initiated.
 * @param usingHashFactory the {@link burlap.statehashing.HashableStateFactory} used to hash states and test equality.
 * @return a {@link java.util.List} of {@link State} objects that could be reached.
 */
public static List<State> getPolicyReachableStates(SADomain domain, EnumerablePolicy p, State from, HashableStateFactory usingHashFactory){

	Set<HashableState> hashed = getPolicyReachableHashedStates(domain, p, from, usingHashFactory);
	List<State> states = new ArrayList<State>(hashed.size());
	for(HashableState sh : hashed){
		states.add(sh.s());
	}
	return states;

}
 
Example #14
Source File: SparseSampling.java    From burlap with Apache License 2.0
/**
 * Initializes. Note that you can have h and c set to values that ensure epsilon optimality by using the {@link #setHAndCByMDPError(double, double, int)} method, but in
 * general this will result in very large values that will be intractable. If you set c = -1, then the full transition dynamics will be used. You should
 * only use the full transition dynamics if the number of possible transitions from each state is small and if the model implements {@link burlap.mdp.singleagent.model.FullModel}
 * @param domain the planning domain
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory for matching generated states with their state nodes.
 * @param h the height of the tree
 * @param c the number of transition dynamics samples used. If set to -1, then the full transition dynamics are used.
 */
public SparseSampling(SADomain domain, double gamma, HashableStateFactory hashingFactory, int h, int c){
	this.solverInit(domain, gamma, hashingFactory);
	this.h = h;
	this.c = c;
	this.nodesByHeight = new HashMap<SparseSampling.HashedHeightState, SparseSampling.StateNode>();
	this.rootLevelQValues = new HashMap<HashableState, List<QValue>>();
	if(this.c < 0){
		this.computeExactValueFunction = true;
	}

	this.debugCode = 7369430;
}
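As a usage note (a hypothetical sketch assuming a domain, hashing factory, and initial state are already defined), SparseSampling plans on demand from whatever state it is handed, so the typical pattern is simply:

SparseSampling ss = new SparseSampling(domain, 0.99, new SimpleHashableStateFactory(), 10, 5);
Policy p = ss.planFromState(initialState);   // builds the sample tree rooted at initialState
Action a = p.action(initialState);           // greedy action w.r.t. the estimated Q-values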
 
Example #15
Source File: TabularModel.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param sourceDomain the source domain whose actions will be modeled.
 * @param hashingFactory the hashing factory to index states
 * @param nConfident the number of observed transitions to be confident in the model's prediction.
 */
public TabularModel(SADomain sourceDomain, HashableStateFactory hashingFactory, int nConfident){
	this.sourceDomain = sourceDomain;
	this.hashingFactory = hashingFactory;
	this.stateNodes = new HashMap<HashableState, TabularModel.StateNode>();
	this.terminalStates = new HashSet<HashableState>();
	this.nConfident = nConfident;

}
 
Example #16
Source File: Main.java    From cs7641-assignment4 with MIT License
/**
 * This method takes care of visualizing the grid, rewards, and specific policy on a nice
 * BURLAP-predefined GUI. I found this very useful to understand how the algorithm was working.
 */
private static void visualize(Problem map, ValueFunction valueFunction, Policy policy, State initialState, SADomain domain, HashableStateFactory hashingFactory, String title) {
	List<State> states = StateReachability.getReachableStates(initialState, domain, hashingFactory);
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(states, map.getWidth(), map.getWidth(), valueFunction, policy);
	gui.setTitle(title);
	gui.setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
	gui.initGUI();
}
 
Example #17
Source File: QLTutorial.java    From burlap_examples with MIT License
public QLTutorial(SADomain domain, double gamma, HashableStateFactory hashingFactory,
				  QFunction qinit, double learningRate, double epsilon){

	this.solverInit(domain, gamma, hashingFactory);
	this.qinit = qinit;
	this.learningRate = learningRate;
	this.qValues = new HashMap<HashableState, List<QValue>>();
	this.learningPolicy = new EpsilonGreedy(this, epsilon);

}
 
Example #18
Source File: DFS.java    From burlap with Apache License 2.0
/**
 * Constructor of DFS with specification of depth limit, whether to maintain a closed list that affects exploration, and whether paths
 * generated by options should be explored first.
 * @param domain the domain in which to plan
 * @param gc indicates the goal states
 * @param hashingFactory the state hashing factory to use
 * @param maxDepth depth limit of DFS. -1 specifies no limit.
 * @param maintainClosed whether to maintain a closed list or not
 * @param optionsFirst whether to explore paths generated by options first.
 */
protected void DFSInit(SADomain domain, StateConditionTest gc, HashableStateFactory hashingFactory, int maxDepth, boolean maintainClosed, boolean optionsFirst){
	this.deterministicPlannerInit(domain, gc, hashingFactory);
	this.maxDepth = maxDepth;
	this.maintainClosed = maintainClosed;
	if(optionsFirst){
		this.setOptionsFirst();
	}
	
	rand = RandomFactory.getMapped(0);
}
 
Example #19
Source File: TestHashing.java    From burlap with Apache License 2.0
public Set<HashableState> generateStates(SADomain domain, State state, HashableStateFactory factory, int width) {
	Set<HashableState> hashedStates = new HashSet<HashableState>();
	for (int i = 0; i < width; ++i) {
		for (int j =0 ; j < width; ++j) {
			GridWorldState copy = (GridWorldState)state.copy();
			copy.touchAgent().x = i;
			copy.agent.y = j;
			hashedStates.add(factory.hashState(copy));
		}
	}
	return hashedStates;
}
 
Example #20
Source File: DifferentiableVI.java    From burlap with Apache License 2.0
/**
 * Initializes the valueFunction.
 * @param domain the domain in which to plan
 * @param rf the differentiable reward function that will be used
 * @param gamma the discount factor
 * @param boltzBeta the scaling factor in the Boltzmann distribution used for the state value function. The larger the value, the more deterministic.
 * @param hashingFactory the state hashing factory to use
 * @param maxDelta when the maximum change in the value function is smaller than this value, VI will terminate.
 * @param maxIterations when the number of VI iterations exceeds this value, VI will terminate.
 */
public DifferentiableVI(SADomain domain, DifferentiableRF rf, double gamma, double boltzBeta, HashableStateFactory hashingFactory, double maxDelta, int maxIterations){

	this.DPPInit(domain, gamma, hashingFactory);

	this.rf = rf;
	this.maxDelta = maxDelta;
	this.maxIterations = maxIterations;
	this.operator = new DifferentiableSoftmaxOperator(boltzBeta);
	this.boltzBeta = boltzBeta;

}
 
Example #21
Source File: SGNaiveQLAgent.java    From burlap with Apache License 2.0
/**
 * Initializes with a default Q-value of 0 and a 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory) {
	this.init(d);
	this.discount = discount;
	this.learningRate = new ConstantLR(learningRate);
	this.hashFactory = hashFactory;
	this.qInit = new ConstantValueFunction(0.);
	
	this.qMap = new HashMap<HashableState, List<QValue>>();
	stateRepresentations = new HashMap<HashableState, State>();
	this.policy = new EpsilonGreedy(this, 0.1);
	
	this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
 
Example #22
Source File: SGNaiveQLAgent.java    From burlap with Apache License 2.0
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param defaultQ the default to which all Q-values will be initialized
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, double defaultQ, HashableStateFactory hashFactory) {
	this.init(d);
	this.discount = discount;
	this.learningRate = new ConstantLR(learningRate);
	this.hashFactory = hashFactory;
	this.qInit = new ConstantValueFunction(defaultQ);
	
	this.qMap = new HashMap<HashableState, List<QValue>>();
	stateRepresentations = new HashMap<HashableState, State>();
	this.policy = new EpsilonGreedy(this, 0.1);
	
	this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
 
Example #23
Source File: SGNaiveQLAgent.java    From burlap with Apache License 2.0
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param qInitizalizer the Q-value initialization method
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, QFunction qInitizalizer, HashableStateFactory hashFactory) {
	this.init(d);
	this.discount = discount;
	this.learningRate = new ConstantLR(learningRate);
	this.hashFactory = hashFactory;
	this.qInit = qInitizalizer;
	
	this.qMap = new HashMap<HashableState, List<QValue>>();
	stateRepresentations = new HashMap<HashableState, State>();
	this.policy = new EpsilonGreedy(this, 0.1);
	
	this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
 
Example #24
Source File: Main.java    From cs7641-assignment4 with MIT License
/**
 * Here is where the magic happens. In this method I loop through the specified number
 * of episodes (iterations) and run the specified algorithm. To keep things nice and clean, I use
 * this method to run all three algorithms. The specific details are specified through the
 * PlannerFactory interface.
 * 
 * This method collects all the information from the algorithm and packs it in an Analysis
 * instance that later gets dumped on the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory, State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
	ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
	SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
	Planner planner = null;
	Policy policy = null;
	for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
		long startTime = System.nanoTime();
		planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
		policy = planner.planFromState(initialState);

		/*
		 * If we haven't converged, following the policy will lead the agent wandering around
		 * and it might never reach the goal. To avoid this, we need to set the maximum number
		 * of steps to take before terminating the policy rollout. I decided to set this maximum
		 * at the number of grid locations in our map (width * width). This should give the
		 * agent plenty of room to wander around.
		 * 
		 * The smaller this number is, the faster the algorithm will run.
		 */
		int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

		Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
		analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
	}

	if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
		learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
	}

	if (SHOW_VISUALIZATION && planner != null && policy != null) {
		visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
	}
}
 
Example #25
Source File: MAQLFactory.java    From burlap with Apache License 2.0
/**
 * Initializes. The policy defaults to an epsilon-greedy max welfare policy.
 * @param d the domain in which to perform learning
 * @param discount the discount factor
 * @param learningRate the learning rate function
 * @param hashFactory the hashing factory used to index states and Q-values
 * @param qInit the Q-value initialization function
 * @param backupOperator the backup operator to use that defines the solution concept being learned
 * @param queryOtherAgentsForTheirQValues if true, then the agent uses the Q-values for other agents that are stored by them; if false, then the agent stores a Q-value for each other agent in the world.
 * @param learningPolicy the learningPolicy to follow
 */
public void init(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues, PolicyFromJointPolicy learningPolicy){
	this.domain = d;
	this.discount = discount;
	this.learningRate = learningRate;
	this.hashingFactory = hashFactory;
	this.qInit = qInit;
	this.backupOperator = backupOperator;
	this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
	this.learningPolicy = learningPolicy;
	
}
 
Example #26
Source File: PolicyIteration.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the valueFunction.
 * @param domain the domain in which to plan
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use
 * @param maxPIDelta when the maximum value function change between policy iterations is smaller than this value, planning will terminate.
 * @param maxEvalDelta when the maximum change in the value function is smaller than this value, policy evaluation will terminate.
 * @param maxEvaluationIterations when the number of iterations of value iteration used to evaluate a policy exceeds this value, policy evaluation will terminate.
 * @param maxPolicyIterations when the number of policy iterations passes this value, planning will terminate.
 */
public PolicyIteration(SADomain domain, double gamma, HashableStateFactory hashingFactory, double maxPIDelta, double maxEvalDelta, int maxEvaluationIterations, int maxPolicyIterations){
	this.DPPInit(domain, gamma, hashingFactory);
	
	this.maxEvalDelta = maxEvalDelta;
	this.maxPIDelta = maxPIDelta;
	this.maxIterations = maxEvaluationIterations;
	this.maxPolicyIterations = maxPolicyIterations;
	
	this.evaluativePolicy = new GreedyQPolicy(this.getCopyOfValueFunction());
}
 
Example #27
Source File: MAQLFactory.java    From burlap with Apache License 2.0
public CoCoQLearningFactory(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, boolean queryOtherAgentsForTheirQValues, double epsilon){
	this.init(d, discount, learningRate, hashFactory, qInit, new CoCoQ(), queryOtherAgentsForTheirQValues, new PolicyFromJointPolicy(new EGreedyMaxWellfare(epsilon)));
}
 
Example #28
Source File: MDPSolver.java    From burlap with Apache License 2.0
@Override
public void setHashingFactory(HashableStateFactory hashingFactory) {
	this.hashingFactory = hashingFactory;
}
 
Example #29
Source File: TestHashing.java    From burlap with Apache License 2.0
public Set <HashableState> getReachableHashedStates(State from, SADomain inDomain, HashableStateFactory usingHashFactory){
	
	Set<HashableState> hashedStates = new HashSet<HashableState>();
	HashableState shi = usingHashFactory.hashState(from);
	List <ActionType> actionTypes = inDomain.getActionTypes();
	
	LinkedList <HashableState> openList = new LinkedList<HashableState>();
	openList.offer(shi);
	hashedStates.add(shi);
	while(!openList.isEmpty()){
		HashableState sh = openList.poll();

		List<Action> gas = ActionUtils.allApplicableActionsForTypes(actionTypes, sh.s());
		for(Action ga : gas){
			List <TransitionProb> tps = ((FullModel)inDomain.getModel()).transitions(sh.s(), ga);
			for(TransitionProb tp : tps){
				HashableState nsh = usingHashFactory.hashState(tp.eo.op);
				
				for (HashableState hashedState : hashedStates) {
					boolean sameObject = (hashedState == nsh);
					boolean valueEquals = (hashedState.equals(nsh));
					boolean hashEquals = (hashedState.hashCode() == nsh.hashCode());
					if (sameObject || valueEquals) {
						assert(hashEquals); // Same state, hashes need to be equal
					}
					if (!hashEquals) {
						assert(!sameObject && !valueEquals);
					}
				}
				
				if(!hashedStates.contains(nsh)){
					openList.offer(nsh);
					hashedStates.add(nsh);
				}
			}
			
		}
		
	}
	
	return hashedStates;
}
 
Example #30
Source File: MDPSolver.java    From burlap with Apache License 2.0
@Override
public HashableStateFactory getHashingFactory(){
	return this.hashingFactory;
}