burlap.mdp.core.StateTransitionProb Java Examples

The following examples show how to use burlap.mdp.core.StateTransitionProb. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FullStateModel.java    From burlap with Apache License 2.0 6 votes vote down vote up
/**
 * Method to implement the {@link SampleStateModel#sample(State, Action)} method when the
 * {@link FullStateModel#stateTransitions(State, Action)} method is implemented. Operates by calling
 * the {@link FullStateModel#stateTransitions(State, Action)} method, rolling a random number, and selecting the
 * first transition whose cumulative probability exceeds the roll.
 * <p>
 * The cumulative probability is accumulated in floating point, so a distribution that sums to one on paper
 * may accumulate to a value marginally below one. If the roll falls into that gap and the accumulated total
 * is within 1e-12 of one, the last transition with positive probability is returned rather than failing.
 * @param model the {@link FullStateModel} with the implemented {@link FullStateModel#stateTransitions(State, Action)} method.
 * @param s the input state
 * @param a the action to be applied in the input state
 * @return a sampled state transition ({@link State}).
 * @throws RuntimeException if no transition is selected and the probabilities do not sum to one within tolerance
 */
public static State sampleByEnumeration(FullStateModel model, State s, Action a){

	List<StateTransitionProb> tps = model.stateTransitions(s, a);
	double roll = RandomFactory.getMapped(0).nextDouble();
	double sum = 0;
	for(StateTransitionProb tp : tps){
		sum += tp.p;
		if(roll < sum){
			return tp.s;
		}
	}

	// Reaching this point with a total that is one up to rounding error means the roll landed in the
	// rounding gap just below one; attribute it to the final transition that carries probability mass.
	if(Math.abs(1. - sum) <= 1e-12){
		for(int i = tps.size() - 1; i >= 0; i--){
			StateTransitionProb tp = tps.get(i);
			if(tp.p > 0.){
				return tp.s;
			}
		}
	}

	throw new RuntimeException("Transition probabilities did not sum to one, they summed to " + sum);

}
 
Example #2
Source File: FactoredModel.java    From burlap with Apache License 2.0 6 votes vote down vote up
/**
 * Enumerates the transition distribution for applying an action in a state. Each state transition
 * reported by the underlying state model is paired with the reward from the reward function and the
 * terminal flag from the terminal function, and wrapped in a {@link TransitionProb}.
 * @param s the source state
 * @param a the action applied in the source state
 * @return one {@link TransitionProb} per state transition enumerated by the state model
 * @throws RuntimeException if the state model does not implement {@link FullStateModel}
 */
@Override
public List<TransitionProb> transitions(State s, Action a) {

	boolean enumerable = this.stateModel instanceof FullStateModel;
	if(!enumerable){
		throw new RuntimeException("Factored Model cannot enumerate transition distribution, because the state model does not implement FullStateModel");
	}

	FullStateModel fullModel = (FullStateModel)this.stateModel;
	List<StateTransitionProb> stateOutcomes = fullModel.stateTransitions(s, a);

	List<TransitionProb> outcomes = new ArrayList<TransitionProb>(stateOutcomes.size());
	for(StateTransitionProb outcome : stateOutcomes){
		// reward is queried before the terminal check, matching the order of the two lookups
		double reward = this.rf.reward(s, a, outcome.s);
		boolean terminal = this.tf.isTerminal(outcome.s);
		EnvironmentOutcome eo = new EnvironmentOutcome(s, a, outcome.s, reward, terminal);
		outcomes.add(new TransitionProb(outcome.p, eo));
	}

	return outcomes;
}
 
Example #3
Source File: IPModel.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Enumerates the state transitions for an action. When the configured action noise is exactly zero the
 * enumeration is delegated to {@code FullStateModel.Helper.deterministicTransition}; for any other
 * noise value enumeration is refused.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the deterministic helper
 * @throws RuntimeException if the action noise is not zero
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	boolean noisy = this.physParams.actionNoise != 0.;
	if(noisy){
		throw new RuntimeException("Transition Probabilities for the Inverted Pendulum with continuous action noise cannot be enumerated.");
	}
	return FullStateModel.Helper.deterministicTransition(this, s, a);
}
 
Example #4
Source File: GridGameStandardMechanics.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Iterates through a list of transition probability objects and combines any that refer to the same state,
 * as judged by {@code agentsEqual}. The first occurrence of each state is kept and the probabilities
 * of its later duplicates are added to it.
 * @param srcTPs an initial list of transition probability objects
 * @return an output list of transition probability objects in which any duplicate states have been merged
 * @throws RuntimeException if the merged probabilities differ from 1 by more than 1e-12
 */
protected List <StateTransitionProb> combineDuplicateTransitionProbabilities(List <StateTransitionProb> srcTPs){

	int n = srcTPs.size();
	// absorbed[k] is true once entry k has been folded into an earlier entry
	boolean[] absorbed = new boolean[n];
	List <StateTransitionProb> merged = new ArrayList<StateTransitionProb>(n);
	double grandTotal = 0.;

	for(int i = 0; i < n; i++){
		if(absorbed[i]){
			continue;
		}
		StateTransitionProb base = srcTPs.get(i);
		double mass = base.p;
		for(int j = i+1; j < n; j++){
			if(absorbed[j]){
				continue;
			}
			StateTransitionProb candidate = srcTPs.get(j);
			boolean sameState = this.agentsEqual((OOState)base.s, (OOState)candidate.s);
			if(sameState){
				mass += candidate.p;
				absorbed[j] = true;
			}
		}
		merged.add(new StateTransitionProb(base.s, mass));
		grandTotal += mass;
	}

	double deviation = Math.abs(1. - grandTotal);
	if(deviation > 1e-12){
		throw new RuntimeException("Error, transition probabilities do not sum to 1");
	}

	return merged;

}
 
Example #5
Source File: MAValueIteration.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Finds and stores all states that are reachable from input state s. Starting from s, every joint action
 * is expanded through the joint model's enumerated transitions, and each state not already stored is
 * recorded and queued for expansion in turn.
 * @param s the state from which all reachable states will be indexed
 * @return true if input s was not previously indexed, resulting in new states being found; false if s was already indexed, resulting in no change to the discovered state set.
 */
public boolean performStateReachabilityFrom(State s){

	FullJointModel fullModel = (FullJointModel)this.jointModel;

	HashableState seed = this.hashingFactory.hashState(s);
	boolean alreadyIndexed = this.states.contains(seed);
	if(alreadyIndexed){
		return false;
	}
	this.states.add(seed);

	LinkedList<HashableState> frontier = new LinkedList<HashableState>();
	frontier.add(seed);

	while(!frontier.isEmpty()){

		HashableState current = frontier.poll();

		// expand the current state under every joint action
		List<JointAction> jointActions = JointAction.getAllJointActionsFromTypes(current.s(), this.agentDefinitions);
		for(JointAction jointAction : jointActions){
			List<StateTransitionProb> outcomes = fullModel.stateTransitions(current.s(), jointAction);
			for(StateTransitionProb outcome : outcomes){
				HashableState successor = this.hashingFactory.hashState(outcome.s);
				if(this.states.contains(successor)){
					continue;
				}
				this.states.add(successor);
				frontier.add(successor);
			}
		}

	}

	DPrint.cl(this.debugCode, "Finished State reachability; " + this.states.size() + " unique states found.");

	return true;
}
 
Example #6
Source File: MADynamicProgramming.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Generates the transition information for the given state and joint aciton
 * @param s the state in which the joint action is applied
 * @param ja the joint action applied to the given state
 */
public JointActionTransitions(State s, JointAction ja){
	FullJointModel model = (FullJointModel)MADynamicProgramming.this.jointModel;
	this.ja = ja;
	this.tps = model.stateTransitions(s, ja);
	this.jrs = new ArrayList<double[]>(this.tps.size());
	for(StateTransitionProb tp : this.tps){
		double[] jr = MADynamicProgramming.this.jointRewardFunction.reward(s, ja, tp.s);
		this.jrs.add(jr);
	}
}
 
Example #7
Source File: BWModel.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for an action by delegating to
 * {@code FullStateModel.Helper.deterministicTransition}, passing this model with the state and action.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	List<StateTransitionProb> outcome = FullStateModel.Helper.deterministicTransition(this, s, a);
	return outcome;
}
 
Example #8
Source File: MinecraftModel.java    From burlapcraft with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for an action by delegating to
 * {@code FullStateModel.Helper.deterministicTransition}, passing this model with the state and action.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	List<StateTransitionProb> outcome = FullStateModel.Helper.deterministicTransition(this, s, a);
	return outcome;
}
 
Example #9
Source File: CPClassicModel.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for an action by delegating to
 * {@code FullStateModel.Helper.deterministicTransition}, passing this model with the state and action.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	List<StateTransitionProb> outcome = FullStateModel.Helper.deterministicTransition(this, s, a);
	return outcome;
}
 
Example #10
Source File: CPCorrectModel.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for an action by delegating to
 * {@code FullStateModel.Helper.deterministicTransition}, passing this model with the state and action.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	List<StateTransitionProb> outcome = FullStateModel.Helper.deterministicTransition(this, s, a);
	return outcome;
}
 
Example #11
Source File: FrostbiteModel.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for an action by delegating to
 * {@code FullStateModel.Helper.deterministicTransition}, passing this model with the state and action.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	List<StateTransitionProb> outcome = FullStateModel.Helper.deterministicTransition(this, s, a);
	return outcome;
}
 
Example #12
Source File: GraphDefinedDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for a graph action. The current node is read from the state's
 * {@code VAR} variable, the node transitions registered for that node and the action's id are looked up
 * in {@code transitionDynamics}, and each is turned into a copy of the input state with {@code VAR}
 * set to the destination node.
 * @param s the source state
 * @param a the action applied in the source state; cast to {@code GraphActionType.GraphAction}
 * @return one {@link StateTransitionProb} per registered node transition
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {

	int actionId = ((GraphActionType.GraphAction)a).aId;
	int node = (Integer)s.get(VAR);

	Set<NodeTransitionProbability> nodeTransitions = transitionDynamics.get(node).get(actionId);

	List <StateTransitionProb> outcomes = new ArrayList<StateTransitionProb>();
	for(NodeTransitionProbability nodeTransition : nodeTransitions){
		// work on a copy so the input state is left untouched
		State successor = s.copy();
		((MutableState)successor).set(VAR, nodeTransition.transitionTo);
		outcomes.add(new StateTransitionProb(successor, nodeTransition.probability));
	}

	return outcomes;

}
 
Example #13
Source File: LunarLanderModel.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for an action by delegating to
 * {@code FullStateModel.Helper.deterministicTransition}, passing this model with the state and action.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	List<StateTransitionProb> outcome = FullStateModel.Helper.deterministicTransition(this, s, a);
	return outcome;
}
 
Example #14
Source File: MountainCar.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for an action by delegating to
 * {@code FullStateModel.Helper.deterministicTransition}, passing this model with the state and action.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	List<StateTransitionProb> outcome = FullStateModel.Helper.deterministicTransition(this, s, a);
	return outcome;
}
 
Example #15
Source File: BlockDudeModel.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for an action by delegating to
 * {@code FullStateModel.Helper.deterministicTransition}, passing this model with the state and action.
 * @param s the source state
 * @param a the action applied in the source state
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, Action a) {
	List<StateTransitionProb> outcome = FullStateModel.Helper.deterministicTransition(this, s, a);
	return outcome;
}
 
Example #16
Source File: StaticRepeatedGameModel.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Enumerates the state transitions for a joint action by delegating to
 * {@code FullJointModel.Helper.deterministicTransition}, passing this model with the state and joint action.
 * @param s the state in which the joint action is performed
 * @param ja the joint action performed
 * @return the list produced by the helper
 */
@Override
public List<StateTransitionProb> stateTransitions(State s, JointAction ja) {
	List<StateTransitionProb> outcome = FullJointModel.Helper.deterministicTransition(this, s, ja);
	return outcome;
}
 
Example #17
Source File: MADynamicProgramming.java    From burlap with Apache License 2.0 3 votes vote down vote up
/**
 * Computes the Q-value of a joint action in a state for this agent. For a non-terminal state the value
 * is the probability-weighted sum, over the enumerated transitions, of this agent's reward plus the
 * discounted value of the resulting state; for a terminal state it is zero.
 * @param s the state in which the joint action is evaluated
 * @param ja the joint action to evaluate
 * @return a {@link JAQValue} holding the state, the joint action and the computed value
 */
@Override
public JAQValue getQValueFor(State s, JointAction ja) {

	// transitions are generated before the terminal check, as in the original ordering
	JointActionTransitions transitions = new JointActionTransitions(s, ja);

	double expected = 0.;
	boolean terminal = MADynamicProgramming.this.terminalFunction.isTerminal(s);
	if(!terminal){
		for(int k = 0; k < transitions.tps.size(); k++){
			StateTransitionProb outcome = transitions.tps.get(k);
			HashableState successor = MADynamicProgramming.this.hashingFactory.hashState(outcome.s);
			double reward = transitions.jrs.get(k)[this.agentNum];
			double successorValue = this.getValue(successor);

			expected += outcome.p * (reward + MADynamicProgramming.this.discount*successorValue);
		}
	}

	return new JAQValue(s, ja, expected);
}
 
Example #18
Source File: FullJointModel.java    From burlap with Apache License 2.0 3 votes vote down vote up
/**
 * A helper method for deterministic transition dynamics. The next state is obtained by calling
 * {@code sample} on the supplied model and is wrapped in a single {@link StateTransitionProb}
 * with probability 1.
 * @param model the {@link JointModel} to use.
 * @param s the state in which the joint action would be executed
 * @param ja the joint action to be performed in the state.
 * @return a new list containing exactly one {@link StateTransitionProb}, assigned probability 1
 */
public static List<StateTransitionProb> deterministicTransition(JointModel model, State s, JointAction ja){
	State next = model.sample(s, ja);
	List <StateTransitionProb> single = new ArrayList<StateTransitionProb>();
	single.add(new StateTransitionProb(next, 1.));
	return single;
}
 
Example #19
Source File: FullJointModel.java    From burlap with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the transition probabilities for applying the provided {@link JointAction} in the given state.
 * Transition probabilities are specified as a list of {@link StateTransitionProb} objects. The list
 * is only required to contain transitions with non-zero probability.
 * @param s the state in which the joint action is performed
 * @param ja the joint action performed
 * @return a list of {@link StateTransitionProb} objects, one per enumerated transition.
 */
List<StateTransitionProb> stateTransitions(State s, JointAction ja);
 
Example #20
Source File: FullStateModel.java    From burlap with Apache License 2.0 2 votes vote down vote up
/**
 * Method to easily implement the {@link FullStateModel#stateTransitions(State, Action)} method for deterministic domains.
 * Obtains a single outcome from the model's {@code sample} method, wraps it in a
 * {@link StateTransitionProb} with probability 1, and returns a list holding just that one element.
 * @param model the {@link SampleStateModel} with an implemented {@link SampleStateModel#sample(State, Action)} method.
 * @param s the input state
 * @param a the action taken.
 * @return a List consisting of the single deterministic {@link StateTransitionProb}
 */
public static List<StateTransitionProb> deterministicTransition(SampleStateModel model, State s, Action a){
	StateTransitionProb certain = new StateTransitionProb(model.sample(s, a), 1.);
	return Arrays.asList(certain);
}
 
Example #21
Source File: FullStateModel.java    From burlap with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the possible transitions when {@link Action} a is applied in {@link State} s. The returned
 * list only needs to include transitions that have a non-zero probability of occurring.
 * @param s the source state
 * @param a the action to be applied in the source state
 * @return the probability distribution of the state transition function, specified as a list of {@link StateTransitionProb} objects.
 */
List<StateTransitionProb> stateTransitions(State s, Action a);