burlap.mdp.core.state.State Java Examples

The following examples show how to use burlap.mdp.core.state.State. The examples are drawn from open source projects; the source file, originating project, and license are noted above each example.
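Before the project examples, the short sketch below illustrates the core of the State interface itself: variableKeys(), get(), and copy(). It reuses the GridWorld state classes that also appear in the TestHashing example further down; the GridAgent(int, int) constructor and the package paths are assumptions based on BURLAP 3, and the exact variable keys printed depend on the State implementation.

import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;

public class StateBasicsSketch {

	public static void main(String[] args) {

		// GridWorldState is one of many State implementations shipped with BURLAP.
		State s = new GridWorldState(new GridAgent(0, 0));

		// Every State exposes a flat variable view: a list of keys and a value per key.
		for(Object key : s.variableKeys()){
			System.out.println(key + " = " + s.get(key));
		}

		// copy() returns a state that can be modified without affecting the original;
		// the model sampling examples below (e.g. BlockDudeModel) rely on this pattern.
		State copy = s.copy();
		System.out.println("copied: " + copy);
	}
}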
Example #1
Source File: FrostbiteDomain.java    From burlap with Apache License 2.0
/**
 * Main function to test the domain.
 * Note: the termination conditions are not checked when testing the domain this way, so it is impossible to
 * win or die, which might trigger bugs. To enable them, uncomment the code in the "update" function.
 *
 * @param args command line args
 */
public static void main(String[] args) {
	FrostbiteDomain fd = new FrostbiteDomain();
	SADomain d = fd.generateDomain();
	State s = new FrostbiteState();

	Visualizer vis = FrostbiteVisualizer.getVisualizer();
	VisualExplorer exp = new VisualExplorer(d, vis, s);

	exp.addKeyAction("a", ACTION_WEST, "");
	exp.addKeyAction("d", ACTION_EAST, "");
	exp.addKeyAction("w", ACTION_NORTH, "");
	exp.addKeyAction("s", ACTION_SOUTH, "");
	exp.addKeyAction("x", ACTION_IDLE, "");

	exp.initGUI();
}
 
Example #2
Source File: LinearDiffRFVInit.java    From burlap with Apache License 2.0
@Override
public double reward(State s, Action a, State sprime) {

	double [] features;
	if(this.rfFeaturesAreForNextState){
		features = this.rfFvGen.features(sprime);
	}
	else{
		features = this.rfFvGen.features(s);
	}
	double sum = 0.;
	for(int i = 0; i < features.length; i++){
		sum += features[i] * this.parameters[i];
	}
	return sum;

}
 
Example #3
Source File: GridGame.java    From burlap with Apache License 2.0
public static State getTurkeyInitialState(){

		GenericOOState s = new GenericOOState(
				new GGAgent(0, 0, 0, "agent0"),
				new GGAgent(2, 0, 1, "agent1"),
				new GGGoal(0, 3, 1, "g0"),
				new GGGoal(1, 2, 0, "g1"),
				new GGGoal(2, 3, 2, "g2"),
				new GGWall.GGHorizontalWall(0, 0, 1, 1, "w0"),
				new GGWall.GGHorizontalWall(2, 2, 1, 1, "w1")

		);

		setBoundaryWalls(s, 3, 4);
		
		return s;
		
	}
 
Example #4
Source File: BlockDudeModel.java    From burlap with Apache License 2.0
@Override
public State sample(State s, Action a) {

	BlockDudeState bs = (BlockDudeState)s.copy();
	String aname = a.actionName();
	if(aname.equals(ACTION_WEST)){
		moveHorizontally(bs, -1);
	}
	else if(aname.equals(ACTION_EAST)){
		moveHorizontally(bs, 1);
	}
	else if(aname.equals(ACTION_UP)){
		moveUp(bs);
	}
	else if(aname.equals(ACTION_PICKUP)){
		pickupBlock(bs);
	}
	else if(aname.equals(ACTION_PUT_DOWN)){
		putdownBlock(bs);
	}
	else {
		throw new RuntimeException("Unknown action " + aname);
	}
	return bs;
}
 
Example #5
Source File: GoldBlockTF.java    From burlapcraft with GNU Lesser General Public License v3.0
@Override
public boolean isTerminal(State s) {
	OOState os = (OOState)s;

	BCAgent a = (BCAgent)os.object(CLASS_AGENT);

	HelperGeometry.Pose agentPose = HelperGeometry.Pose.fromXyz(a.x, a.y, a.z);

	HelperGeometry.Pose goalPose = getGoalPose(s);

	//are they at goal location or dead
	double distance = goalPose.distance(agentPose);
	//System.out.println("Distance: " + distance + " goal at: " + goalPose);

	return distance < 0.5;
}
 
Example #6
Source File: RewardValueProjection.java    From burlap with Apache License 2.0
@Override
public List<QValue> qValues(State s) {

	if(this.domain != null){
		List<Action> actions = ActionUtils.allApplicableActionsForTypes(this.domain.getActionTypes(), s);
		List<QValue> qs = new ArrayList<QValue>(actions.size());
		for(Action ga : actions){
			qs.add(new QValue(s, ga, this.qValue(s, ga)));
		}
		return qs;
	}

	if(this.projectionType == RewardProjectionType.DESTINATIONSTATE){
		return Arrays.asList(new QValue(s, null, this.rf.reward(null, null, s)));
	}
	else if(this.projectionType == RewardProjectionType.SOURCESTATE){
		return Arrays.asList(new QValue(s, null, this.rf.reward(s, null, null)));
	}
	else if(this.projectionType == RewardProjectionType.STATEACTION){
		throw new RuntimeException("RewardValueProjection cannot generate all state-action Q-values because it was not" +
				"provided the Domain to enumerate the actions. Use the RewardValueProjection(RewardFunction, RewardProjectionType, Domain) " +
				"constructor to specify it.");
	}

	throw new RuntimeException("Unknown RewardProjectionType... this shouldn't happen.");
}
 
Example #7
Source File: BlockDudeLevelConstructor.java    From burlap with Apache License 2.0
/**
 * Returns the initial {@link State} of the first level.
 * @param domain the domain to which the state will belong.
 * @return the initial {@link State} of the first level.
 */
public static State getLevel1(Domain domain){

	int [][] map = new int[25][25];
	addFloor(map);

	map[3][1] = 1;
	map[3][2] = 1;

	map[7][1] = 1;

	map[11][1] = 1;
	map[11][2] = 1;


	BlockDudeState s = new BlockDudeState(
			new BlockDudeAgent(15, 1, 1, false),
			new BlockDudeMap(map),
			BlockDudeCell.exit(0, 1),
			BlockDudeCell.block("b0", 9, 1),
			BlockDudeCell.block("b1", 13, 1)
	);

	return s;
}
 
Example #8
Source File: QLearning.java    From burlap with Apache License 2.0
/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

	if(this.model == null){
		throw new RuntimeException("QLearning (and its subclasses) cannot execute planFromState because a model is not specified.");
	}

	SimulatedEnvironment env = new SimulatedEnvironment(this.domain, initialState);

	int eCount = 0;
	do{
		this.runLearningEpisode(env, this.maxEpisodeSize);
		eCount++;
	}while(eCount < numEpisodesForPlanning && maxQChangeInLastEpisode > maxQChangeForPlanningTermination);


	return new GreedyQPolicy(this);

}
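For orientation, here is a hedged usage sketch (not from the BURLAP source) showing how planFromState might be invoked on a QLearning agent in a standard grid world. The GridWorldDomain setup and the QLearning constructor arguments (discount, hashing factory, initial Q-value, learning rate) are illustrative assumptions; note that without further configuration (such as raising the number of planning episodes) only a small number of simulated episodes is run before the greedy policy is returned.

import burlap.behavior.policy.GreedyQPolicy;
import burlap.behavior.singleagent.learning.tdmethods.QLearning;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.statehashing.simple.SimpleHashableStateFactory;

public class QLearningPlanFromStateSketch {

	public static void main(String[] args) {

		// Build a simple grid world; generateDomain() attaches the model that planFromState needs.
		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setMapToFourRooms();
		SADomain domain = gwd.generateDomain();

		State initialState = new GridWorldState(new GridAgent(0, 0));

		// Illustrative hyperparameters: discount 0.99, zero Q-value initialization, learning rate 0.5.
		QLearning agent = new QLearning(domain, 0.99, new SimpleHashableStateFactory(), 0., 0.5);

		// Runs simulated learning episodes internally and returns a greedy policy over the learned Q-values.
		GreedyQPolicy policy = agent.planFromState(initialState);
		System.out.println("Planned action in the initial state: " + policy.action(initialState));
	}
}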
 
Example #9
Source File: GameSequenceVisualizer.java    From burlap with Apache License 2.0
private void updatePropTextArea(State s){

	if(!(domain instanceof OODomain) || !(s instanceof OOState)){
		return;
	}

	StringBuilder buf = new StringBuilder();

	List<PropositionalFunction> props = ((OODomain)domain).propFunctions();
	for(PropositionalFunction pf : props){
		List<GroundedProp> gps = pf.allGroundings((OOState)s);
		for(GroundedProp gp : gps){
			if(gp.isTrue((OOState)s)){
				buf.append(gp.toString()).append("\n");
			}
		}
	}

	propViewer.setText(buf.toString());
}
 
Example #10
Source File: LearningAgentToSGAgentInterface.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome executeAction(Action ga) {

	State prevState = this.currentState;
	synchronized(this.nextAction){
		this.nextAction.val = ga;
		this.nextAction.notifyAll();
	}


	synchronized(this.nextState){
		while(this.nextState.val == null){
			try{
				nextState.wait();
			} catch(InterruptedException ex){
				ex.printStackTrace();
			}
		}
		this.nextState.val = null;
	}

	EnvironmentOutcome eo = new EnvironmentOutcome(prevState, ga, this.currentState, this.lastReward, this.curStateIsTerminal);

	return eo;
}
 
Example #11
Source File: ExampleOOGridWorld.java    From burlap_examples with MIT License
public State sample(State s, Action a) {

	s = s.copy();
	GenericOOState gs = (GenericOOState)s;
	ExGridAgent agent = (ExGridAgent)gs.touch(CLASS_AGENT);
	int curX = agent.x;
	int curY = agent.y;

	int adir = actionDir(a);

	//sample direction with random roll
	double r = Math.random();
	double sumProb = 0.;
	int dir = 0;
	for(int i = 0; i < 4; i++){
		sumProb += this.transitionProbs[adir][i];
		if(r < sumProb){
			dir = i;
			break; //found direction
		}
	}

	//get resulting position
	int [] newPos = this.moveResult(curX, curY, dir);

	//set the new position
	agent.x = newPos[0];
	agent.y = newPos[1];

	//return the state we just modified
	return gs;
}
 
Example #12
Source File: PolicyUtils.java    From burlap with Apache License 2.0
/**
 * Returns the probability of the policy taking action a in state s by searching for the action
 * in the returned policy distribution from the provided {@link EnumerablePolicy}.
 * @param p the {@link EnumerablePolicy}
 * @param s the state in which the action would be taken
 * @param a the action being queried
 * @return the probability of this policy taking action a in state s
 */
public static double actionProbFromEnum(EnumerablePolicy p, State s, Action a){
	List <ActionProb> probs = p.policyDistribution(s);
	if(probs == null || probs.isEmpty()){
		throw new PolicyUndefinedException();
	}
	for(ActionProb ap : probs){
		if(ap.ga.equals(a)){
			return ap.pSelection;
		}
	}
	return 0.;
}
 
Example #13
Source File: TestHashing.java    From burlap with Apache License 2.0
public State generateLargeGW(SADomain domain, int width) {

		GridWorldState state = new GridWorldState(new GridAgent());

		for (int i = 0; i < width; i++) {
			state.locations.add(new GridLocation(i, width - 1 - i, "loc"+i));
		}
		return state;
	}
 
Example #14
Source File: ApprenticeshipLearning.java    From burlap with Apache License 2.0
@Override
public List<ActionProb> policyDistribution(State s) {
	HashableState hashableState = this.hashFactory.hashState(s);

	// If this state has not yet been visited, we need to compute a new distribution of actions
	if (!this.stateActionDistributionMapping.containsKey(hashableState)) {
		this.addNewDistributionForState(s);
	}
	return new ArrayList<ActionProb>(this.stateActionDistributionMapping.get(hashableState));
}
 
Example #15
Source File: LinearStateDiffVF.java    From burlap with Apache License 2.0
@Override
public double value(State s) {

	double [] features = this.fvgen.features(s);

	double sum = 0.;
	for(int i = 0; i < features.length; i++){
		sum += features[i] * this.parameters[i];
	}
	return sum;
}
 
Example #16
Source File: FittedVI.java    From burlap with Apache License 2.0
/**
 * Runs a single iteration of value iteration. Note that if the state samples have not been set, it will throw a runtime exception.
 * @return the maximum change in the value function.
 */
public double runIteration(){

	if(this.samples == null){
		throw new RuntimeException("FittedVI cannot run value iteration because the state samples have not been set. Use the setSamples method or the constructor to set them.");
	}

	SparseSampling ss = new SparseSampling(this.domain, this.gamma, this.hashingFactory, this.planningDepth, this.transitionSamples);
	ss.setModel(this.model);
	ss.setValueForLeafNodes(this.leafNodeInit);
	ss.toggleDebugPrinting(false);

	List <SupervisedVFA.SupervisedVFAInstance> instances = new ArrayList<SupervisedVFA.SupervisedVFAInstance>(this.samples.size());
	List <Double> oldVs = new ArrayList<Double>(this.samples.size());
	for(State s : this.samples){
		oldVs.add(this.valueFunction.value(s));
		instances.add(new SupervisedVFA.SupervisedVFAInstance(s, Helper.maxQ(ss, s)));
	}

	this.valueFunction = this.valueFunctionTrainer.train(instances);

	double maxDiff = 0.;
	for(int i = 0; i < this.samples.size(); i++){
		double newV = this.valueFunction.value(this.samples.get(i));
		double diff = Math.abs(newV - oldVs.get(i));
		maxDiff = Math.max(maxDiff, diff);
	}

	return maxDiff;

}
 
Example #17
Source File: RandomPolicy.java    From burlap with Apache License 2.0
@Override
public Action action(State s) {
	List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, s);
	if(gas.isEmpty()){
		throw new PolicyUndefinedException();
	}
	Action selection = gas.get(this.rand.nextInt(gas.size()));
	return selection;
}
 
Example #18
Source File: StateReachability.java    From burlap with Apache License 2.0
/**
 * Finds the set of states that are reachable under a policy from a source state. Reachability under a source policy means
 * that the only actions considered are those with non-zero probability of being selected by the policy, and that all
 * possible outcomes of those actions are considered.
 * @param domain the domain containing the model to use for evaluating reachable states
 * @param p the policy that must be followed
 * @param from the source {@link State} from which the policy would be initiated.
 * @param usingHashFactory the {@link burlap.statehashing.HashableStateFactory} used to hash states and test equality.
 * @return a {@link java.util.List} of {@link State} objects that could be reached.
 */
public static List<State> getPolicyReachableStates(SADomain domain, EnumerablePolicy p, State from, HashableStateFactory usingHashFactory){

	Set<HashableState> hashed = getPolicyReachableHashedStates(domain, p, from, usingHashFactory);
	List<State> states = new ArrayList<State>(hashed.size());
	for(HashableState sh : hashed){
		states.add(sh.s());
	}
	return states;

}
 
Example #19
Source File: ObservationUtilities.java    From burlap with Apache License 2.0
/**
 * A helper method for easily implementing the {@link ObservationFunction#sample(State, Action)} method that
 * samples an observation by first getting all non-zero probability observations, as returned by the {@link DiscreteObservationFunction#probabilities(State, Action)}
 * method, and then sampling from the enumerated distribution. Note that enumerating all observation probabilities may be computationally
 * inefficient; therefore, it may be better to directly implement the {@link ObservationFunction#sample(State, Action)}
 * method with efficient domain specific code.
 * @param of the {@link ObservationFunction} to use.
 * @param state the true MDP state
 * @param action the action that led to the MDP state
 * @return an observation represented with a {@link State}.
 */
public static State sampleByEnumeration(DiscreteObservationFunction of, State state, Action action){
	List<ObservationProbability> obProbs = of.probabilities(state, action);
	Random rand = RandomFactory.getMapped(0);
	double r = rand.nextDouble();
	double sumProb = 0.;
	for(ObservationProbability op : obProbs){
		sumProb += op.p;
		if(r < sumProb){
			return op.observation;
		}
	}

	throw new RuntimeException("Could not sample observaiton because observation probabilities did not sum to 1; they summed to " + sumProb);
}
 
Example #20
Source File: IISimpleHashableState.java    From burlap with Apache License 2.0
/**
 * Computes the hash code for the input state.
 * @param s the input state for which a hash code is to be computed
 * @return the hash code
 */
protected final int computeHashCode(State s){

	if(s instanceof OOState){
		return computeOOHashCode((OOState)s);
	}
	return computeFlatHashCode(s);

}
 
Example #21
Source File: SDPlannerPolicy.java    From burlap with Apache License 2.0
@Override
public boolean definedFor(State s) {
	if(this.dp == null){
		throw new RuntimeException("The valueFunction used by this Policy is not defined; therefore, the policy is undefined.");
	}
	return this.dp.hasCachedPlanForState(s);
}
 
Example #22
Source File: VIModelLearningPlanner.java    From burlap with Apache License 2.0
@Override
public double actionProb(State s, Action a) {
	if(!VIModelLearningPlanner.this.hasComputedValueFor(s)){
		VIModelLearningPlanner.this.observedStates.add(VIModelLearningPlanner.this.hashingFactory.hashState(s));
		VIModelLearningPlanner.this.rerunVI();
	}
	return p.actionProb(s, a);
}
 
Example #23
Source File: SingleStageNormalFormGame.java    From burlap with Apache License 2.0
@Override
public double[] reward(State s, JointAction ja, State sp) {

	double [] rewards = new double[this.nPlayers];
	String [] profile = new String[this.nPlayers];
	for(int i = 0; i < this.nPlayers; i++){
		profile[i] = ja.action(i).actionName();
	}
	StrategyProfile stprofile = SingleStageNormalFormGame.getStrategyProfile(this.actionNameToIndex, profile);
	for(int i = 0; i < nPlayers; i++){
		rewards[i] = this.payouts[i].getPayout(stprofile);
	}

	return rewards;
}
 
Example #24
Source File: GrimTrigger.java    From burlap with Apache License 2.0
@Override
public void observeOutcome(State s, JointAction jointAction,
		double[] jointReward, State sprime, boolean isTerminal) {

	int oagent = this.agentNum == 0 ? 1 : 0;
	if(jointAction.action(oagent).equals(opponentDefect)){
		grimTrigger = true;
	}


}
 
Example #25
Source File: FlatStateGridder.java    From burlap with Apache License 2.0
protected void gridStateHelper(MutableState s, List<Map.Entry<Object, VariableGridSpec>> gridDims, int index, List<State> createdStates){
	if(index == gridDims.size()){
		createdStates.add(s.copy());
	}
	else{
		Object key = gridDims.get(index).getKey();
		VariableGridSpec spec = gridDims.get(index).getValue();
		double cellWidth = spec.cellWidth();
		for(int i = 0; i < spec.numGridPoints; i++){
			double value = i*cellWidth + spec.lowerVal;
			s.set(key, value);
			this.gridStateHelper(s, gridDims, index+1, createdStates);
		}
	}
}
 
Example #26
Source File: SGNaiveQLAgent.java    From burlap with Apache License 2.0
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param qInitizalizer the Q-value initialization method
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, QFunction qInitizalizer, HashableStateFactory hashFactory) {
	this.init(d);
	this.discount = discount;
	this.learningRate = new ConstantLR(learningRate);
	this.hashFactory = hashFactory;
	this.qInit = qInitizalizer;
	
	this.qMap = new HashMap<HashableState, List<QValue>>();
	stateRepresentations = new HashMap<HashableState, State>();
	this.policy = new EpsilonGreedy(this, 0.1);
	
	this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
 
Example #27
Source File: SparseToDenseFeatures.java    From burlap with Apache License 2.0
@Override
public double[] features(State s) {

	List<StateFeature> sfs = this.sparseStateFeatures.features(s);
	double [] fv = new double[this.sparseStateFeatures.numFeatures()];
	for(StateFeature sf : sfs){
		fv[sf.id] = sf.value;
	}
	return fv;
}
 
Example #28
Source File: StateEnumerator.java    From burlap with Apache License 2.0
/**
 * Finds all states that are reachable from an input state and enumerates them
 * @param from the state from which all reachable states should be searched
 */
public void findReachableStatesAndEnumerate(State from){
	Set<HashableState> reachable = StateReachability.getReachableHashedStates(from, (SADomain)this.domain, this.hashingFactory);
	for(HashableState sh : reachable){
		this.getEnumeratedID(sh);
	}
}
 
Example #29
Source File: UniversalActionType.java    From burlap with Apache License 2.0
@Override
public List<Action> allApplicableActions(State s) {
	return allActions;
}
 
Example #30
Source File: MultiAgentPerformancePlotter.java    From burlap with Apache License 2.0
@Override
public void gameStarting(State s) {
	//do nothing
}