burlap.mdp.core.action.Action Java Examples

The following examples show how to use burlap.mdp.core.action.Action.
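Before the examples, a minimal sketch of the interface itself: an Action is just a named (and possibly parameterized) action selection, usually produced by an ActionType. The snippet below uses the stock SimpleAction and UniversalActionType classes from the same burlap.mdp.core.action package; it is an illustrative sketch, not one of the indexed examples.

import burlap.mdp.core.action.Action;
import burlap.mdp.core.action.ActionType;
import burlap.mdp.core.action.SimpleAction;
import burlap.mdp.core.action.UniversalActionType;

public class ActionBasics {
	public static void main(String[] args) {
		//an unparameterized action is just a wrapped name
		Action north = new SimpleAction("north");

		//an ActionType generates Action instances; UniversalActionType always yields the same unparameterized action
		ActionType northType = new UniversalActionType("north");

		System.out.println(north.actionName());   //prints "north"
		System.out.println(northType.typeName()); //prints "north"
	}
}
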
Example #1
Source File: ExampleOOGridWorld.java    From burlap_examples with MIT License
protected int actionDir(Action a){
	int adir = -1;
	if(a.actionName().equals(ACTION_NORTH)){
		adir = 0;
	}
	else if(a.actionName().equals(ACTION_SOUTH)){
		adir = 1;
	}
	else if(a.actionName().equals(ACTION_EAST)){
		adir = 2;
	}
	else if(a.actionName().equals(ACTION_WEST)){
		adir = 3;
	}
	return adir;
}
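In the tutorial, actionDir feeds the grid world's transition model, which turns the direction index into a coordinate change. The following is a hypothetical sketch of that usage; the movement bookkeeping is elided and the surrounding model class is an assumption.

//hypothetical sketch: how actionDir is typically consumed by the grid world's sample method
public State sample(State s, Action a) {
	s = s.copy();
	int adir = actionDir(a); //0 = north, 1 = south, 2 = east, 3 = west
	int dx = adir == 2 ? 1 : (adir == 3 ? -1 : 0);
	int dy = adir == 0 ? 1 : (adir == 1 ? -1 : 0);
	//...apply (dx, dy) to the agent position stored in s, respecting walls and map bounds...
	return s;
}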
 
Example #2
Source File: BeliefAgent.java    From burlap with Apache License 2.0
/**
 * Causes the agent to act for some fixed number of steps. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @param maxSteps the maximum number of steps to take in the environment
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminalOrMaxSteps(int maxSteps){
	Episode ea = new Episode();
	ea.initializeInState(this.environment.currentObservation());
	int c = 0;
	while(!this.environment.isInTerminalState() && c < maxSteps){
		Action ga = this.getAction(this.curBelief);
		EnvironmentOutcome eo = environment.executeAction(ga);
		ea.transition(ga, eo.op, eo.r);

		//update our belief
		this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);
		
		c++;
		
	}
	
	return ea;
}
 
Example #3
Source File: ExampleGridWorld.java    From burlap_examples with MIT License
protected int actionDir(Action a){
	int adir = -1;
	if(a.actionName().equals(ACTION_NORTH)){
		adir = 0;
	}
	else if(a.actionName().equals(ACTION_SOUTH)){
		adir = 1;
	}
	else if(a.actionName().equals(ACTION_EAST)){
		adir = 2;
	}
	else if(a.actionName().equals(ACTION_WEST)){
		adir = 3;
	}
	return adir;
}
 
Example #4
Source File: QLTutorial.java    From burlap_examples with MIT License
@Override
public List<QValue> qValues(State s) {
	//first get hashed state
	HashableState sh = this.hashingFactory.hashState(s);

	//check if we already have stored values
	List<QValue> qs = this.qValues.get(sh);

	//create and add initialized Q-values if we don't have them stored for this state
	if(qs == null){
		List<Action> actions = this.applicableActions(s);
		qs = new ArrayList<QValue>(actions.size());
		//create a Q-value for each action
		for(Action a : actions){
			//add q with initialized value
			qs.add(new QValue(s, a, this.qinit.qValue(s, a)));
		}
		//store this for later
		this.qValues.put(sh, qs);
	}

	return qs;
}
 
Example #5
Source File: LinearDiffRFVInit.java    From burlap with Apache License 2.0
@Override
public double reward(State s, Action a, State sprime) {

	double [] features;
	if(this.rfFeaturesAreForNextState){
		features = this.rfFvGen.features(sprime);
	}
	else{
		features = this.rfFvGen.features(s);
	}
	double sum = 0.;
	for(int i = 0; i < features.length; i++){
		sum += features[i] * this.parameters[i];
	}
	return sum;

}
 
Example #6
Source File: QLearning.java    From burlap with Apache License 2.0
/**
 * Returns the {@link QLearningStateNode} object stored for the given hashed state. If no {@link QLearningStateNode} object
 * is stored, then it is created and its Q-values are initialized using this object's {@link burlap.behavior.valuefunction.QFunction} data member.
 * @param s the hashed state for which to get the {@link QLearningStateNode} object
 * @return the {@link QLearningStateNode} object stored for the given hashed state, created and initialized if it was not already stored.
 */
protected QLearningStateNode getStateNode(HashableState s){
	
	QLearningStateNode node = qFunction.get(s);
	
	if(node == null){
		node = new QLearningStateNode(s);
		List<Action> gas = this.applicableActions(s.s());
		if(gas.isEmpty()){
			gas = this.applicableActions(s.s());
			throw new RuntimeErrorException(new Error("No possible actions in this state, cannot continue Q-learning"));
		}
		for(Action ga : gas){
			node.addQValue(ga, qInitFunction.qValue(s.s(), ga));
		}
		
		qFunction.put(s, node);
	}
	
	return node;
	
}
 
Example #7
Source File: UCTStateNode.java    From burlap with Apache License 2.0
/**
 * Initializes the UCT state node.
 * @param s the state that this node wraps
 * @param d the depth of the node
 * @param actionTypes the possible OO-MDP actions that can be taken
 * @param constructor a {@link UCTActionNode} factory that can be used to create ActionNodes for each of the actions.
 */
public UCTStateNode(HashableState s, int d, List <ActionType> actionTypes, UCTActionConstructor constructor){
	
	state = s;
	depth = d;
	
	n = 0;
	
	actionNodes = new ArrayList<UCTActionNode>();

	List<Action> actions = ActionUtils.allApplicableActionsForTypes(actionTypes, s.s());
	for(Action a : actions){
		UCTActionNode an = constructor.generate(a);
		actionNodes.add(an);
	}

}
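This constructor only enumerates and wraps the applicable actions; the visit count n and the per-action statistics held by each UCTActionNode are what UCT later uses for its upper-confidence action selection, typically of the form argmax_a [ Q(s,a) + C * sqrt( ln n / n(s,a) ) ]. The selection rule itself lives in the planner, not in this node.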
 
Example #8
Source File: ActionControllerPlaceBlock.java    From burlapcraft with GNU Lesser General Public License v3.0
@Override
public int executeAction(Action a) {
	
	System.out.println("Place Block");
	HelperActions.placeBlock();
	
	return this.delayMS;
}
 
Example #9
Source File: MinecraftModel.java    From burlapcraft with GNU Lesser General Public License v3.0
@Override
public State sample(State s, Action a) {

	GenericOOState gs = (GenericOOState)s.copy();

	String aname = a.actionName();
	if(aname.equals(HelperNameSpace.ACTION_MOVE)){
		simMove(gs);
	}
	else if(aname.equals(HelperNameSpace.ACTION_ROTATE_LEFT)){
		simRotate(gs, HelperNameSpace.RotDirection.size - 1);
	}
	else if(aname.equals(HelperNameSpace.ACTION_ROTATE_RIGHT)){
		simRotate(gs, 1);
	}
	else if(aname.equals(HelperNameSpace.ACTION_AHEAD)){
		simPitch(gs, 0);
	}
	else if(aname.equals(HelperNameSpace.ACTION_DOWN_ONE)){
		simPitch(gs, HelperNameSpace.VertDirection.size - 1);
	}
	else if(aname.equals(HelperNameSpace.ACTION_PLACE_BLOCK)){
		simPlace(gs);
	}
	else if(aname.equals(HelperNameSpace.ACTION_DEST_BLOCK)){
		simDestroy(gs);
	}
	else if(aname.equals(HelperNameSpace.ACTION_CHANGE_ITEM)){
		simChangeItem(gs);
	}
	else{
		throw new RuntimeException("MinecraftModel is not defined for action " + aname);
	}

	return gs;
}
 
Example #10
Source File: TitForTat.java    From burlap with Apache License 2.0
/**
 * Initializes with the specified cooperate and defect actions for both players.
 * @param domain the domain in which this agent will play.
 * @param coop the cooperate action for both players
 * @param defect the defect action for both players
 */
public TitForTatAgentFactory(SGDomain domain, Action coop, Action defect){
	this.domain = domain;
	this.myCoop = coop;
	this.myDefect = defect;
	this.opponentCoop = coop;
	this.opponentDefect = defect;
	
}
 
Example #11
Source File: UCTTreeWalkPolicy.java    From burlap with Apache License 2.0
@Override
public double actionProb(State s, Action a) {
	if(this.action(s).equals(a)){
		return 1.;
	}
	return 0.;
}
 
Example #12
Source File: GraphDefinedDomain.java    From burlap with Apache License 2.0
@Override
public List<Action> allApplicableActions(State s) {
	Action a = new GraphAction(aId);
	if(applicableInState(s)){
		return Arrays.asList(a);
	}
	return new ArrayList<Action>();
}
 
Example #13
Source File: QLTutorial.java    From burlap_examples with MIT License
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
	//initialize our episode object with the initial state of the environment
	Episode e = new Episode(env.currentObservation());

	//behave until a terminal state or max steps is reached
	State curState = env.currentObservation();
	int steps = 0;
	while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

		//select an action
		Action a = this.learningPolicy.action(curState);

		//take the action and observe outcome
		EnvironmentOutcome eo = env.executeAction(a);

		//record result
		e.transition(eo);

		//get the max Q value of the resulting state if it's not terminal, 0 otherwise
		double maxQ = eo.terminated ? 0. : this.value(eo.op);

		//update the old Q-value
		QValue oldQ = this.storedQ(curState, a);
		oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);


		//update state pointer to next environment state observed
		curState = eo.op;
		steps++;

	}

	return e;
}
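The oldQ.q assignment above is the standard tabular Q-learning update, Q(s,a) <- Q(s,a) + alpha * ( r + gamma * max_a' Q(s',a') - Q(s,a) ), with alpha = this.learningRate, gamma = this.gamma, and the max term (maxQ) taken as 0 when the outcome state is terminal.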
 
Example #14
Source File: KWIKModel.java    From burlap with Apache License 2.0
public static List<Action> unmodeledActions(KWIKModel model, List<ActionType> actionTypes, State s){
	List<Action> actions = ActionUtils.allApplicableActionsForTypes(actionTypes, s);
	List<Action> unmodeled = new ArrayList<Action>(actions.size());
	for(Action a : actions){
		if(!model.transitionIsModeled(s, a)){
			unmodeled.add(a);
		}
	}
	return unmodeled;
}
 
Example #15
Source File: FourierBasis.java    From burlap with Apache License 2.0
@Override
public FourierBasis copy() {
	FourierBasis fb = new FourierBasis(this.inputFeatures, this.order, this.maxNonZeroCoefficients);
	fb.numStateVariables = this.numStateVariables;
	fb.coefficientVectors = new ArrayList<short[]>(this.coefficientVectors);
	fb.actionFeatureMultiplier = new HashMap<Action, Integer>(this.actionFeatureMultiplier);

	return fb;
}
 
Example #16
Source File: PolicyFromJointPolicy.java    From burlap with Apache License 2.0
@Override
public Action action(State s) {
	if(!this.synchronizeJointActionSelectionAmongAgents){
		return ((JointAction)this.jointPolicy.action(s)).action(this.actingAgent);
	}
	else{
		return this.jointPolicy.getAgentSynchronizedActionSelection(this.actingAgent, s);
	}
}
 
Example #17
Source File: SDPlannerPolicy.java    From burlap with Apache License 2.0
@Override
public double actionProb(State s, Action a) {
	if(a.equals(this.action(s))){
		return 1.;
	}
	return 0.;
}
 
Example #18
Source File: FrostbiteRF.java    From burlap with Apache License 2.0
@Override
public double reward(State s, Action a, State sprime) {
	if (inWater.someGroundingIsTrue((OOState)sprime))
		return lostReward;
	if (iglooBuilt.someGroundingIsTrue((OOState)sprime) && onIce.someGroundingIsTrue((OOState)s))
		return goalReward;
	if (numberPlatformsActive((FrostbiteState)s) != numberPlatformsActive((FrostbiteState)sprime))
		return activatedPlatformReward;
	return defaultReward;
}
 
Example #19
Source File: EquilibriumPlayingSGAgent.java    From burlap with Apache License 2.0
@Override
public Action action(State s) {

	//enumerate this agent's applicable actions and build the bimatrix game for this state
	List<Action> myActions = ActionUtils.allApplicableActionsForTypes(this.agentType.actions, s);
	BimatrixTuple bimatrix = this.constructBimatrix(s, myActions);

	//solve for an equilibrium strategy and sample this agent's (row player) action from it
	solver.solve(bimatrix.rowPayoffs, bimatrix.colPayoffs);
	double [] strategy = solver.getLastComputedRowStrategy();
	Action selection = myActions.get(this.sampleStrategy(strategy));
	
	return selection;
}
 
Example #20
Source File: StaticWeightedAStar.java    From burlap with Apache License 2.0
@Override
public double computeF(PrioritizedSearchNode parentNode, Action generatingAction, HashableState successorState, double r) {
	double cumR = 0.;
	if(parentNode != null){
		double pCumR = cumulatedRewardMap.get(parentNode.s);
		cumR = pCumR + r;
	}
	
	double H  = heuristic.h(successorState.s());
	lastComputedCumR = cumR;
	double F = cumR + (this.epsilonP1*H);
	
	return F;
}
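The returned value is a weighted A* evaluation, F = cumR + epsilonP1 * H: cumR is the reward accumulated from the root (the g term), H is the heuristic estimate for the successor state (the h term), and epsilonP1 weights the heuristic; a weight greater than 1 makes the search greedier with respect to the heuristic, trading solution optimality for speed.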
 
Example #21
Source File: ManualAgentsCommands.java    From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

	OptionSet oset = this.parser.parse(argString.split(" "));
	if(oset.has("h")){
		os.println("agentName actionName [actionParam*]\n" +
				"Sets the action for manual agent named agentName to the action with the name actionName. If the action" +
				"is a parameterized action, then the parameters must also be specified.");
		return 0;
	}

	List<String> args = (List<String>)oset.nonOptionArguments();

	if(args.size() < 2){
		return -1;
	}

	String agentName = args.get(0);

	String aname = args.get(1);

	ActionType action = ((SGDomain)shell.getDomain()).getActionType(aname);
	if(action == null){
		os.println("Cannot set action to " + aname + " because that action name is not known.");
		return 0;
	}

	Action ga = action.associatedAction(this.actionArgs(args));

	ManualSGAgent agent = manualAgents.get(agentName);
	if(agent == null){
		os.println("No manual agent named " + agentName);
		return 0;
	}

	agent.setNextAction(ga);

	return 0;
}
 
Example #22
Source File: ApproximateQLearning.java    From burlap with Apache License 2.0
@Override
public List<QValue> qValues(State s) {
	s = this.stateMapping.mapState(s);
	List<Action> actions = this.applicableActions(s);
	List<QValue> qs = new ArrayList<QValue>(actions.size());
	for(Action a : actions){
		QValue q = new QValue(s, a, this.qValue(s, a));
		qs.add(q);
	}
	return qs;
}
 
Example #23
Source File: TabularModel.java    From burlap with Apache License 2.0
@Override
public boolean transitionIsModeled(State s, Action ga) {
	
	StateActionNode san = this.getStateActionNode(this.hashingFactory.hashState(s), ga);
	if(san == null){
		return false;
	}
	if(san.nTries < this.nConfident){
		return false;
	}
	
	return true;
}
 
Example #24
Source File: Visualizer.java    From burlap with Apache License 2.0
/**
 * Updates the state and action for the {@link burlap.visualizer.StateRenderLayer} and {@link burlap.visualizer.StateActionRenderLayer}; then repaints.
 * @param s the {@link State} to be painted.
 * @param a the {@link Action} to be painted.
 */
public void updateStateAction(State s, Action a){
	this.srender.updateState(s);
	if(this.sarender != null) {
		this.sarender.updateRenderedStateAction(s, a);
	}
	repaint();
}
 
Example #25
Source File: DQN.java    From burlap_caffe with Apache License 2.0
@Override
public double evaluate(State state, Action action) {
    FloatBlob output = qValuesForState(state);

    int a = actionSet.map(action);
    return output.data_at(0,a,0,0);
}
 
Example #26
Source File: CPClassicModel.java    From burlap with Apache License 2.0
@Override
public State sample(State s, Action a) {
	s = s.copy();

	if(a.actionName().equals(CartPoleDomain.ACTION_RIGHT)){
		return moveClassicModel(s, 1);
	}
	else if(a.actionName().equals(CartPoleDomain.ACTION_LEFT)){
		return moveClassicModel(s, -1);
	}
	throw new RuntimeException("Unknown action " + a.actionName());

}
 
Example #27
Source File: BoltzmannPolicyGradient.java    From burlap with Apache License 2.0
/**
 * Computes the gradient of a Boltzmann policy using the given differentiable valueFunction.
 * @param s the input state of the policy gradient
 * @param a the action whose policy probability gradient is being queried
 * @param planner the differentiable {@link DifferentiableQFunction} valueFunction
 * @param beta the Boltzmann beta parameter. This parameter is the inverse of the Boltzmann temperature. As beta becomes larger, the policy becomes more deterministic. Should lie in [0, +infinity).
 * @return the gradient of the policy.
 */
public static FunctionGradient computeBoltzmannPolicyGradient(State s, Action a, DifferentiableQFunction planner, double beta){


	//get q objects
	List<QValue> Qs = ((QProvider)planner).qValues(s);
	double [] qs = new double[Qs.size()];
	for(int i = 0; i < Qs.size(); i++){
		qs[i] = Qs.get(i).q;
	}

	//find matching action index
	int aind = -1;
	for(int i = 0; i < Qs.size(); i++){
		if(Qs.get(i).a.equals(a)){
			aind = i;
			break;
		}
	}

	if(aind == -1){
		throw new RuntimeException("Error in computing BoltzmannPolicyGradient: Could not find query action in Q-value list.");
	}

	FunctionGradient [] qGradients = new FunctionGradient[qs.length];
	for(int i = 0; i < qs.length; i++){
		qGradients[i] = planner.qGradient(s, Qs.get(i).a);
	}


	FunctionGradient policyGradient = computePolicyGradient(qs, qGradients, aind, beta);

	return policyGradient;

}
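The policy whose gradient is computed here is the Boltzmann (softmax) distribution over Q-values, pi(a|s) = exp(beta * Q(s,a)) / Σ_a' exp(beta * Q(s,a')). The method collects the Q-values and their parameter gradients and hands them, along with the index of the queried action, to computePolicyGradient, which differentiates that expression with respect to the value-function parameters.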
 
Example #28
Source File: DenseLinearVFA.java    From burlap with Apache License 2.0
@Override
public double evaluate(State s, Action a) {
	this.currentStateFeatures = this.stateFeatures.features(s);
	this.currentActionOffset = this.getActionOffset(a);
	int indOff = this.currentActionOffset*this.currentStateFeatures.length;
	double val = 0;
	for(int i = 0; i < this.currentStateFeatures.length; i++){
		val += this.currentStateFeatures[i] * this.stateActionWeights[i+indOff];
	}
	this.currentValue = val;
	this.currentGradient = null;
	this.lastState = s;
	return this.currentValue;
}
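The value computed here is a plain linear function approximation with a separate weight block per action: Q(s,a) = Σ_i phi_i(s) * w[offset(a) + i], where phi(s) are the state features produced by stateFeatures and offset(a) = getActionOffset(a) * |phi(s)| selects that action's slice of stateActionWeights.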
 
Example #29
Source File: GreedyDeterministicQPolicy.java    From burlap with Apache License 2.0
@Override
public Action action(State s) {
	
	List<QValue> qValues = this.qplanner.qValues(s);
	double maxQV = Double.NEGATIVE_INFINITY;
	QValue maxQ = null;
	for(QValue q : qValues){
		if(q.q > maxQV){
			maxQV = q.q;
			maxQ = q;
		}
	}
	
	return maxQ.a;
}
 
Example #30
Source File: BFSMarkovOptionModel.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome sample(State s, Action a) {
	//primitive actions are delegated directly to the underlying model
	if(!(a instanceof Option)){
		return model.sample(s, a);
	}

	//options are rolled out to termination in a simulated environment rooted at s,
	//and the aggregate (discounted) outcome of that rollout is returned
	Option o = (Option)a;
	SimulatedEnvironment env = new SimulatedEnvironment(model, s);
	return o.control(env, discount);
}