burlap.mdp.singleagent.environment.EnvironmentOutcome Java Examples

The following examples show how to use burlap.mdp.singleagent.environment.EnvironmentOutcome. Each example notes the project and source file it comes from, along with that project's license.
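
As the examples below show, an EnvironmentOutcome is a plain record of a single environment interaction: the previous state o, the action a, the resulting state op, the reward r, and the terminated flag for op. Here is a minimal sketch of collecting outcomes by rolling out a Policy in an Environment. It only uses calls that appear in the examples on this page (executeAction, currentObservation, isInTerminalState, and Policy.action); the sketch's own class and method names are illustrative, and the import paths for Policy, Action, State, and Environment are assumed from BURLAP's package layout rather than shown on this page.

import java.util.ArrayList;
import java.util.List;

import burlap.behavior.policy.Policy;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;

public class OutcomeCollectionSketch {

    // Illustrative helper, not a BURLAP API: rolls the policy in the environment
    // for at most maxSteps steps and returns the observed outcomes.
    public static List<EnvironmentOutcome> collect(Environment env, Policy policy, int maxSteps) {
        List<EnvironmentOutcome> outcomes = new ArrayList<EnvironmentOutcome>();
        int steps = 0;
        while (!env.isInTerminalState() && steps < maxSteps) {
            State cur = env.currentObservation();
            Action a = policy.action(cur);
            EnvironmentOutcome eo = env.executeAction(a);

            // eo.o is the prior state, eo.a the action, eo.op the next state,
            // eo.r the reward, and eo.terminated whether op is terminal
            outcomes.add(eo);
            steps++;
        }
        return outcomes;
    }
}
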
Example #1
Source File: DeepQLearner.java    From burlap_caffe with Apache License 2.0
@Override
public void updateQFunction(List<EnvironmentOutcome> samples) {

    // fill up experience replay
    if (runningRandomPolicy) {
        if (totalSteps >= replayStartSize) {
            System.out.println("Replay sufficiently filled. Beginning training...");

            setLearningPolicy(trainingPolicy);
            runningRandomPolicy = false;

            // reset stale update timer
            this.stepsSinceStale = 1;
        }

        return;
    }

    // only update every updateFreq steps
    if (totalSteps % updateFreq == 0) {
        ((DQN)vfa).updateQFunction(samples, (DQN)staleVfa);
    }
}
 
Example #2
Source File: FrameExperienceMemory.java    From burlap_caffe with Apache License 2.0
@Override
public void addExperience(EnvironmentOutcome eo) {
    // If this is the first frame of the episode, add the o frame.
    if (currentFrameHistory.historyLength == 0) {
        currentFrameHistory = addFrame(((ALEState)eo.o).getScreen());
    }

    // If this experience ends in a terminal state,
    // the terminal frame will never be used so don't add it.
    FrameHistory op;
    if (eo.terminated) {
        op = new FrameHistory(currentFrameHistory.index, 0);
    } else {
        op = addFrame(((ALEState)eo.op).getScreen());
    }

    experiences[next] = new FrameExperience(currentFrameHistory, actionSet.map(eo.a), op, eo.r, eo.terminated);
    next = (next+1) % experiences.length;
    size = Math.min(size+1, experiences.length);

    currentFrameHistory = op;
}
 
Example #3
Source File: BeliefAgent.java    From burlap with Apache License 2.0
/**
 * Causes the agent to act for some fixed number of steps. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @param maxSteps the maximum number of steps to take in the environment
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminalOrMaxSteps(int maxSteps){
	Episode ea = new Episode();
	ea.initializeInState(this.environment.currentObservation());
	int c = 0;
	while(!this.environment.isInTerminalState() && c < maxSteps){
		Action ga = this.getAction(this.curBelief);
		EnvironmentOutcome eo = environment.executeAction(ga);
		ea.transition(ga, eo.op, eo.r);

		//update our belief
		this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);
		
		c++;
		
	}
	
	return ea;
}
 
Example #4
Source File: MinecraftEnvironment.java    From burlapcraft with GNU Lesser General Public License v3.0
@Override
public EnvironmentOutcome executeAction(Action a) {
	State startState = this.currentObservation();
	
	ActionController ac = this.actionControllerMap.get(a.actionName());
	int delay = ac.executeAction(a);
	if (delay > 0) {
		try {
			Thread.sleep(delay);
		} catch(InterruptedException e) {
			e.printStackTrace();
		}
	}
	
	State finalState = this.currentObservation();
	
	this.lastReward = this.rewardFunction.reward(startState, a, finalState);
	
	EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());
	
	return eo;
}
 
Example #5
Source File: FactoredModel.java    From burlap with Apache License 2.0
@Override
public List<TransitionProb> transitions(State s, Action a) {

	if(!(this.stateModel instanceof FullStateModel)){
		throw new RuntimeException("Factored Model cannot enumerate transition distribution, because the state model does not implement FullStateModel");
	}

	List<StateTransitionProb> stps = ((FullStateModel)this.stateModel).stateTransitions(s, a);
	List<TransitionProb> tps = new ArrayList<TransitionProb>(stps.size());
	for(StateTransitionProb stp : stps){
		double r = this.rf.reward(s, a, stp.s);
		boolean t = this.tf.isTerminal(stp.s);
		TransitionProb tp = new TransitionProb(stp.p, new EnvironmentOutcome(s, a, stp.s, r, t));
		tps.add(tp);
	}

	return tps;
}
 
Example #6
Source File: LearningAgentToSGAgentInterface.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome executeAction(Action ga) {

	State prevState = this.currentState;
	synchronized(this.nextAction){
		this.nextAction.val = ga;
		this.nextAction.notifyAll();
	}


	synchronized(this.nextState){
		while(this.nextState.val == null){
			try{
				nextState.wait();
			} catch(InterruptedException ex){
				ex.printStackTrace();
			}
		}
		this.nextState.val = null;
	}

	EnvironmentOutcome eo = new EnvironmentOutcome(prevState, ga, this.currentState, this.lastReward, this.curStateIsTerminal);

	return eo;
}
 
Example #7
Source File: SARSCollector.java    From burlap with Apache License 2.0
@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {
	
	if(intoDataset == null){
		intoDataset = new SARSData();
	}
	
	State curState = s;
	int nsteps = 0;
	boolean terminated = model.terminal(s);
	while(!terminated && nsteps < maxSteps){
		
		List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, curState);
		Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
		EnvironmentOutcome eo = model.sample(curState, ga);
		intoDataset.add(curState, ga, eo.r, eo.op);
		curState = eo.op;
		terminated = eo.terminated;
		nsteps++;
		
	}
	
	
	return intoDataset;
	
}
 
Example #8
Source File: SARSCollector.java    From burlap with Apache License 2.0
@Override
public SARSData collectDataFrom(Environment env, int maxSteps, SARSData intoDataset) {

	if(intoDataset == null){
		intoDataset = new SARSData();
	}

	int nsteps = 0;
	while(!env.isInTerminalState() && nsteps < maxSteps){
		List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, env.currentObservation());
		Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
		EnvironmentOutcome eo = env.executeAction(ga);
		intoDataset.add(eo.o, eo.a, eo.r, eo.op);

		nsteps++;
	}

	return intoDataset;
}
 
Example #9
Source File: DynamicWeightedAStar.java    From burlap with Apache License 2.0
public double computeF(PrioritizedSearchNode parentNode, Action generatingAction, HashableState successorState, EnvironmentOutcome eo) {
	double cumR = 0.;
	int d = 0;
	if(parentNode != null){
		double pCumR = cumulatedRewardMap.get(parentNode.s);
		cumR = pCumR + eo.r;
		
		int pD = depthMap.get(parentNode.s);
		if(!(generatingAction instanceof Option)){
			d = pD + 1;
		}
		else{
			d = pD + ((EnvironmentOptionOutcome)eo).numSteps();
		}
	}
	
	double H  = heuristic.h(successorState.s());
	lastComputedCumR = cumR;
	lastComputedDepth = d;
	double weightedE = this.epsilon * this.epsilonWeight(d);
	double F = cumR + ((1. + weightedE)*H);
	
	return F;
}
 
Example #10
Source File: DelegatedModel.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome sample(State s, Action a) {
	SampleModel delgate = delgates.get(a.actionName());
	if(delgate == null){
		return defaultMode.sample(s, a);
	}
	return delgate.sample(s, a);
}
 
Example #11
Source File: TabularModel.java    From burlap with Apache License 2.0
@Override
public void updateModel(EnvironmentOutcome eo) {
	
	HashableState sh = this.hashingFactory.hashState(eo.o);
	HashableState shp = this.hashingFactory.hashState(eo.op);
	
	if(eo.terminated){
		this.terminalStates.add(shp);
	}
	
	StateActionNode san = this.getOrCreateActionNode(sh, eo.a);
	san.update(eo.r, shp);

}
 
Example #12
Source File: RMaxModel.java    From burlap with Apache License 2.0
@Override
public List<TransitionProb> transitions(State s, Action a) {
	List<TransitionProb> tps = sourceModel.transitions(s, a);
	for(TransitionProb tp : tps){
		EnvironmentOutcome eo = tp.eo;
		this.modifyEO(eo);
	}
	return tps;
}
 
Example #13
Source File: RMaxModel.java    From burlap with Apache License 2.0
protected void modifyEO(EnvironmentOutcome eo){
	double oldPotential = potentialFunction.potentialValue(eo.o);
	double nextPotential = 0.;
	if(!eo.terminated){
		nextPotential = potentialFunction.potentialValue(eo.op);
	}
	double bonus = gamma * nextPotential - oldPotential;
	eo.r = eo.r + bonus;

	if(!KWIKModel.Helper.stateTransitionsModeled(this, actionsTypes, eo.o)){
		eo.terminated = true;
	}
}
 
Example #14
Source File: FixedSizeMemory.java    From burlap with Apache License 2.0
/**
 * Initializes with the size of the memory and whether the most recent memory should always be included
 * in the returned results from the sampling memory.
 * @param size the number of experiences to store
 * @param alwaysIncludeMostRecent if true, then the result of {@link #sampleExperiences(int)} will always include the most recent experience, with the remaining n-1 samples drawn uniformly at random.
 *                                   If false, then it is a pure random sample with replacement.
 */
public FixedSizeMemory(int size, boolean alwaysIncludeMostRecent) {
	if(size < 1){
		throw new RuntimeException("FixedSizeMemory requires memory size > 0; was request size of " + size);
	}
	this.alwaysIncludeMostRecent = alwaysIncludeMostRecent;
	this.memory = new EnvironmentOutcome[size];
}
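
The constructor above only allocates the buffer; experiences are added and drawn through the memory methods used elsewhere on this page. The following is a rough usage sketch under the assumption that FixedSizeMemory exposes addExperience(EnvironmentOutcome) and sampleExperiences(int), as Examples #2 and #24 suggest for the shared experience-memory interface. The wrapper class, variable names, and step counts are illustrative, and the import for FixedSizeMemory is omitted because its package is not shown in these examples.

import java.util.List;

import burlap.behavior.policy.Policy;
import burlap.mdp.core.action.Action;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
// import for FixedSizeMemory omitted: its package is not shown in these examples

public class ReplayMemorySketch {

    // Illustrative only: fill a FixedSizeMemory from an environment and draw a minibatch.
    public static List<EnvironmentOutcome> fillAndSample(Environment env, Policy policy) {
        // keep the 10,000 most recent experiences; always include the newest one when sampling
        FixedSizeMemory memory = new FixedSizeMemory(10000, true);

        for (int i = 0; i < 1000 && !env.isInTerminalState(); i++) {
            Action a = policy.action(env.currentObservation());
            EnvironmentOutcome eo = env.executeAction(a);
            memory.addExperience(eo);  // assumed memory-interface method, see Example #2
        }

        // assumed memory-interface method, see Example #24; with alwaysIncludeMostRecent=true
        // the latest outcome is always part of the returned batch
        return memory.sampleExperiences(32);
    }
}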
 
Example #15
Source File: SparseSampling.java    From burlap with Apache License 2.0
/**
 * Estimates the Q-value using sampling from the transition dynamics. This is the standard Sparse Sampling procedure.
 * @param ga the action for which the Q-value estimate is to be returned
 * @return the Q-value estimate
 */
protected double sampledQEstimate(Action ga){
	
	double sum = 0.;
	
	//generate C samples
	int c = SparseSampling.this.getCAtHeight(this.height);
	for(int i = 0; i < c; i++){
		
		//execute
		EnvironmentOutcome eo = model.sample(sh.s(), ga);
		State ns = eo.op;
		
		//manage option stepsize modifications
		int k = 1;
		if(ga instanceof Option){
		k = ((EnvironmentOptionOutcome)eo).numSteps();
		}
		
	//get reward; our rf will automatically do cumulative discounting if it's an option
		double r = eo.r;
		
		StateNode nsn = SparseSampling.this.getStateNode(ns, this.height-k);
		
		sum += r + Math.pow(SparseSampling.this.gamma, k)*nsn.estimateV();
	}
	sum /= (double)c;
	
	return sum;
}
 
Example #16
Source File: PerformancePlotter.java    From burlap with Apache License 2.0
@Override
synchronized public void observeEnvironmentInteraction(EnvironmentOutcome eo) {
	if(!this.collectData){
		return;
	}

	this.curTrial.stepIncrement(eo.r);
	this.curTimeStep++;

}
 
Example #17
Source File: RLGlueAgent.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome executeAction(burlap.mdp.core.action.Action ga) {

	if(this.curState == null){
		this.blockUntilStateReceived();
	}

	if(!(ga instanceof RLGlueDomain.RLGlueActionType)){
		throw new RuntimeException("RLGlueEnvironment cannot execute actions that are not instances of RLGlueDomain.RLGlueActionType.");
	}

	State prevState = this.curState;

	int actionId = ((RLGlueDomain.RLGlueActionType)ga).getInd();
	synchronized (nextAction) {
		this.nextStateReference.val = null;
		this.nextAction.val = actionId;
		this.nextAction.notifyAll();
	}

	DPrint.cl(debugCode, "Set action (" + this.nextAction.val + ")");

	State toRet;
	synchronized (this.nextStateReference) {
		while(this.nextStateReference.val == null){
			try{
				DPrint.cl(debugCode, "Waiting for state from RLGlue Server...");
				nextStateReference.wait();
			} catch(InterruptedException ex){
				ex.printStackTrace();
			}
		}
		toRet = this.curState;
		this.nextStateReference.val = null;
	}

	EnvironmentOutcome eo = new EnvironmentOutcome(prevState, ga, toRet, this.lastReward, this.curStateIsTerminal);

	return eo;
}
 
Example #18
Source File: FullModel.java    From burlap with Apache License 2.0
/**
 * Method to implement the {@link SampleModel#sample(State, Action)} method when the
 * {@link FullModel#transitions(State, Action)} method is implemented. Operates by calling
 * the {@link FullModel#transitions(State, Action)} method, rolls a random number, and selects a
 * transition according to the probability specified by {@link FullModel#transitions(State, Action)}.
 * @param model the {@link FullModel} with the implemented {@link FullModel#transitions(State, Action)} method.
 * @param s the input state
 * @param a the action to be applied in the input state
 * @return a sampled transition ({@link EnvironmentOutcome}).
 */
public static EnvironmentOutcome sampleByEnumeration(FullModel model, State s, Action a){
	List<TransitionProb> tps = model.transitions(s, a);
	double roll = RandomFactory.getMapped(0).nextDouble();
	double sum = 0;
	for(TransitionProb tp : tps){
		sum += tp.p;
		if(roll < sum){
			return tp.eo;
		}
	}

	throw new RuntimeException("Transition probabilities did not sum to one, they summed to " + sum);
}
 
Example #19
Source File: FactoredModel.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome sample(State s, Action a) {

	State sprime = this.stateModel.sample(s, a);
	double r = this.rf.reward(s, a, sprime);
	boolean t = this.tf.isTerminal(sprime);

	EnvironmentOutcome eo = new EnvironmentOutcome(s, a, sprime, r, t);

	return eo;
}
 
Example #20
Source File: Option.java    From burlap with Apache License 2.0
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
	Random rand = RandomFactory.getMapped(0);
	State initial = env.currentObservation();
	State cur = initial;

	Episode episode = new Episode(cur);
	Episode history = new Episode(cur);
	double roll;
	double pT;
	int nsteps = 0;
	double r = 0.;
	double cd = 1.;
	do{
		Action a = o.policy(cur, history);
		EnvironmentOutcome eo = env.executeAction(a);
		nsteps++;
		r += cd*eo.r;
		cur = eo.op;
		cd *= discount;


		history.transition(a, eo.op, eo.r);

		AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
		episode.transition(annotatedAction, eo.op, r);


		pT = o.probabilityOfTermination(eo.op, history);
		roll = rand.nextDouble();

	}while(roll > pT && !env.isInTerminalState());

	EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

	return eoo;

}
 
Example #21
Source File: BFSMarkovOptionModel.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome sample(State s, Action a) {
	if(!(a instanceof Option)){
		return model.sample(s, a);
	}

	Option o = (Option)a;

	SimulatedEnvironment env = new SimulatedEnvironment(model, s);
	return o.control(env, discount);
}
 
Example #22
Source File: QLTutorial.java    From burlap_examples with MIT License
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
	//initialize our episode object with the initial state of the environment
	Episode e = new Episode(env.currentObservation());

	//behave until a terminal state or max steps is reached
	State curState = env.currentObservation();
	int steps = 0;
	while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

		//select an action
		Action a = this.learningPolicy.action(curState);

		//take the action and observe outcome
		EnvironmentOutcome eo = env.executeAction(a);

		//record result
		e.transition(eo);

		//get the max Q value of the resulting state if it's not terminal, 0 otherwise
		double maxQ = eo.terminated ? 0. : this.value(eo.op);

		//update the old Q-value
		QValue oldQ = this.storedQ(curState, a);
		oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);


		//update state pointer to next environment state observed
		curState = eo.op;
		steps++;

	}

	return e;
}
 
Example #23
Source File: BeliefAgent.java    From burlap with Apache License 2.0
/**
 * Causes the agent to act until the environment reaches a termination condition. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminal(){
	Episode ea = new Episode();
	ea.initializeInState(this.environment.currentObservation());
	while(!this.environment.isInTerminalState()){
		Action ga = this.getAction(this.curBelief);
		EnvironmentOutcome eo = environment.executeAction(ga);
		ea.transition(ga, eo.op, eo.r);
		
		//update our belief
		this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);
	}
	
	return ea;
}
 
Example #24
Source File: FrameExperienceMemory.java    From burlap_caffe with Apache License 2.0
@Override
public List<EnvironmentOutcome> sampleExperiences(int n) {
    List<FrameExperience> samples = sampleFrameExperiences(n);

    List<EnvironmentOutcome> sampleOutcomes = new ArrayList<>(samples.size());
    for (FrameExperience exp : samples) {
        sampleOutcomes.add(new EnvironmentOutcome(exp.o, actionSet.get(exp.a), exp.op, exp.r, exp.terminated));
    }

    return sampleOutcomes;
}
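
Examples #1, #2, and #24 together form the experience-replay path in the burlap_caffe DQN setup: outcomes are stored with addExperience, and minibatches drawn with sampleExperiences are passed to updateQFunction. The fragment below is a hypothetical piece of glue that strings those three calls together using only the signatures visible in those examples; the replayStep method, its parameters, and the batch size are assumptions, and it is written in the same bare-method style as the snippets on this page.

// Hypothetical glue, not part of burlap_caffe: one replay step that records a
// transition and then updates the Q-function from a sampled minibatch.
void replayStep(FrameExperienceMemory memory, DeepQLearner learner,
                Environment env, Action action, int batchSize) {
    // record the latest transition in the frame-based replay memory (Example #2)
    EnvironmentOutcome eo = env.executeAction(action);
    memory.addExperience(eo);

    // sample stored outcomes (Example #24) and update the Q-function (Example #1)
    List<EnvironmentOutcome> batch = memory.sampleExperiences(batchSize);
    learner.updateQFunction(batch);
}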
 
Example #25
Source File: UCT.java    From burlap with Apache License 2.0
/**
 * Performs a rollout in the UCT tree from the given node, keeping track of how many new nodes can be added to the tree.
 * @param node the node from which to rollout
 * @param depth the depth of the node
 * @param childrenLeftToAdd the number of new subsequent nodes that can be connected to the tree
 * @return the sample return from rolling out from this node
 */
public double treeRollOut(UCTStateNode node, int depth, int childrenLeftToAdd){
	
	numVisits++;
	
	if(depth == maxHorizon){
		return 0.;
	}
	
	if(model.terminal(node.state.s())){
		if(goalCondition != null && goalCondition.satisfies(node.state.s())){
			foundGoal = true;
			foundGoalOnRollout = true;
		}
		DPrint.cl(debugCode, numRollOutsFromRoot + " Hit terminal at depth: " + depth);
		return 0.;
	}
	
	
	
	UCTActionNode anode = this.selectActionNode(node);
	
	if(anode == null){
		//no actions can be performed in this state
		return 0.;
	}
	
	
	
	//sample the action
	EnvironmentOutcome eo = model.sample(node.state.s(), anode.action);
	HashableState shprime = this.stateHash(eo.op);
	double r = eo.r;
	int depthChange = 1;
	if(anode.action instanceof Option){
		depthChange = ((EnvironmentOptionOutcome)eo).numSteps();
	}
	
	UCTStateNode snprime = this.queryTreeIndex(shprime, depth+depthChange);
	
	double sampledReturn;
	
	boolean shouldConnectNode = false;
	double futureReturn;
	if(snprime != null){
		
		//then this state already exists in the tree
		
		if(!anode.referencesSuccessor(snprime)){ 
			//then this successor has not been generated by this state-action pair before and should be indexed
			anode.addSuccessor(snprime);
		}
		
		futureReturn = this.treeRollOut(snprime, depth + depthChange, childrenLeftToAdd);
		sampledReturn = r + Math.pow(gamma, depthChange) * futureReturn;
		
	}
	else{
		
		//this state is not in the tree at this depth so create it
		snprime = stateNodeConstructor.generate(shprime, depth+1, actionTypes, actionNodeConstructor);
		
		//store it in the tree depending on how many new nodes have already been stored in this roll out
		if(childrenLeftToAdd > 0){
			shouldConnectNode = true;
		}
		
		//and do an exploratory sample from it
		futureReturn = this.treeRollOut(snprime, depth + depthChange, childrenLeftToAdd-1);
		sampledReturn = r + gamma * futureReturn;
		
		
	}
	
	node.n++;
	anode.update(sampledReturn);
	
	if(shouldConnectNode || foundGoalOnRollout){
		this.addNodeToIndexTree(snprime);
		anode.addSuccessor(snprime);
		uniqueStatesInTree.add(snprime.state);
	}
	
	
	return sampledReturn;
}
 
Example #26
Source File: ExecuteActionCommand.java    From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

	Environment env = ((EnvironmentShell)shell).getEnv();

	OptionSet oset = this.parser.parse(argString.split(" "));
	List<String> args = (List<String>)oset.nonOptionArguments();

	if(oset.has("h")){
		os.println("[v|a] args*\nCommand to execute an action or set an action name alias.\n" +
				"If -a is not specified, then executes the action with name args[0] with parameters args[1]*\n" +
				"-v: the resulting reward, termination, and observation from execution is printed.\n" +
				"-a: assigns an action name alias where args[0] is the original action name, and args[1] is the alias.");

		return 0;
	}

	if(oset.has("a")){
		if(args.size() != 2){
			return  -1;
		}
		this.actionNameMap.put(args.get(1), args.get(0));
		return 0;
	}

	if(args.isEmpty()){
		return -1;
	}

	ActionType actionType = ((SADomain)this.domain).getAction(args.get(0));
	if(actionType == null){
		String actionName = this.actionNameMap.get(args.get(0));
		if(actionName != null){
			actionType = ((SADomain)this.domain).getAction(actionName);
		}
	}
	if(actionType != null){
		Action a = actionType.associatedAction(actionArgs(args));
		EnvironmentOutcome o = env.executeAction(a);
		if(oset.has("v")){
			os.println("reward: " + o.r);
			if(o.terminated){
				os.println("IS terminal");
			}
			else{
				os.println("is NOT terminal");
			}
			os.println(o.op.toString());
		}
		return 1;
	}


	return -1;
}
 
Example #27
Source File: BestFirst.java    From burlap with Apache License 2.0
/**
 * Plans and returns a {@link burlap.behavior.singleagent.planning.deterministic.SDPlannerPolicy}. If
 * a {@link State} is not in the solution path of this planner, then
 * the {@link burlap.behavior.singleagent.planning.deterministic.SDPlannerPolicy} will throw
 * a runtime exception. If you want a policy that will dynamically replan for unknown states,
 * you should create your own {@link burlap.behavior.singleagent.planning.deterministic.DDPlannerPolicy}.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.singleagent.planning.deterministic.SDPlannerPolicy}.
 */
@Override
public SDPlannerPolicy planFromState(State initialState) {
	
	//first determine if there is even a need to plan
	HashableState sih = this.stateHash(initialState);
	
	if(internalPolicy.containsKey(sih)){
		return new SDPlannerPolicy(this); //no need to plan since this is already solved
	}
	
	
	//a plan is not cached so being planning process
	this.prePlanPrep();

	HashIndexedHeap<PrioritizedSearchNode> openQueue = new HashIndexedHeap<PrioritizedSearchNode>(new PrioritizedSearchNode.PSNComparator());
	Map<PrioritizedSearchNode, PrioritizedSearchNode> closedSet = new HashMap<PrioritizedSearchNode,PrioritizedSearchNode>();
	
	PrioritizedSearchNode ipsn = new PrioritizedSearchNode(sih, this.computeF(null, null, sih, 0.));
	this.insertIntoOpen(openQueue, ipsn);
	
	int nexpanded = 0;
	PrioritizedSearchNode lastVistedNode = null;
	double minF = ipsn.priority;
	while(openQueue.size() > 0){
		
		PrioritizedSearchNode node = openQueue.poll();
		closedSet.put(node, node);
		
		nexpanded++;
		if(node.priority < minF){
			minF = node.priority;
			DPrint.cl(debugCode, "Min F Expanded: " + minF + "; Nodes expanded so far: " + nexpanded + "; Open size: " + openQueue.size());
		}
		
		State s = node.s.s();
		if(gc.satisfies(s)){
			lastVistedNode = node;
			break;
		}
		
		if(this.model.terminal(s)){
			continue; //do not expand nodes from a terminal state
		}
	
		//generate successors
		for(ActionType a : actionTypes){
			//List<GroundedAction> gas = s.getAllGroundedActionsFor(a);
			List<Action> gas = a.allApplicableActions(s);
			for(Action ga : gas){
				EnvironmentOutcome eo = this.model.sample(s, ga);
				State ns = eo.op;
				HashableState nsh = this.stateHash(ns);
				
				double F = this.computeF(node, ga, nsh, eo.r);
				PrioritizedSearchNode npsn = new PrioritizedSearchNode(nsh, ga, node, F);
				
				//check closed
				PrioritizedSearchNode closedPSN = closedSet.get(npsn);
				if(closedPSN != null && F <= closedPSN.priority){
					continue; //no need to reopen because this is a worse path to an already explored node
				}
				
				//check open
				PrioritizedSearchNode openPSN = openQueue.containsInstance(npsn);
				if(openPSN == null){
					this.insertIntoOpen(openQueue, npsn);
				}
				else if(F > openPSN.priority){
					this.updateOpen(openQueue, openPSN, npsn);
				}
				
				
			}
			
			
		}
		
		
		
		
	}
	
	//search to goal complete. Now follow back pointers to set policy
	this.encodePlanIntoPolicy(lastVistedNode);
	
	DPrint.cl(debugCode, "Num Expanded: " + nexpanded);
	
	this.postPlanPrep();

	return new SDPlannerPolicy(this);
	
}
 
Example #28
Source File: RMaxModel.java    From burlap with Apache License 2.0
@Override
public EnvironmentOutcome sample(State s, Action a) {
	EnvironmentOutcome eo = sourceModel.sample(s, a);
	modifyEO(eo);
	return eo;
}
 
Example #29
Source File: RMaxModel.java    From burlap with Apache License 2.0
@Override
public void updateModel(EnvironmentOutcome eo) {
	this.sourceModel.updateModel(eo);
}
 
Example #30
Source File: TestFrameExperienceMemory.java    From burlap_caffe with Apache License 2.0
@Test
public void TestSmall() {
    BytePointer data0 = new BytePointer((byte)0, (byte)0);
    BytePointer data1 = new BytePointer((byte)0, (byte)1);
    BytePointer data2 = new BytePointer((byte)2, (byte)3);
    BytePointer data3 = new BytePointer((byte)4, (byte)5);
    BytePointer data4 = new BytePointer((byte)6, (byte)7);
    BytePointer data5 = new BytePointer((byte)8, (byte)9);
    BytePointer data6 = new BytePointer((byte)10, (byte)11);
    BytePointer data7 = new BytePointer((byte)12, (byte)13);

    opencv_core.Mat frame0 = new opencv_core.Mat(1, 2, CV_8U, data0);
    opencv_core.Mat frame1 = new opencv_core.Mat(1, 2, CV_8U, data1);
    opencv_core.Mat frame2 = new opencv_core.Mat(1, 2, CV_8U, data2);
    opencv_core.Mat frame3 = new opencv_core.Mat(1, 2, CV_8U, data3);
    opencv_core.Mat frame4 = new opencv_core.Mat(1, 2, CV_8U, data4);
    opencv_core.Mat frame5 = new opencv_core.Mat(1, 2, CV_8U, data5);
    opencv_core.Mat frame6 = new opencv_core.Mat(1, 2, CV_8U, data6);
    opencv_core.Mat frame7 = new opencv_core.Mat(1, 2, CV_8U, data7);

    ALEState aleState0 = new ALEState(frame0);
    ALEState aleState1 = new ALEState(frame1);
    ALEState aleState2 = new ALEState(frame2);
    ALEState aleState3 = new ALEState(frame3);
    ALEState aleState4 = new ALEState(frame4);
    ALEState aleState5 = new ALEState(frame5);
    ALEState aleState6 = new ALEState(frame6);
    ALEState aleState7 = new ALEState(frame7);


    input = new FloatPointer(2 * 2);

    ActionSet actionSet = new ActionSet(new String[]{"Action0"});
    Action action0 = actionSet.get(0);

    FrameExperienceMemory experienceMemory = new FrameExperienceMemory(5, 2, new TestPreprocessor(2), actionSet);
    FrameHistory state0 = experienceMemory.currentFrameHistory;
    experienceMemory.addExperience(new EnvironmentOutcome(aleState0, action0, aleState1, 0, false));
    FrameHistory state1 = experienceMemory.currentFrameHistory;
    experienceMemory.addExperience(new EnvironmentOutcome(aleState1, action0, aleState2, 0, false));
    FrameHistory state2 = experienceMemory.currentFrameHistory;
    experienceMemory.addExperience(new EnvironmentOutcome(aleState2, action0, aleState3, 0, false));
    FrameHistory state3 = experienceMemory.currentFrameHistory;
    experienceMemory.addExperience(new EnvironmentOutcome(aleState3, action0, aleState4, 0, false));
    FrameHistory state4 = experienceMemory.currentFrameHistory;

    compare(state0, experienceMemory, new BytePointer[]{data0, data0}, 2);
    compare(state1, experienceMemory, new BytePointer[]{data0, data1}, 2);
    compare(state2, experienceMemory, new BytePointer[]{data1, data2}, 2);
    compare(state3, experienceMemory, new BytePointer[]{data2, data3}, 2);
    compare(state4, experienceMemory, new BytePointer[]{data3, data4}, 2);

    experienceMemory.addExperience(new EnvironmentOutcome(aleState4, action0, aleState5, 0, false));
    FrameHistory state5 = experienceMemory.currentFrameHistory;
    experienceMemory.addExperience(new EnvironmentOutcome(aleState5, action0, aleState6, 0, false));
    FrameHistory state6 = experienceMemory.currentFrameHistory;
    experienceMemory.addExperience(new EnvironmentOutcome(aleState6, action0, aleState7, 0, false));
    FrameHistory state7 = experienceMemory.currentFrameHistory;

    compare(state3, experienceMemory, new BytePointer[]{data2, data3}, 2);
    compare(state4, experienceMemory, new BytePointer[]{data3, data4}, 2);
    compare(state5, experienceMemory, new BytePointer[]{data4, data5}, 2);
    compare(state6, experienceMemory, new BytePointer[]{data5, data6}, 2);
    compare(state7, experienceMemory, new BytePointer[]{data6, data7}, 2);
}