burlap.mdp.singleagent.environment.Environment Java Examples

The following examples show how to use burlap.mdp.singleagent.environment.Environment. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EnvironmentShell.java    From burlap with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a shell around the given environment and installs its welcome message and help text.
 * @param domain the domain forwarded to the parent shell constructor
 * @param env the {@link Environment} stored by this shell
 * @param is the input stream forwarded to the parent shell constructor
 * @param os the output stream forwarded to the parent shell constructor
 */
public EnvironmentShell(Domain domain, Environment env, InputStream is, PrintStream os) {
	super(domain, is, os);

	//text shown when the help command is requested
	this.helpText = "Use the command help to bring up this message again. " +
			"Here is a list of standard reserved commands:\n" +
			"cmds - list all known commands.\n" +
			"aliases - list all known command aliases.\n" +
			"alias - set an alias for a command.\n" +
			"quit - terminate this shell.\n\n" +
			"Other useful, but non-reserved, commands are:\n" +
			"obs - print the current observation of the environment\n" +
			"ex - execute an action\n\n" +
			"Usually, you can get help on an individual command by passing it the -h option.";

	this.welcomeMessage = "Welcome to the BURLAP agent environment shell. Type the command 'help' to bring " +
			"up additional information about using this shell.";

	this.env = env;
}
 
Example #2
Source File: LSPI.java    From burlap with Apache License 2.0 6 votes vote down vote up
/**
 * Rolls out the learning policy in the environment, adds the resulting episode to the dataset,
 * optionally reruns policy iteration, and stores the episode in the bounded episode history.
 * @param env the {@link Environment} in which the episode is run
 * @param maxSteps the step limit passed to the rollout; -1 selects the rollout without a step limit
 * @return the recorded {@link Episode}
 */
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

	final Episode episode;
	if(maxSteps == -1){
		episode = PolicyUtils.rollout(this.learningPolicy, env);
	}
	else{
		episode = PolicyUtils.rollout(this.learningPolicy, env, maxSteps);
	}

	this.updateDatasetWithLearningEpisode(episode);

	if(!this.shouldRereunPolicyIteration(episode)){
		//no replanning this time: only advance the counter of steps seen since the last run
		this.numStepsSinceLastLearningPI += episode.numTimeSteps()-1;
	}
	else{
		this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);
		this.numStepsSinceLastLearningPI = 0;
	}

	//make room by removing the head of the history once the storage limit is reached
	if(episodeHistory.size() >= numEpisodesToStore){
		episodeHistory.poll();
	}
	episodeHistory.offer(episode);

	return episode;
}
 
Example #3
Source File: SARSCollector.java    From burlap with Apache License 2.0 6 votes vote down vote up
/**
 * Collects nSamples of SARS tuples from an {@link burlap.mdp.singleagent.environment.Environment} and returns them in a {@link burlap.behavior.singleagent.learning.lspi.SARSData} object.
 * Each sequence of samples is no longer than maxEpisodeSteps and samples are collected using this object's {@link #collectDataFrom(burlap.mdp.singleagent.environment.Environment, int, SARSData)}
 * method. After each call to {@link #collectDataFrom(burlap.mdp.singleagent.environment.Environment, int, SARSData)}, the provided {@link burlap.mdp.singleagent.environment.Environment}
 * is sent the {@link burlap.mdp.singleagent.environment.Environment#resetEnvironment()} message. Collection also stops early if the environment is in a terminal state
 * when the next sequence would begin.
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} from which samples should be collected.
 * @param nSamples The number of samples to generate.
 * @param maxEpisodeSteps the maximum number of steps to take from any initial state of the {@link burlap.mdp.singleagent.environment.Environment}. Must be positive whenever a sequence is actually collected.
 * @param intoDataset the dataset into which the results will be collected. If null, a new dataset is created.
 * @return the intoDataset object, which is created if it is input as null.
 * @throws IllegalArgumentException if a sequence needs to be collected but maxEpisodeSteps is not positive.
 */
public SARSData collectNInstances(Environment env, int nSamples, int maxEpisodeSteps, SARSData intoDataset){

	if(intoDataset == null){
		intoDataset = new SARSData(nSamples);
	}

	while(nSamples > 0 && !env.isInTerminalState()){
		int maxSteps = Math.min(nSamples, maxEpisodeSteps);
		if(maxSteps <= 0){
			//nSamples is positive here, so this means maxEpisodeSteps <= 0; a zero-step sequence
			//would never reduce nSamples, so fail fast instead of looping without progress
			throw new IllegalArgumentException("maxEpisodeSteps must be positive to collect samples, but was " + maxEpisodeSteps);
		}
		int oldSize = intoDataset.size();
		this.collectDataFrom(env, maxSteps, intoDataset);
		//only count the samples that were actually added by this sequence
		int delta = intoDataset.size() - oldSize;
		nSamples -= delta;
		env.resetEnvironment();
	}

	return intoDataset;

}
 
Example #4
Source File: SARSCollector.java    From burlap with Apache License 2.0 6 votes vote down vote up
/**
 * Executes randomly selected applicable actions in the environment and appends each resulting
 * transition to the dataset, stopping when the environment is terminal or maxSteps actions were taken.
 * @param env the {@link Environment} to act in
 * @param maxSteps the maximum number of actions to execute
 * @param intoDataset the dataset to append to; if null, a new one is created
 * @return the dataset that received the samples
 */
@Override
public SARSData collectDataFrom(Environment env, int maxSteps, SARSData intoDataset) {

	SARSData dataset = intoDataset != null ? intoDataset : new SARSData();

	for(int taken = 0; !env.isInTerminalState() && taken < maxSteps; taken++){
		List<Action> applicable = ActionUtils.allApplicableActionsForTypes(this.actionTypes, env.currentObservation());
		//pick one of the applicable actions by random index
		int choice = RandomFactory.getMapped(0).nextInt(applicable.size());
		EnvironmentOutcome outcome = env.executeAction(applicable.get(choice));
		dataset.add(outcome.o, outcome.a, outcome.r, outcome.op);
	}

	return dataset;
}
 
Example #5
Source File: RewardCommand.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the last reward reported by the shell's environment, or a usage message when -h is given.
 * @param shell the calling shell; cast to {@link EnvironmentShell} to obtain the environment
 * @param argString the raw argument string, split on single spaces before parsing
 * @param is the shell input scanner (not used here)
 * @param os the stream the result is printed to
 * @return always 0
 */
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

	Environment env = ((EnvironmentShell)shell).getEnv();
	OptionSet oset = this.parser.parse(argString.split(" "));

	if(!oset.has("h")){
		os.println(String.valueOf(env.lastReward()));
	}
	else{
		os.println("Prints the last reward generated by the environment.");
	}

	return 0;
}
 
Example #6
Source File: QLTutorial.java    From burlap_examples with MIT License 5 votes vote down vote up
/**
 * Runs one learning episode: repeatedly selects an action with the learning policy, executes it,
 * records the outcome, and updates the stored Q-value of the state-action pair that was just taken.
 * @param env the {@link Environment} to learn in
 * @param maxSteps the maximum number of steps to take, or -1 for no step limit
 * @return the {@link Episode} recording the interaction
 */
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
	//the episode record starts from the environment's current observation
	Episode episode = new Episode(env.currentObservation());

	State current = env.currentObservation();
	final boolean unbounded = maxSteps == -1;

	//continue until the environment is terminal or the step budget (if any) is used up
	for(int steps = 0; !env.isInTerminalState() && (unbounded || steps < maxSteps); steps++){

		Action chosen = this.learningPolicy.action(current);
		EnvironmentOutcome outcome = env.executeAction(chosen);
		episode.transition(outcome);

		//a terminated outcome contributes no future value
		double nextValue;
		if(outcome.terminated){
			nextValue = 0.;
		}
		else{
			nextValue = this.value(outcome.op);
		}

		//move the stored estimate toward reward plus discounted next value
		QValue stored = this.storedQ(current, chosen);
		double delta = outcome.r + this.gamma * nextValue - stored.q;
		stored.q += this.learningRate * delta;

		//the observed next state becomes the current state for the following step
		current = outcome.op;
	}

	return episode;
}
 
Example #7
Source File: AtariDQN.java    From burlap_caffe with Apache License 2.0 5 votes vote down vote up
/**
 * Passes the learner, tester, value function approximator, action set and environment to the
 * parent constructor and stores the two frame experience memories.
 * @param trainingMemory the memory stored as this object's training memory
 * @param testMemory the memory stored as this object's test memory
 */
public AtariDQN(DeepQLearner learner, DeepQTester tester, DQN vfa, ActionSet actionSet, Environment env,
                FrameExperienceMemory trainingMemory, FrameExperienceMemory testMemory) {
    super(learner, tester, vfa, actionSet, env);

    this.testMemory = testMemory;
    this.trainingMemory = trainingMemory;
}
 
Example #8
Source File: PolicyUtils.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment} until the environment
 * reports a terminal state. The terminal check happens after each step, so at least one step is always taken.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @return An {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env){

	Episode recorded = new Episode(env.currentObservation());

	for(;;){
		followAndRecordPolicy(p, env, recorded);
		if(env.isInTerminalState()){
			return recorded;
		}
	}
}
 
Example #9
Source File: PolicyUtils.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment} until the environment
 * reports a terminal state or the episode's time step count reaches numSteps. Both checks happen after
 * each step, so at least one step is always taken.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @param numSteps the bound compared against the episode's {@code numTimeSteps()} after every step.
 * @return An {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env, int numSteps){

	Episode recorded = new Episode(env.currentObservation());

	for(;;){
		followAndRecordPolicy(p, env, recorded);
		int taken = recorded.numTimeSteps();
		if(env.isInTerminalState() || taken >= numSteps){
			return recorded;
		}
	}
}
 
Example #10
Source File: TrainingHelper.java    From burlap_caffe with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the learner, tester, value function approximator, action set and environment,
 * and starts the step and episode counters at zero.
 */
public TrainingHelper(DeepQLearner learner, Tester tester, DQN vfa, ActionSet actionSet, Environment env) {
    this.env = env;
    this.actionSet = actionSet;
    this.vfa = vfa;
    this.learner = learner;
    this.tester = tester;

    this.episodeCounter = 0;
    this.stepCounter = 0;
}
 
Example #11
Source File: Option.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Executes the option in the environment: actions are chosen by the option's policy and executed until
 * a random draw does not exceed the option's termination probability or the environment is terminal.
 * At least one action is always executed.
 * @param o the {@link Option} to execute
 * @param env the {@link Environment} in which the option's actions are executed
 * @param discount the factor by which the weight of each successive reward is multiplied
 * @return an {@link EnvironmentOptionOutcome} built from the initial state, the option, the last observed
 * state, the accumulated discounted reward, the environment's terminal flag, the discount and the annotated episode
 */
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
	Random rand = RandomFactory.getMapped(0);
	State start = env.currentObservation();
	State current = start;

	//"annotated" is returned in the outcome; "history" is what the option's policy and termination test see
	Episode annotated = new Episode(current);
	Episode history = new Episode(current);

	double cumulative = 0.;
	double weight = 1.;
	int stepCount = 0;

	for(;;){
		Action selected = o.policy(current, history);
		EnvironmentOutcome outcome = env.executeAction(selected);
		stepCount++;
		cumulative += weight*outcome.r;
		current = outcome.op;
		weight *= discount;

		history.transition(selected, outcome.op, outcome.r);

		//the annotated episode records the running discounted sum rather than the single-step reward
		AnnotatedAction labelled = new AnnotatedAction(selected, o.toString() + "(" + stepCount + ")");
		annotated.transition(labelled, outcome.op, cumulative);

		double terminationProb = o.probabilityOfTermination(outcome.op, history);
		double draw = rand.nextDouble();

		//stop unless the draw exceeds the termination probability and the environment is still non-terminal
		if(!(draw > terminationProb) || env.isInTerminalState()){
			break;
		}
	}

	return new EnvironmentOptionOutcome(start, o, current, cumulative, env.isInTerminalState(), discount, annotated);

}
 
Example #12
Source File: LearningAlgorithmExperimenter.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the experimenter with a test environment, trial settings and the agents to compare.
 * The trialLength will be interpreted as the number of episodes, but it can be reinterpreted as a total number of steps per trial using the
 * {@link #toggleTrialLengthInterpretation(boolean)}.
 * @param testEnvironment the test {@link burlap.mdp.singleagent.environment.Environment} in which experiments will be performed.
 * @param nTrials the number of trials
 * @param trialLength the length of the trials (by default in episodes, but can be interpreted as maximum step length)
 * @param agentFactories factories to generate the agents to be tested; at least one is required.
 */
public LearningAlgorithmExperimenter(Environment testEnvironment, int nTrials, int trialLength, LearningAgentFactory...agentFactories){

	//an experiment without agents is rejected up front
	if(agentFactories.length < 1){
		throw new RuntimeException("Zero agent factories provided. At least one must be given for an experiment");
	}

	this.agentFactories = agentFactories;
	this.testEnvironment = testEnvironment;
	this.trialLength = trialLength;
	this.nTrials = nTrials;
}
 
Example #13
Source File: EnvironmentServer.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * If the input {@link burlap.mdp.singleagent.environment.Environment} is an instance of {@link EnvironmentServerInterface},
 * then all the input observers are added to it and it is returned. Otherwise, the result of
 * {@code constructor(env, observers)} is returned.
 * @param env the {@link burlap.mdp.singleagent.environment.Environment} that will have observers added to it
 * @param observers the {@link EnvironmentObserver} objects to add.
 * @return the input {@link burlap.mdp.singleagent.environment.Environment} or a newly constructed server around it.
 */
public static EnvironmentServerInterface constructServerOrAddObservers(Environment env, EnvironmentObserver...observers){
	if(!(env instanceof EnvironmentServerInterface)){
		return constructor(env, observers);
	}

	EnvironmentServerInterface server = (EnvironmentServerInterface)env;
	server.addObservers(observers);
	return server;
}
 
Example #14
Source File: EnvironmentDelegation.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the root {@link burlap.mdp.singleagent.environment.Environment} delegate, found by following
 * the chain of delegates until one is reached that is not itself an {@link EnvironmentDelegation}.
 * @param env the {@link EnvironmentDelegation} to inspect
 * @return the root {@link burlap.mdp.singleagent.environment.Environment} delegate
 */
public static Environment getRootEnvironmentDelegate(EnvironmentDelegation env){
	Environment current = env.getEnvironmentDelegate();
	//keep unwrapping while the delegate is itself a delegation
	while(current instanceof EnvironmentDelegation){
		current = ((EnvironmentDelegation)current).getEnvironmentDelegate();
	}
	return current;
}
 
Example #15
Source File: EnvironmentDelegation.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the input {@link burlap.mdp.singleagent.environment.Environment}, or the first of its nested
 * delegates, whose class is assignable to the given type, or null if none is.
 * @param env An {@link burlap.mdp.singleagent.environment.Environment} to inspect
 * @param type the class/interface type against which the {@link burlap.mdp.singleagent.environment.Environment} and
 *             its delegates are compared.
 * @return the {@link burlap.mdp.singleagent.environment.Environment} or delegate implementing the input type, or null if none do.
 */
public static Environment getDelegateImplementing(Environment env, Class<?> type){

	Environment candidate = env;
	for(;;){
		if(type.isAssignableFrom(candidate.getClass())){
			return candidate;
		}
		//the chain ends at the first environment that does not delegate further
		if(!(candidate instanceof EnvironmentDelegation)){
			return null;
		}
		candidate = ((EnvironmentDelegation)candidate).getEnvironmentDelegate();
	}

}
 
Example #16
Source File: VisualExplorer.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Shell command that turns live state polling on or off and reports its status.
 * Options: {@code -t interval} starts polling with the given interval, {@code -f} stops polling
 * (only considered when -t is absent), {@code -c} prints the polling status, {@code -h} prints usage only.
 * @param shell the calling shell (not used by this command)
 * @param argString the raw argument string, split on single spaces before parsing
 * @param is the shell input scanner (not used here)
 * @param os the stream that usage, status and error messages are printed to
 * @return 0 on success or after printing help; -1 if the -t interval is not a valid integer
 */
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

	OptionSet oset = this.parser.parse(argString.split(" "));

	if(oset.has("h")){
		os.println("[-t interval] [-f] [-c]\n\n" +
				"Used to set the associated visual explorer to poll the environment for the current state and update the display on a fixed interval.\n" +
				"-t interval: turns on live polling and causes the environment to be polled every interval milliseconds.\n" +
				"-f: turns off live polling.\n" +
				"-c: returns the status of live polling (enabled/disabled and rate)");
		//a help request only prints usage and must not also change the polling state
		return 0;
	}

	if(oset.has("t")){
		String val = (String)oset.valueOf("t");
		int interval;
		try{
			interval = Integer.parseInt(val);
		}
		catch(NumberFormatException e){
			//report bad user input on the shell instead of propagating a raw exception
			os.println("Invalid polling interval '" + val + "'; expected an integer number of milliseconds.");
			return -1;
		}
		startLiveStatePolling(interval);
	}
	else if(oset.has("f")){
		stopLivePolling();
	}

	if(oset.has("c")){
		if(livePollingTimer != null && livePollingTimer.isRunning()){
			os.println("Live polling is enabled and polls every " + pollInterval + " milliseconds.");
		}
		else{
			os.println("Live polling is disabled.");
		}
	}

	return 0;
}
 
Example #17
Source File: ObservationCommand.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the string form of the environment's current observation, or a usage message when -h is given.
 * @param shell the calling shell; cast to {@link EnvironmentShell} when the observation is needed
 * @param argString the raw argument string, split on single spaces before parsing
 * @param is the shell input scanner (not used here)
 * @param os the stream the result is printed to
 * @return always 0
 */
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

	OptionSet oset = this.parser.parse(argString.split(" "));

	final String output;
	if(oset.has("h")){
		output = "Prints the current observation from the environment.";
	}
	else{
		//the environment is only looked up when it is actually needed
		Environment env = ((EnvironmentShell)shell).getEnv();
		output = env.currentObservation().toString();
	}

	os.println(output);
	return 0;
}
 
Example #18
Source File: IsTerminalCommand.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Prints whether the shell's environment is in a terminal state, or a usage message when -h is given.
 * @param shell the calling shell; cast to {@link EnvironmentShell} to obtain the environment
 * @param argString the raw argument string, split on single spaces before parsing
 * @param is the shell input scanner (not used here)
 * @param os the stream the result is printed to
 * @return always 0
 */
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

	Environment env = ((EnvironmentShell)shell).getEnv();
	OptionSet oset = this.parser.parse(argString.split(" "));

	if(!oset.has("h")){
		os.println(String.valueOf(env.isInTerminalState()));
	}
	else{
		os.println("Prints whether the environment is in a terminal state or not (true if so, false otherwise)");
	}

	return 0;
}
 
Example #19
Source File: RemoveStateObjectCommand.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Removes the named object from the environment's current state and sets the environment to the modified state.
 * Requires that the environment (or one of its delegates) implements StateSettableEnvironment and that the
 * current observation is a MutableOOState; otherwise a message is printed and nothing is changed.
 * @param shell the calling shell; cast to {@link EnvironmentShell} to obtain the environment
 * @param argString the raw argument string, split on single spaces before parsing
 * @param is the shell input scanner (not used here)
 * @param os the stream that help, error messages and the optional new state are printed to
 * @return 1 after the state was changed; 0 after printing help or an error message;
 * -1 if the number of non-option arguments is not exactly one
 */
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

	Environment env = ((EnvironmentShell)shell).getEnv();
	OptionSet oset = this.parser.parse(argString.split(" "));
	List<String> args = (List<String>)oset.nonOptionArguments();
	if(oset.has("h")){
		//note the trailing space after objectName: without it the two fragments run together
		os.println("[-v] objectName\nRemoves an OO-MDP object instance with name objectName " +
				"from the current state of the environment. The environment must implement StateSettableEnvironment " +
				"for this operation to work.\n\n" +
				"-v print the new Environment state after completion.");
		return 0;
	}

	//search the environment and its delegates for something whose state can be set
	StateSettableEnvironment senv = (StateSettableEnvironment) EnvironmentDelegation.Helper.getDelegateImplementing(env, StateSettableEnvironment.class);
	if(senv == null){
		os.println("Cannot remove object from environment state, because the environment does not implement StateSettableEnvironment");
		return 0;
	}

	//exactly one object name is expected
	if(args.size() != 1){
		return -1;
	}

	State s = env.currentObservation();

	if(!(s instanceof MutableOOState)){
		os.println("Cannot remove object from state, because state is not a MutableOOState");
		return 0;
	}

	((MutableOOState)s).removeObject(args.get(0));
	senv.setCurStateTo(s);

	if(oset.has("v")){
		os.println(env.currentObservation().toString());
	}

	return 1;
}
 
Example #20
Source File: SimpleTester.java    From burlap_caffe with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a test episode by rolling out this tester's policy in the environment.
 * @param env the {@link Environment} to act in
 * @param maxSteps the step bound passed to the rollout
 * @return the {@link Episode} produced by the rollout
 */
@Override
public Episode runTestEpisode(Environment env, int maxSteps) {
    Episode testEpisode = PolicyUtils.rollout(policy, env, maxSteps);
    return testEpisode;
}
 
Example #21
Source File: ResetEnvCommand.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Resets the shell's environment.
 * @param shell the calling shell; cast to {@link EnvironmentShell} to obtain the environment
 * @param argString the raw argument string (not used here)
 * @param is the shell input scanner (not used here)
 * @param os the shell output stream (not used here)
 * @return always 1
 */
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
	((EnvironmentShell)shell).getEnv().resetEnvironment();
	return 1;
}
 
Example #22
Source File: ListPropFunctions.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Lists propositional functions of the shell's domain. By default prints every grounded propositional
 * function that is true in the environment's current observation.
 * Options: {@code -f} lists the false groundings instead, {@code -n} lists only the function names,
 * {@code -s} evaluates against the hidden state of a SimulatedPOEnvironment, {@code -h} prints usage.
 * @param shell the calling shell; cast to {@link EnvironmentShell} to obtain the environment
 * @param argString the raw argument string, split on single spaces before parsing
 * @param is the shell input scanner (not used here)
 * @param os the stream that results and messages are printed to
 * @return always 0
 */
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
	Environment env = ((EnvironmentShell)shell).getEnv();

	OptionSet oset = this.parser.parse(argString.split(" "));

	if(oset.has("h")){
		os.println("[-f] [-n] [-s]\nCommand to list all true (or false) grounded propositional function for the current environment observation.\n" +
				"-f: list false grounded propositional functions, rather than true ones.\n" +
				"-n: list the name of all propositional functions, rather than grounded evaluations\n" +
				"-s: evaluate propositional functions on POMDP environment hidden state, rather than environment observation. Environment must extend SimulatedPOEnvironment");

		return 0;
	}


	if(!(shell.getDomain() instanceof OODomain)){
		os.println("cannot query propositional functions because the domain is not an OODomain");
		return 0;
	}
	OODomain ooDomain = (OODomain)shell.getDomain();

	if(oset.has("n")){
		for(PropositionalFunction pf : ooDomain.propFunctions()){
			os.println(pf.getName());
		}
		return 0;
	}


	State qs = env.currentObservation();

	if(oset.has("s")){
		if(!(env instanceof SimulatedPOEnvironment)){
			os.println("Cannot query propositional functions with respect to POMDP hidden state, because the environment does not extend SimulatedPOEnvironment.");
			return 0;
		}
		qs = ((SimulatedPOEnvironment)env).getCurrentHiddenState();
	}

	//with -f the false groundings are listed, otherwise the true ones
	boolean listTrue = !oset.has("f");
	OOState queryState = (OOState)qs;

	List<GroundedProp> gps = PropositionalFunction.allGroundingsFromList(ooDomain.propFunctions(), queryState);
	for(GroundedProp gp : gps){
		if(gp.isTrue(queryState) == listTrue){
			os.println(gp.toString());
		}
	}

	return 0;
}
 
Example #23
Source File: VisualExplorer.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the domain, environment, painter and canvas size, creates an empty key-to-action map,
 * maps the "`" key to the "reset" shell command, and creates a non-editable text area as the prop viewer.
 * @param domain the domain stored by this explorer
 * @param env the {@link Environment} stored by this explorer
 * @param painter the visualizer stored by this explorer
 * @param w the value stored as the canvas width
 * @param h the value stored as the canvas height
 */
protected void init(SADomain domain, Environment env, Visualizer painter, int w, int h){

	this.domain = domain;
	this.env = env;
	this.painter = painter;

	this.cWidth = w;
	this.cHeight = h;

	this.keyActionMap = new HashMap<String, Action>();
	this.keyShellMap.put("`", "reset");

	TextArea viewer = new TextArea();
	viewer.setEditable(false);
	this.propViewer = viewer;

}
 
Example #24
Source File: ActionControllerDestroyBlock.java    From burlapcraft with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Stores the delay and the environment used by this controller.
 * @param delayMS the delay value to store (milliseconds, going by the name)
 * @param e the {@link Environment} to store
 */
public ActionControllerDestroyBlock(int delayMS, Environment e) {
	this.environment = e;
	this.delayMS = delayMS;
}
 
Example #25
Source File: ActionControllerChangeYaw.java    From burlapcraft with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Stores the delay, the environment and the direction used by this controller.
 * @param delayMS the delay value to store (milliseconds, going by the name)
 * @param e the {@link Environment} to store
 * @param d the direction value to store
 */
public ActionControllerChangeYaw(int delayMS, Environment e, int d) {
	this.direction = d;
	this.environment = e;
	this.delayMS = delayMS;
}
 
Example #26
Source File: PolicyUtils.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Follows this policy for one time step in the provided {@link burlap.mdp.singleagent.environment.Environment} and
 * records the interaction in the provided {@link Episode} object. If the selected action is an
 * {@link burlap.behavior.singleagent.options.Option} and {@link #rolloutsDecomposeOptions} is true, the episode
 * carried by the option's outcome is merged into the provided episode. In every other case the selected action
 * is recorded as a single transition.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy should be followed.
 * @param ea The {@link Episode} object to which the action selection will be recorded.
 * @throws PolicyUndefinedException if the policy returns no action for the current observation.
 */
protected static void followAndRecordPolicy(Policy p, Environment env, Episode ea){

	Action chosen = p.action(env.currentObservation());
	if(chosen == null){
		throw new PolicyUndefinedException();
	}

	EnvironmentOutcome outcome = env.executeAction(chosen);

	boolean decompose = chosen instanceof Option && rolloutsDecomposeOptions;
	if(!decompose){
		//record the action as one transition
		ea.transition(chosen, outcome.op, outcome.r);
		return;
	}

	//merge the episode recorded during the option's execution
	ea.appendAndMergeEpisodeAnalysis(((EnvironmentOptionOutcome)outcome).episode);

}
 
Example #27
Source File: ActionControllerPlaceBlock.java    From burlapcraft with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Stores the delay and the environment used by this controller.
 * @param delayMS the delay value to store (milliseconds, going by the name)
 * @param e the {@link Environment} to store
 */
public ActionControllerPlaceBlock(int delayMS, Environment e) {
	this.environment = e;
	this.delayMS = delayMS;
}
 
Example #28
Source File: ActionControllerMoveForward.java    From burlapcraft with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Stores the delay and the environment used by this controller.
 * @param delayMS the delay value to store (milliseconds, going by the name)
 * @param e the {@link Environment} to store
 */
public ActionControllerMoveForward(int delayMS, Environment e) {
	this.environment = e;
	this.delayMS = delayMS;
}
 
Example #29
Source File: PerformancePlotter.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Intentionally ignores environment reset notifications.
 * @param resetEnvironment the {@link Environment} that was reset (unused)
 */
@Override
public void observeEnvironmentReset(Environment resetEnvironment) {
	//nothing to do when the environment is reset
}
 
Example #30
Source File: EnvironmentShell.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the environment held by this shell.
 * @return the {@link Environment} stored in this shell's {@code env} field
 */
public Environment getEnv() {
	return this.env;
}