burlap.mdp.auxiliary.StateGenerator Java Examples

The following examples show how to use burlap.mdp.auxiliary.StateGenerator. Each example is drawn from an open-source project; the source file and originating project are noted above the code.
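
For orientation: StateGenerator is a single-method interface, and implementations simply return a (possibly random) initial State each time generateState() is called. A minimal sketch, assuming you already have some concrete State instance s in hand:

import burlap.mdp.auxiliary.StateGenerator;
import burlap.mdp.auxiliary.common.ConstantStateGenerator;
import burlap.mdp.core.state.State;

// wrap a fixed state so every episode starts from it
StateGenerator constant = new ConstantStateGenerator(s);

// or implement the interface directly for randomized starts
StateGenerator custom = new StateGenerator() {
	@Override
	public State generateState() {
		return s; // replace with any (randomized) state construction
	}
};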
Example #1
Source File: World.java    From burlap with Apache License 2.0
protected void init(SGDomain domain, JointModel jam, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg, StateMapping abstractionForAgents){
	this.domain = domain;
	this.worldModel = jam;
	this.jointRewardFunction = jr;
	this.tf = tf;
	this.initialStateGenerator = sg;
	this.abstractionForAgents = abstractionForAgents;
	
	agents = new ArrayList<SGAgent>();
	
	agentCumulativeReward = new HashedAggregator<String>();
	
	worldObservers = new ArrayList<WorldObserver>();

	this.generateNewCurrentState();
	
	debugId = 284673923;
}
 
Example #2
Source File: SARSCollector.java    From burlap with Apache License 2.0
/**
 * Collects nSamples of SARS tuples and returns them in a {@link SARSData} object.
 * @param sg a state generator for generating initial states from which data can be collected.
 * @param model the model of the world to use
 * @param nSamples the number of SARS samples to collect.
 * @param maxEpisodeSteps the maximum number of steps that can be taken when rolling out from a state generated by {@link StateGenerator} sg, before a new rollout is started.
 * @param intoDataset the dataset into which the results will be collected. If null, a new dataset is created.
 * @return the intoDataset object, which is created if it is input as null.
 */
public SARSData collectNInstances(StateGenerator sg, SampleModel model, int nSamples, int maxEpisodeSteps, SARSData intoDataset){
	
	if(intoDataset == null){
		intoDataset = new SARSData(nSamples);
	}
	
	while(nSamples > 0){
		int maxSteps = Math.min(nSamples, maxEpisodeSteps);
		int oldSize = intoDataset.size();
		this.collectDataFrom(sg.generateState(), model, maxSteps, intoDataset);
		int delta = intoDataset.size() - oldSize;
		nSamples -= delta;
	}
	
	return intoDataset;
	
}
 
Example #3
Source File: RLGlueEnvironment.java    From burlap with Apache License 2.0
/**
 * Constructs with all the BURLAP information necessary for generating an RLGlue Environment.
 * @param domain the BURLAP domain
 * @param stateGenerator a generator for generating states at the start of each episode.
 * @param stateFlattener used to flatten states into a numeric representation
 * @param valueRanges the value ranges of the flattened vector state
 * @param rewardRange the reward function value range
 * @param isEpisodic whether the task is episodic or continuing
 * @param discount the discount factor to use for the task
 */
public RLGlueEnvironment(SADomain domain, StateGenerator stateGenerator, DenseStateFeatures stateFlattener,
						 DoubleRange[] valueRanges,
						 DoubleRange rewardRange, boolean isEpisodic, double discount){

	if(domain.getModel() == null){
		throw new RuntimeException("RLGlueEnvironment requires a BURLAP domain with a SampleModel, but the domain does not provide one.");
	}

	this.domain = domain;
	this.stateGenerator = stateGenerator;
	this.stateFlattener = stateFlattener;
	this.valueRanges = valueRanges;
	this.rewardRange = rewardRange;
	this.isEpisodic = isEpisodic;
	this.discount = discount;
	
	State exampleState = this.stateGenerator.generateState();
	int actionInd = 0;
	for(ActionType a : this.domain.getActionTypes()){
		List<burlap.mdp.core.action.Action> gas = a.allApplicableActions(exampleState);
		for(burlap.mdp.core.action.Action ga : gas){
			this.actionMap.put(actionInd, ga);
			actionInd++;
		}
	}
	
	//set this to be the first state returned
	this.curState = exampleState;
}
 
Example #4
Source File: SimulatedEnvironment.java    From burlap with Apache License 2.0
public SimulatedEnvironment(SADomain domain, StateGenerator stateGenerator) {
	this.stateGenerator = stateGenerator;
	this.curState = stateGenerator.generateState();
	if(domain.getModel() == null){
		throw new RuntimeException("SimulatedEnvironment requires a Domain with a model, but the input domain does not have one.");
	}
	this.model = domain.getModel();
}
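
Note that the generator is consulted on every reset: SimulatedEnvironment.resetEnvironment() draws a fresh state from it, so random-start experiments only need to supply a suitable generator once. A short sketch, assuming domain and gen are built as in the surrounding examples:

SimulatedEnvironment env = new SimulatedEnvironment(domain, gen);
for(int i = 0; i < 10; i++){
	// ... act in env, e.g., via PolicyUtils.rollout(p, env) ...
	env.resetEnvironment(); // draws a new initial state from gen
}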
 
Example #5
Source File: ConstantWorldGenerator.java    From burlap with Apache License 2.0
protected void CWGInit(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg, StateMapping abstractionForAgents){
	this.domain = domain;
	this.jointRewardFunctionModel = jr;
	this.tf = tf;
	this.initialStateGenerator = sg;
	this.abstractionForAgents = abstractionForAgents;
}
 
Example #6
Source File: TigerDomain.java    From burlap with Apache License 2.0
/**
 * Returns a {@link burlap.mdp.auxiliary.StateGenerator} that generates a hidden tiger state with the tiger on the
 * left side with probability probLeft, and on the right side otherwise.
 * @param probLeft the probability that a state with the tiger on the left side will be generated
 * @return a {@link burlap.mdp.auxiliary.StateGenerator}
 */
public static StateGenerator randomSideStateGenerator(final double probLeft){
	return new StateGenerator() {
		@Override
		public State generateState() {
			double roll = RandomFactory.getMapped(0).nextDouble();
			return roll < probLeft ? new TigerState(VAL_LEFT) : new TigerState(VAL_RIGHT);
		}
	};
}
 
Example #7
Source File: MCVideo.java    From burlap_examples with MIT License
public static void main(String[] args) {

		MountainCar mcGen = new MountainCar();
		SADomain domain = mcGen.generateDomain();

		StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
		SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
		SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

		NormalizedVariableFeatures features = new NormalizedVariableFeatures()
				.variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
				.variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
		FourierBasis fb = new FourierBasis(features, 4);

		LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
		Policy p = lspi.runPolicyIteration(30, 1e-6);

		Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
		VisualActionObserver vob = new VisualActionObserver(v);
		vob.initGUI();

		SimulatedEnvironment env = new SimulatedEnvironment(domain,
				new MCState(mcGen.physParams.valleyPos(), 0));
		EnvironmentServer envServ = new EnvironmentServer(env, vob);

		for(int i = 0; i < 100; i++){
			PolicyUtils.rollout(p, envServ);
			envServ.resetEnvironment();
		}

		System.out.println("Finished");

	}
 
Example #8
Source File: TigerDomain.java    From burlap with Apache License 2.0
/**
 * Main method for interacting with the tiger domain via an {@link EnvironmentShell}.
 * By default, the shell interacts with the partially observable environment ({@link burlap.mdp.singleagent.pomdp.SimulatedPOEnvironment}),
 * which means you only get to see the observations that the agent would. However, if you set the first command-line argument
 * to "h", then the shell will explore the underlying fully observable MDP states.
 * @param args either empty or ["h"]; provide "h" to explore the underlying fully observable tiger MDP.
 */
public static void main(String [] args){


	TigerDomain dgen = new TigerDomain(false);
	PODomain domain = (PODomain)dgen.generateDomain();

	StateGenerator tigerGenerator = TigerDomain.randomSideStateGenerator(0.5);

	Environment observableEnv = new SimulatedEnvironment(domain, tigerGenerator);
	Environment poEnv = new SimulatedPOEnvironment(domain, tigerGenerator);

	Environment envTouse = poEnv;
	if(args.length > 0 && args[0].equals("h")){
	    envTouse = observableEnv;
	}

	EnvironmentShell shell = new EnvironmentShell(domain, envTouse);
	shell.start();
}
 
Example #9
Source File: ApprenticeshipLearningRequest.java    From burlap with Apache License 2.0
public ApprenticeshipLearningRequest(SADomain domain, Planner planner, DenseStateFeatures featureGenerator, List<Episode> expertEpisodes, StateGenerator startStateGenerator) {
	super(domain, planner, expertEpisodes);
	this.initDefaults();
	this.setFeatureGenerator(featureGenerator);
	this.setStartStateGenerator(startStateGenerator);
}
 
Example #10
Source File: MinecraftEnvironment.java    From burlapcraft with GNU Lesser General Public License v3.0
public void setStateGenerator(StateGenerator stateGenerator) {
	this.stateGenerator = stateGenerator;
}
 
Example #11
Source File: SingleStageNormalFormGame.java    From burlap with Apache License 2.0
/**
 * Creates a world instance for this game in which the provided agents join in the order they are passed. The world
 * uses the provided domain instance generated from this object, rather than generating a new one.
 * @param domain the SGDomain instance
 * @param agents the agents to join the created world.
 * @return a world instance with the provided agents having already joined.
 */
public World createRepeatedGameWorld(SGDomain domain, SGAgent...agents){
	
	//grab the joint reward function from our bimatrix game in the more general BURLAP joint reward function interface
	JointRewardFunction jr = this.getJointRewardFunction();
	
	//game repeats forever (no terminal states) unless manually stopped
	TerminalFunction tf = new NullTermination();
	
	//set up the initial state generator for the world, which for a bimatrix game is trivial
	StateGenerator sg = new ConstantStateGenerator(new NFGameState(agents.length));

	//create a world to synchronize the actions of agents in this domain and record results
	World w = new World(domain, jr, tf, sg);
	
	for(SGAgent a : agents){
		w.join(a);
	}
	
	return w;
	
}
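
A brief usage sketch for the returned world (game, agent1, and agent2 are hypothetical instances; agent construction details depend on the game): once the agents have joined, stages of the repeated game can be run with World's runGame methods.

World w = game.createRepeatedGameWorld(domain, agent1, agent2); // game is a SingleStageNormalFormGame instance
w.runGame(100); // play 100 stages of the repeated game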
 
Example #12
Source File: MinecraftEnvironment.java    From burlapcraft with GNU Lesser General Public License v3.0
public StateGenerator getStateGenerator() {
	return stateGenerator;
}
 
Example #13
Source File: SimulatedPOEnvironment.java    From burlap with Apache License 2.0
public SimulatedPOEnvironment(PODomain domain, StateGenerator hiddenStateGenerator) {
	super(domain, hiddenStateGenerator);
	this.poDomain = domain;
}
 
Example #14
Source File: SimulatedEnvironment.java    From burlap with Apache License 2.0
public void setStateGenerator(StateGenerator stateGenerator) {
	this.stateGenerator = stateGenerator;
}
 
Example #15
Source File: SimulatedEnvironment.java    From burlap with Apache License 2.0
public StateGenerator getStateGenerator() {
	return stateGenerator;
}
 
Example #16
Source File: SimulatedEnvironment.java    From burlap with Apache License 2.0
public SimulatedEnvironment(SampleModel model, StateGenerator stateGenerator) {
	this.stateGenerator = stateGenerator;
	this.curState = stateGenerator.generateState();
	this.model = model;
}
 
Example #17
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License
public static void MCLSPIRBF(){

		MountainCar mcGen = new MountainCar();
		SADomain domain = mcGen.generateDomain();
		MCState s = new MCState(mcGen.physParams.valleyPos(), 0.);

		NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
				.variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
				.variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));

		StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
		SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
		SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

		RBFFeatures rbf = new RBFFeatures(inputFeatures, true);
		FlatStateGridder gridder = new FlatStateGridder()
				.gridDimension("x", mcGen.physParams.xmin, mcGen.physParams.xmax, 5)
				.gridDimension("v", mcGen.physParams.vmin, mcGen.physParams.vmax, 5);

		List<State> griddedStates = gridder.gridState(s);
		DistanceMetric metric = new EuclideanDistance();
		for(State g : griddedStates){
			rbf.addRBF(new GaussianRBF(inputFeatures.features(g), metric, 0.2));
		}

		LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(rbf, 3), dataset);
		Policy p = lspi.runPolicyIteration(30, 1e-6);

		Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
		VisualActionObserver vob = new VisualActionObserver(v);
		vob.initGUI();


		SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
		env.addObservers(vob);

		for(int i = 0; i < 5; i++){
			PolicyUtils.rollout(p, env);
			env.resetEnvironment();
		}

		System.out.println("Finished");


	}
 
Example #18
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License
public static void MCLSPIFB(){

		MountainCar mcGen = new MountainCar();
		SADomain domain = mcGen.generateDomain();

		StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
		SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
		SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

		NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
				.variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
				.variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));

		FourierBasis fb = new FourierBasis(inputFeatures, 4);

		LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
		Policy p = lspi.runPolicyIteration(30, 1e-6);

		Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
		VisualActionObserver vob = new VisualActionObserver(v);
		vob.initGUI();

		SimulatedEnvironment env = new SimulatedEnvironment(domain, new MCState(mcGen.physParams.valleyPos(), 0.));
		env.addObservers(vob);

		for(int i = 0; i < 5; i++){
			PolicyUtils.rollout(p, env);
			env.resetEnvironment();
		}

		System.out.println("Finished");


	}
 
Example #19
Source File: ConstantWorldGenerator.java    From burlap with Apache License 2.0
/**
 * Initializes the WorldGenerator.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 * @param abstractionForAgents the abstract state representation that agents will be provided
 */
public ConstantWorldGenerator(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg, StateMapping abstractionForAgents){
	this.CWGInit(domain, jr, tf, sg, abstractionForAgents);
}
 
Example #20
Source File: ConstantWorldGenerator.java    From burlap with Apache License 2.0
/**
 * This constructor is deprecated, because {@link burlap.mdp.stochasticgames.SGDomain} objects are now expected
 * to have a {@link JointModel} associated with them, making the constructor parameter for it
 * unnecessary. Instead use the constructor {@link #ConstantWorldGenerator(burlap.mdp.stochasticgames.SGDomain, JointRewardFunction, burlap.mdp.core.TerminalFunction, StateGenerator, burlap.mdp.auxiliary.StateMapping)}
 * @param domain the SGDomain the world will use
 * @param jam the joint action model that specifies the transition dynamics
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 * @param abstractionForAgents the abstract state representation that agents will be provided
 */
@Deprecated
public ConstantWorldGenerator(SGDomain domain, JointModel jam, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg, StateMapping abstractionForAgents){
	this.CWGInit(domain, jr, tf, sg, abstractionForAgents);
}
 
Example #21
Source File: ConstantWorldGenerator.java    From burlap with Apache License 2.0
/**
 * Initializes the WorldGenerator.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
public ConstantWorldGenerator(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
	this.CWGInit(domain, jr, tf, sg, new IdentityStateMapping());
}
 
Example #22
Source File: ConstantWorldGenerator.java    From burlap with Apache License 2.0
/**
 * This constructor is deprecated, because {@link burlap.mdp.stochasticgames.SGDomain} objects are now expected
 * to have a {@link JointModel} associated with them, making the constructor parameter for it
 * unnecessary. Instead use the constructor {@link #ConstantWorldGenerator(burlap.mdp.stochasticgames.SGDomain, JointRewardFunction, burlap.mdp.core.TerminalFunction, StateGenerator)}
 * @param domain the SGDomain the world will use
 * @param jam the joint action model that specifies the transition dynamics
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
@Deprecated
public ConstantWorldGenerator(SGDomain domain, JointModel jam, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
	this.CWGInit(domain, jr, tf, sg, new IdentityStateMapping());
}
 
Example #23
Source File: World.java    From burlap with Apache License 2.0
/**
 * Initializes the world
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 * @param abstractionForAgents the abstract state representation that agents will be provided
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg, StateMapping abstractionForAgents){
	this.init(domain, domain.getJointActionModel(), jr, tf, sg, abstractionForAgents);
}
 
Example #24
Source File: World.java    From burlap with Apache License 2.0
/**
 * Initializes the world.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
	this.init(domain, domain.getJointActionModel(), jr, tf, sg, new IdentityStateMapping());
}
 
Example #25
Source File: TigerDomain.java    From burlap with Apache License 2.0
/**
 * Returns a {@link burlap.mdp.auxiliary.StateGenerator} that 50% of the time generates a hidden tiger state with the tiger on the
 * left side, and 50% of the time on the right.
 * @return a {@link burlap.mdp.auxiliary.StateGenerator}
 */
public static StateGenerator randomSideStateGenerator(){
	return randomSideStateGenerator(0.5);
}
 
Example #26
Source File: ApprenticeshipLearningRequest.java    From burlap with Apache License 2.0
public StateGenerator getStartStateGenerator() { return this.startStateGenerator; }
Example #27
Source File: ApprenticeshipLearningRequest.java    From burlap with Apache License 2.0
public void setStartStateGenerator(StateGenerator startStateGenerator) { this.startStateGenerator = startStateGenerator; }