burlap.mdp.core.TerminalFunction Java Examples

The following examples show how to use burlap.mdp.core.TerminalFunction, drawn from open source projects. The source file and license are listed above each example.
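TerminalFunction is a single-method interface: an implementation reports whether a given state is terminal. As a minimal sketch of implementing it (this class and its variable key are hypothetical, not taken from any project below):

import burlap.mdp.core.TerminalFunction;
import burlap.mdp.core.state.State;

//hypothetical terminal function: ends an episode once a numeric state
//variable reaches or exceeds a threshold
public class ThresholdTF implements TerminalFunction {

	private final Object variableKey; //state variable to inspect (assumed numeric)
	private final double threshold;

	public ThresholdTF(Object variableKey, double threshold) {
		this.variableKey = variableKey;
		this.threshold = threshold;
	}

	@Override
	public boolean isTerminal(State s) {
		double v = ((Number)s.get(this.variableKey)).doubleValue();
		return v >= this.threshold;
	}
}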
Example #1
Source File: MultiAgentExperimenter.java    From burlap with Apache License 2.0
/**
 * Initializes. Trial length is interpreted either as the number of episodes per trial or the total number of steps across all episodes.
 * By default the length will be interpreted as the number of episodes, but this interpretation can be changed with the {@link #toggleTrialLengthInterpretation(boolean)}
 * method. The agents will join generated worlds in the order that they appear in the list.
 * @param worldGenerator the world generator used to create a clean world for each trial.
 * @param tf the terminal function used to interpret the end of episodes
 * @param nTrials the number of trials over which performance will be gathered
 * @param trialLength the length of each trial
 * @param agentFactoriesAndTypes the agent factories and the type of agent the generated agent will join the world as
 */
public MultiAgentExperimenter(WorldGenerator worldGenerator, TerminalFunction tf, int nTrials, int trialLength, AgentFactoryAndType...agentFactoriesAndTypes){
	
	if(agentFactoriesAndTypes.length == 0){
		throw new RuntimeException("Zero agent factories provided. At least one must be given for an experiment");
	}
	this.worldGenerator = worldGenerator;
	this.tf = tf;
	this.nTrials = nTrials;
	this.trialLength = trialLength;
	this.agentFactoriesAndTypes = agentFactoriesAndTypes;
	
	this.displayPlots = true;

	
}
 
Example #2
Source File: MADPPlannerFactory.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the domain in which to perform planning
 * @param jointModel the joint action model
 * @param jointRewardFunction the joint reward function
 * @param terminalFunction the terminal state function
 * @param discount the discount
 * @param hashingFactory the hashing factory to use for storing states
 * @param qInit the q-value initialization function to use.
 * @param backupOperator the backup operator that defines the solution concept being solved
 * @param maxDelta the threshold on the maximum Q-value change below which VI terminates
 * @param maxIterations the maximum number of iterations allowed
 */
public MAVIPlannerFactory(SGDomain domain, JointModel jointModel, JointRewardFunction jointRewardFunction, TerminalFunction terminalFunction,
						  double discount, HashableStateFactory hashingFactory, QFunction qInit, SGBackupOperator backupOperator, double maxDelta, int maxIterations){
	
	this.domain = domain;
	this.jointModel = jointModel;
	this.jointRewardFunction = jointRewardFunction;
	this.terminalFunction = terminalFunction;
	this.discount = discount;
	this.hashingFactory = hashingFactory;
	this.qInit = qInit;
	this.backupOperator = backupOperator;
	this.maxDelta = maxDelta;
	this.maxIterations = maxIterations;
	
}
 
Example #3
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License
public static void IPSS(){

		InvertedPendulum ip = new InvertedPendulum();
		ip.physParams.actionNoise = 0.;
		RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
		TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
		ip.setRf(rf);
		ip.setTf(tf);
		SADomain domain = ip.generateDomain();

		State initialState = new InvertedPendulumState();

		SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
		ss.setForgetPreviousPlanResults(true);
		ss.toggleDebugPrinting(false);
		Policy p = new GreedyQPolicy(ss);

		Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
		System.out.println("Num steps: " + e.maxTimeStep());
		Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(e));

	}
 
Example #4
Source File: World.java    From burlap with Apache License 2.0
protected void init(SGDomain domain, JointModel jam, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg, StateMapping abstractionForAgents){
	this.domain = domain;
	this.worldModel = jam;
	this.jointRewardFunction = jr;
	this.tf = tf;
	this.initialStateGenerator = sg;
	this.abstractionForAgents = abstractionForAgents;
	
	agents = new ArrayList<SGAgent>();
	
	agentCumulativeReward = new HashedAggregator<String>();
	
	worldObservers = new ArrayList<WorldObserver>();

	this.generateNewCurrentState();
	
	debugId = 284673923;
}
 
Example #5
Source File: ExampleGridWorld.java    From burlap_examples with MIT License
@Override
public SADomain generateDomain() {

	SADomain domain = new SADomain();


	domain.addActionTypes(
			new UniversalActionType(ACTION_NORTH),
			new UniversalActionType(ACTION_SOUTH),
			new UniversalActionType(ACTION_EAST),
			new UniversalActionType(ACTION_WEST));

	GridWorldStateModel smodel = new GridWorldStateModel();
	RewardFunction rf = new ExampleRF(this.goalx, this.goaly);
	TerminalFunction tf = new ExampleTF(this.goalx, this.goaly);

	domain.setModel(new FactoredModel(smodel, rf, tf));

	return domain;
}
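
The ExampleTF constructed above is not shown on this page; a sketch consistent with the tutorial's grid world (the VAR_X and VAR_Y variable keys are assumptions about its state representation) might look like:

public static class ExampleTF implements TerminalFunction {

	int goalX;
	int goalY;

	public ExampleTF(int goalX, int goalY){
		this.goalX = goalX;
		this.goalY = goalY;
	}

	@Override
	public boolean isTerminal(State s) {
		//terminal when the agent is at the goal location
		int ax = (Integer)s.get(VAR_X);
		int ay = (Integer)s.get(VAR_Y);
		return ax == this.goalX && ay == this.goalY;
	}
}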
 
Example #6
Source File: MADPPlannerFactory.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the domain in which to perform planning
 * @param agentDefinitions the definitions of the agent types
 * @param jointModel the joint action model
 * @param jointRewardFunction the joint reward function
 * @param terminalFunction the terminal state function
 * @param discount the discount
 * @param hashingFactory the hashing factory to use for storing states
 * @param qInit the q-value initialization function to use.
 * @param backupOperator the backup operator that defines the solution concept being solved
 * @param maxDelta the threshold on the maximum Q-value change below which VI terminates
 * @param maxIterations the maximum number of iterations allowed
 */
public MAVIPlannerFactory(SGDomain domain, List<SGAgentType> agentDefinitions, JointModel jointModel, JointRewardFunction jointRewardFunction, TerminalFunction terminalFunction,
						  double discount, HashableStateFactory hashingFactory, QFunction qInit, SGBackupOperator backupOperator, double maxDelta, int maxIterations){
	
	this.domain = domain;
	this.agentDefinitions = agentDefinitions;
	this.jointModel = jointModel;
	this.jointRewardFunction = jointRewardFunction;
	this.terminalFunction = terminalFunction;
	this.discount = discount;
	this.hashingFactory = hashingFactory;
	this.qInit = qInit;
	this.backupOperator = backupOperator;
	this.maxDelta = maxDelta;
	this.maxIterations = maxIterations;
	
}
 
Example #7
Source File: MADynamicProgramming.java    From burlap with Apache License 2.0
/**
 * Initializes all the main data structures of the value function.
 * @param domain the domain in which to perform planning
 * @param agentDefinitions the definitions of the agents involved in the planning problem.
 * @param jointRewardFunction the joint reward function
 * @param terminalFunction the terminal state function
 * @param discount the discount factor
 * @param hashingFactory the state hashing factory to use to look up Q-values for individual states
 * @param vInit the value function initialization function to use
 * @param backupOperator the solution concept backup operator to use.
 */
public void initMAVF(SGDomain domain, List<SGAgentType> agentDefinitions, JointRewardFunction jointRewardFunction, TerminalFunction terminalFunction,
					 double discount, HashableStateFactory hashingFactory, ValueFunction vInit, SGBackupOperator backupOperator){

	this.domain = domain;
	this.jointModel = domain.getJointActionModel();
	this.jointRewardFunction = jointRewardFunction;
	this.terminalFunction = terminalFunction;
	this.discount = discount;
	this.hashingFactory = hashingFactory;
	this.vInit = vInit;
	this.backupOperator = backupOperator;
	
	
	this.setAgentDefinitions(agentDefinitions);
	
}
 
Example #8
Source File: BlocksWorld.java    From burlap with Apache License 2.0
@Override
public OOSADomain generateDomain() {

	OOSADomain domain = new OOSADomain();
	
	domain.addStateClass(CLASS_BLOCK, BlocksWorldBlock.class);

	domain.addActionType(new StackActionType(ACTION_STACK))
			.addActionType(new UnstackActionType(ACTION_UNSTACK));

	RewardFunction rf = this.rf;
	TerminalFunction tf = this.tf;

	if(rf == null){
		rf = new NullRewardFunction();
	}
	if(tf == null){
		tf = new NullTermination();
	}

	BWModel smodel = new BWModel();
	FactoredModel model = new FactoredModel(smodel, rf, tf);
	domain.setModel(model);

	OODomain.Helper.addPfsToDomain(domain, this.generatePfs());
	
	return domain;
}
 
Example #9
Source File: MADPPlannerFactory.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the domain in which to perform planning
 * @param jointModel the joint action model
 * @param jointRewardFunction the joint reward function
 * @param terminalFunction the terminal state function
 * @param discount the discount
 * @param hashingFactory the hashing factory to use for storing states
 * @param qInit the default Q-value to initialize all values to
 * @param backupOperator the backup operator that defines the solution concept being solved
 * @param maxDelta the threshold on the maximum Q-value change below which VI terminates
 * @param maxIterations the maximum number of iterations allowed
 */
public MAVIPlannerFactory(SGDomain domain, JointModel jointModel, JointRewardFunction jointRewardFunction, TerminalFunction terminalFunction,
						  double discount, HashableStateFactory hashingFactory, double qInit, SGBackupOperator backupOperator, double maxDelta, int maxIterations){
	
	this.domain = domain;
	this.jointModel = jointModel;
	this.jointRewardFunction = jointRewardFunction;
	this.terminalFunction = terminalFunction;
	this.discount = discount;
	this.hashingFactory = hashingFactory;
	this.qInit = new ConstantValueFunction(qInit);
	this.backupOperator = backupOperator;
	this.maxDelta = maxDelta;
	this.maxIterations = maxIterations;
	
}
 
Example #10
Source File: InvertedPendulum.java    From burlap with Apache License 2.0
@Override
public SADomain generateDomain() {
	
	SADomain domain = new SADomain();


	IPPhysicsParams cphys = this.physParams.copy();
	IPModel smodel = new IPModel(cphys);

	RewardFunction rf = this.rf;
	TerminalFunction tf = this.tf;

	if(rf == null){
		rf = new InvertedPendulumRewardFunction();
	}
	if(tf == null){
		tf = new InvertedPendulumTerminalFunction();
	}

	FactoredModel model = new FactoredModel(smodel, rf, tf);
	domain.setModel(model);

	domain.addActionType(new UniversalActionType(ACTION_LEFT))
			.addActionType(new UniversalActionType(ACTION_RIGHT))
			.addActionType(new UniversalActionType(ACTION_NO_FORCE));

	
	return domain;
}
 
Example #11
Source File: ConstantWorldGenerator.java    From burlap with Apache License 2.0
protected void CWGInit(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg, StateMapping abstractionForAgents){
	this.domain = domain;
	this.jointRewardFunctionModel = jr;
	this.tf = tf;
	this.initialStateGenerator = sg;
	this.abstractionForAgents = abstractionForAgents;
}
 
Example #12
Source File: TestPlanning.java    From burlap with Apache License 2.0
@Before
public void setup() {
	this.gw = new GridWorldDomain(11, 11);
	this.gw.setMapToFourRooms();
	this.gw.setRf(new UniformCostRF());
	TerminalFunction tf = new SinglePFTF(PropositionalFunction.findPF(gw.generatePfs(), PF_AT_LOCATION));
	this.gw.setTf(tf);
	this.domain = this.gw.generateDomain();
	this.goalCondition = new TFGoalCondition(tf);
	this.hashingFactory = new SimpleHashableStateFactory();
}
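
Here SinglePFTF marks as terminal any state in which the PF_AT_LOCATION propositional function holds, and TFGoalCondition exposes those same states as a goal test for goal-directed planners. A hypothetical check of both against some State s from this domain:

boolean done = tf.isTerminal(s);                  //true in any at-location state
boolean isGoal = this.goalCondition.satisfies(s); //the same states, viewed as goals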
 
Example #13
Source File: BlockDude.java    From burlap with Apache License 2.0
public void setTf(TerminalFunction tf) {
	this.tf = tf;
}
 
Example #14
Source File: MountainCar.java    From burlap with Apache License 2.0
public void setTf(TerminalFunction tf) {
	this.tf = tf;
}
 
Example #15
Source File: LunarLanderDomain.java    From burlap with Apache License 2.0
public TerminalFunction getTf() {
	return tf;
}
 
Example #16
Source File: LunarLanderDomain.java    From burlap with Apache License 2.0
public void setTf(TerminalFunction tf) {
	this.tf = tf;
}
 
Example #17
Source File: LunarLanderDomain.java    From burlap with Apache License 2.0
@Override
public OOSADomain generateDomain() {
	
	OOSADomain domain = new OOSADomain();
	
	List <Double> thrustValuesTemp = this.thrustValues;
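	//no thrust values were provided; add defaults: a small thrust and one that counteracts gravity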
	if(thrustValuesTemp.isEmpty()){
		thrustValuesTemp.add(0.32);
		thrustValuesTemp.add(-physParams.gravity);
	}
	
	domain.addStateClass(CLASS_AGENT, LLAgent.class)
			.addStateClass(CLASS_PAD, LLBlock.LLPad.class)
			.addStateClass(CLASS_OBSTACLE, LLBlock.LLObstacle.class);

	//make copy of physics parameters
	LLPhysicsParams cphys = this.physParams.copy();
	
	//add actions
	domain.addActionType(new UniversalActionType(ACTION_TURN_LEFT))
			.addActionType(new UniversalActionType(ACTION_TURN_RIGHT))
			.addActionType(new UniversalActionType(ACTION_IDLE))
			.addActionType(new ThrustType(thrustValues));


	OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

	LunarLanderModel smodel = new LunarLanderModel(cphys);
	RewardFunction rf = this.rf;
	TerminalFunction tf = this.tf;
	if(rf == null){
		rf = new LunarLanderRF(domain);
	}
	if(tf == null){
		tf = new LunarLanderTF(domain);
	}

	FactoredModel model = new FactoredModel(smodel, rf, tf);
	domain.setModel(model);
	
	return domain;
	
}
 
Example #18
Source File: GridWorldDomain.java    From burlap with Apache License 2.0
public TerminalFunction getTf() {
	return tf;
}
 
Example #19
Source File: GridWorldDomain.java    From burlap with Apache License 2.0
public void setTf(TerminalFunction tf) {
	this.tf = tf;
}
 
Example #20
Source File: MinecraftDomainGenerator.java    From burlapcraft with GNU Lesser General Public License v3.0
public void setTf(TerminalFunction tf) {
	this.tf = tf;
}
 
Example #21
Source File: MountainCar.java    From burlap with Apache License 2.0
public TerminalFunction getTf() {
	return tf;
}
 
Example #22
Source File: FrostbiteDomain.java    From burlap with Apache License 2.0
/**
 * Creates a new frostbite domain.
 *
 * @return the generated domain object
 */
@Override
public OOSADomain generateDomain() {

	OOSADomain domain = new OOSADomain();

	domain.addStateClass(CLASS_AGENT, FrostbiteAgent.class)
			.addStateClass(CLASS_IGLOO, FrostbiteIgloo.class)
			.addStateClass(CLASS_PLATFORM, FrostbitePlatform.class);

	//add actions
	domain.addActionType(new UniversalActionType(ACTION_NORTH))
			.addActionType(new UniversalActionType(ACTION_SOUTH))
			.addActionType(new UniversalActionType(ACTION_EAST))
			.addActionType(new UniversalActionType(ACTION_WEST))
			.addActionType(new UniversalActionType(ACTION_IDLE));



	//add pfs
	List<PropositionalFunction> pfs = this.generatePFs();
	for(PropositionalFunction pf : pfs){
		domain.addPropFunction(pf);
	}


	FrostbiteModel smodel = new FrostbiteModel(scale);
	RewardFunction rf = this.rf;
	TerminalFunction tf = this.tf;
	if(rf == null){
		rf = new FrostbiteRF(domain);
	}
	if(tf == null){
		tf = new FrostbiteTF(domain);
	}


	FactoredModel model = new FactoredModel(smodel, rf, tf);
	domain.setModel(model);

	return domain;
}
 
Example #23
Source File: BlockDude.java    From burlap with Apache License 2.0
public TerminalFunction getTf() {
	return tf;
}
 
Example #24
Source File: BlocksWorld.java    From burlap with Apache License 2.0
public void setTf(TerminalFunction tf) {
	this.tf = tf;
}
 
Example #25
Source File: FactoredModel.java    From burlap with Apache License 2.0
@Override
public void useTerminalFunction(TerminalFunction tf) {
	this.tf = tf;
}
 
Example #26
Source File: FactoredModel.java    From burlap with Apache License 2.0
@Override
public TerminalFunction terminalFunction() {
	return tf;
}
 
Example #27
Source File: FactoredModel.java    From burlap with Apache License 2.0
public TerminalFunction getTf() {
	return tf;
}
 
Example #28
Source File: FactoredModel.java    From burlap with Apache License 2.0
public void setTf(TerminalFunction tf) {
	this.tf = tf;
}
 
Example #29
Source File: BlocksWorld.java    From burlap with Apache License 2.0
public TerminalFunction getTf() {
	return tf;
}
 
Example #30
Source File: SingleStageNormalFormGame.java    From burlap with Apache License 2.0
/**
 * Creates a world instance for this game in which the provided agents join in the order they are passed. The world
 * uses the provided domain instance (previously generated by this object) rather than generating a new one.
 * @param domain the SGDomain instance
 * @param agents the agents to join the created world.
 * @return a world instance with the provided agents having already joined.
 */
public World createRepeatedGameWorld(SGDomain domain, SGAgent...agents){
	
	//grab the joint reward function of our bimatrix game through the more general BURLAP joint reward function interface
	JointRewardFunction jr = this.getJointRewardFunction();
	
	//the game repeats forever unless manually stopped after T stages
	TerminalFunction tf = new NullTermination();
	
	//set up the initial state generator for the world, which for a bimatrix game is trivial
	StateGenerator sg = new NFGameState(agents.length);

	//create a world to synchronize the actions of agents in this domain and record results
	World w = new World(domain, jr, tf, sg);
	
	for(SGAgent a : agents){
		w.join(a);
	}
	
	return w;
	
}
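
Because the terminal function is NullTermination, a world created this way never ends a game on its own; the caller bounds it instead. A hypothetical usage, where game is a SingleStageNormalFormGame instance, agentA and agentB are SGAgents built for it, and World.runGame(int) is assumed to play that many stages:

SGDomain domain = (SGDomain)game.generateDomain();
World w = game.createRepeatedGameWorld(domain, agentA, agentB);
w.runGame(100); //play 100 stages of the repeated game, then stop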