burlap.mdp.singleagent.model.RewardFunction Java Examples

The following examples show how to use burlap.mdp.singleagent.model.RewardFunction. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ContinuousDomainTutorial.java    From burlap_examples with MIT License 6 votes vote down vote up
/**
 * Plans on the inverted pendulum domain with sparse sampling and a greedy-Q
 * policy, prints the episode length, and visualizes the resulting episode.
 */
public static void IPSS(){

	// Noise-free pendulum whose reward/terminal functions are both keyed to
	// the pole tipping past pi/8 radians.
	double tipAngle = Math.PI / 8.;
	InvertedPendulum ip = new InvertedPendulum();
	ip.physParams.actionNoise = 0.;
	ip.setRf(new InvertedPendulum.InvertedPendulumRewardFunction(tipAngle));
	ip.setTf(new InvertedPendulum.InvertedPendulumTerminalFunction(tipAngle));
	SADomain domain = ip.generateDomain();

	State initialState = new InvertedPendulumState();

	// Sparse sampling planner; plans from each state the policy queries.
	SparseSampling planner = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
	planner.setForgetPreviousPlanResults(true);
	planner.toggleDebugPrinting(false);

	Policy policy = new GreedyQPolicy(planner);

	// Roll the policy out for at most 500 steps, then show the episode.
	Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), 500);
	System.out.println("Num steps: " + episode.maxTimeStep());
	Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
	new EpisodeSequenceVisualizer(v, domain, Arrays.asList(episode));

}
 
Example #2
Source File: ExampleGridWorld.java    From burlap_examples with MIT License 6 votes vote down vote up
/**
 * Builds the grid world domain: a factored model composed of the grid
 * transition dynamics plus a goal-based reward and terminal function, and
 * the four cardinal movement actions.
 * @return the generated {@link SADomain}
 */
@Override
public SADomain generateDomain() {

	SADomain domain = new SADomain();

	// Transition dynamics and task definition for the configured goal cell.
	GridWorldStateModel transitionModel = new GridWorldStateModel();
	RewardFunction rewardFunction = new ExampleRF(this.goalx, this.goaly);
	TerminalFunction terminalFunction = new ExampleTF(this.goalx, this.goaly);
	domain.setModel(new FactoredModel(transitionModel, rewardFunction, terminalFunction));

	// The four cardinal movement actions are applicable in every state.
	domain.addActionTypes(
			new UniversalActionType(ACTION_NORTH),
			new UniversalActionType(ACTION_SOUTH),
			new UniversalActionType(ACTION_EAST),
			new UniversalActionType(ACTION_WEST));

	return domain;
}
 
Example #3
Source File: ApprenticeshipLearning.java    From burlap with Apache License 2.0 6 votes vote down vote up
/**
 * Generates an anonymous instance of a reward function derived from a FeatureMapping
 * and associated feature weights.
 * Computes (w^(i))T phi from step 4 in section 3.
 * @param featureFunctions The feature mapping of states to features
 * @param featureWeights The weights given to each feature
 * @return An anonymous instance of RewardFunction
 */
public static RewardFunction generateRewardFunction(
		DenseStateFeatures featureFunctions, FeatureWeights featureWeights) {
	final DenseStateFeatures features = featureFunctions;
	// Copy the weights so later mutation of the caller's object cannot change rewards.
	final FeatureWeights weightsCopy = new FeatureWeights(featureWeights);
	return new RewardFunction() {
		@Override
		public double reward(State state, Action a, State sprime) {
			// Reward is the dot product of the weight vector with the
			// source state's feature vector.
			double[] w = weightsCopy.getWeights();
			double[] phi = features.features(state);
			double dot = 0;
			for(int i = 0; i < phi.length; i++){
				dot += w[i] * phi[i];
			}
			return dot;
		}

	};
}
 
Example #4
Source File: InvertedPendulum.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the inverted pendulum domain from a copy of the current physics
 * parameters, installing default reward/terminal functions when none were set.
 * @return the generated {@link SADomain}
 */
@Override
public SADomain generateDomain() {

	SADomain domain = new SADomain();

	// Copy the physics so later changes to this generator do not affect the model.
	IPModel transitionModel = new IPModel(this.physParams.copy());

	// Fall back to the standard pendulum task definition when unset.
	RewardFunction rf = this.rf != null ? this.rf : new InvertedPendulumRewardFunction();
	TerminalFunction tf = this.tf != null ? this.tf : new InvertedPendulumTerminalFunction();

	domain.setModel(new FactoredModel(transitionModel, rf, tf));

	domain.addActionType(new UniversalActionType(ACTION_LEFT))
			.addActionType(new UniversalActionType(ACTION_RIGHT))
			.addActionType(new UniversalActionType(ACTION_NO_FORCE));

	return domain;
}
 
Example #5
Source File: BlocksWorld.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the blocks world domain: the block state class, stack/unstack
 * actions, a factored model (defaulting to a null reward and no termination
 * when unset), and this generator's propositional functions.
 * @return the generated {@link OOSADomain}
 */
@Override
public OOSADomain generateDomain() {

	OOSADomain domain = new OOSADomain();

	domain.addStateClass(CLASS_BLOCK, BlocksWorldBlock.class);

	domain.addActionType(new StackActionType(ACTION_STACK))
			.addActionType(new UnstackActionType(ACTION_UNSTACK));

	// Default to a no-op task definition when none was configured.
	RewardFunction rf = this.rf != null ? this.rf : new NullRewardFunction();
	TerminalFunction tf = this.tf != null ? this.tf : new NullTermination();

	domain.setModel(new FactoredModel(new BWModel(), rf, tf));

	OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

	return domain;
}
 
Example #6
Source File: RewardValueProjection.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes. Note that if projectionType is ONESTEP a runtime exception will be thrown because projecting a one step
 * value requires the {@link burlap.mdp.core.Domain} to enumerate the actions and transition dynamics. Use the
 * {@link #RewardValueProjection(RewardFunction, RewardProjectionType, SADomain)}
 * constructor instead.
 * @param rf the input {@link RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType){
	this.rf = rf;
	this.projectionType = projectionType;
	// ONESTEP projection requires the domain's transition dynamics, which this
	// constructor does not receive; reject it up front.
	if(RewardProjectionType.ONESTEP.equals(projectionType)){
		throw new RuntimeException("If the reward function depends on a 1 step transition (e.g., from a source state to a target state) " +
				"then to project the value the Domain is needed evaluate the transition dynamics. Use the RewardValueProjection(RewardFunction, RewardProjectionType, Domain) " +
				"constructor instead to specify.");
	}
}
 
Example #7
Source File: RewardValueProjection.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes.
 * @param rf the input {@link RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 * @param domain the {@link burlap.mdp.core.Domain} in which the {@link RewardFunction} is evaluated.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType, SADomain domain){
	this.rf = rf;
	this.projectionType = projectionType;
	this.domain = domain;
	if(this.projectionType != RewardProjectionType.ONESTEP){
		return;
	}
	// One-step projection: set up a depth-1 planner whose model substitutes
	// the supplied reward function and ignores termination.
	this.oneStepBellmanPlanner = new SparseSampling(domain, 1., new SimpleHashableStateFactory(), 1, -1);
	this.oneStepBellmanPlanner.setModel(new CustomRewardNoTermModel(domain.getModel(), rf));
	this.oneStepBellmanPlanner.setForgetPreviousPlanResults(true);
	this.oneStepBellmanPlanner.toggleDebugPrinting(false);
}
 
Example #8
Source File: PotentialShapedRF.java    From burlap with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes potential-based shaping around an objective reward function.
 * @param baseRF the objective task reward function.
 * @param potentialFunction the potential function to use.
 * @param discount the discount factor of the MDP.
 */
public PotentialShapedRF(RewardFunction baseRF, PotentialFunction potentialFunction, double discount) {
	// The base (objective) reward function is held by the superclass.
	super(baseRF);
	this.discount = discount;
	this.potentialFunction = potentialFunction;
}
 
Example #9
Source File: MountainCar.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #10
Source File: CartPoleDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the reward function for this object to use.
 * @param rf the {@link RewardFunction} to use
 */
public void setRf(RewardFunction rf) {
	this.rf = rf;
}
 
Example #11
Source File: MountainCar.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the reward function for this object to use.
 * @param rf the {@link RewardFunction} to use
 */
public void setRf(RewardFunction rf) {
	this.rf = rf;
}
 
Example #12
Source File: LunarLanderDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #13
Source File: LunarLanderDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the reward function for this object to use.
 * @param rf the {@link RewardFunction} to use
 */
public void setRf(RewardFunction rf) {
	this.rf = rf;
}
 
Example #14
Source File: LunarLanderDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the lunar lander domain: agent/pad/obstacle state classes, the
 * turn/idle/thrust actions, propositional functions, and a factored model
 * with default reward/terminal functions when none were configured.
 * @return the generated {@link OOSADomain}
 */
@Override
public OOSADomain generateDomain() {
	
	OOSADomain domain = new OOSADomain();
	
	// NOTE(review): this aliases (does not copy) this.thrustValues, so the
	// defaults added below persist on this generator across calls.
	List <Double> thrustValuesTemp = this.thrustValues;
	if(thrustValuesTemp.isEmpty()){
		// Default thrusts: a weak burn, and one exactly negating gravity.
		thrustValuesTemp.add(0.32);
		thrustValuesTemp.add(-physParams.gravity);
	}
	
	domain.addStateClass(CLASS_AGENT, LLAgent.class)
			.addStateClass(CLASS_PAD, LLBlock.LLPad.class)
			.addStateClass(CLASS_OBSTACLE, LLBlock.LLObstacle.class);

	//make copy of physics parameters
	LLPhysicsParams cphys = this.physParams.copy();
	
	//add actions; fixed: pass the local thrustValuesTemp (previously the field
	//was passed directly, which only worked because the two alias each other)
	domain.addActionType(new UniversalActionType(ACTION_TURN_LEFT))
			.addActionType(new UniversalActionType(ACTION_TURN_RIGHT))
			.addActionType(new UniversalActionType(ACTION_IDLE))
			.addActionType(new ThrustType(thrustValuesTemp));


	OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

	LunarLanderModel smodel = new LunarLanderModel(cphys);

	// Fall back to the standard lunar lander task definition when unset.
	RewardFunction rf = this.rf;
	TerminalFunction tf = this.tf;
	if(rf == null){
		rf = new LunarLanderRF(domain);
	}
	if(tf == null){
		tf = new LunarLanderTF(domain);
	}

	FactoredModel model = new FactoredModel(smodel, rf, tf);
	domain.setModel(model);
	
	return domain;
	
}
 
Example #15
Source File: GridWorldDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #16
Source File: GridWorldDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the reward function for this object to use.
 * @param rf the {@link RewardFunction} to use
 */
public void setRf(RewardFunction rf) {
	this.rf = rf;
}
 
Example #17
Source File: GraphDefinedDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #18
Source File: GraphDefinedDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the reward function for this object to use.
 * @param rf the {@link RewardFunction} to use
 */
public void setRf(RewardFunction rf) {
	this.rf = rf;
}
 
Example #19
Source File: FrostbiteDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #20
Source File: FrostbiteDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the reward function for this object to use.
 * @param rf the {@link RewardFunction} to use
 */
public void setRf(RewardFunction rf) {
	this.rf = rf;
}
 
Example #21
Source File: FrostbiteDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new frostbite domain: agent/igloo/platform state classes, the
 * four movement actions plus idle, this generator's propositional functions,
 * and a factored model with default reward/terminal functions when unset.
 *
 * @return the generated domain object
 */
@Override
public OOSADomain generateDomain() {

	OOSADomain domain = new OOSADomain();

	domain.addStateClass(CLASS_AGENT, FrostbiteAgent.class)
			.addStateClass(CLASS_IGLOO, FrostbiteIgloo.class)
			.addStateClass(CLASS_PLATFORM, FrostbitePlatform.class);

	// Movement in the four cardinal directions, plus doing nothing.
	domain.addActionType(new UniversalActionType(ACTION_NORTH))
			.addActionType(new UniversalActionType(ACTION_SOUTH))
			.addActionType(new UniversalActionType(ACTION_EAST))
			.addActionType(new UniversalActionType(ACTION_WEST))
			.addActionType(new UniversalActionType(ACTION_IDLE));

	// Register every propositional function this generator defines.
	for(PropositionalFunction pf : this.generatePFs()){
		domain.addPropFunction(pf);
	}

	// Fall back to the standard frostbite task definition when unset.
	RewardFunction rf = this.rf != null ? this.rf : new FrostbiteRF(domain);
	TerminalFunction tf = this.tf != null ? this.tf : new FrostbiteTF(domain);

	domain.setModel(new FactoredModel(new FrostbiteModel(scale), rf, tf));

	return domain;
}
 
Example #22
Source File: InvertedPendulum.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #23
Source File: InvertedPendulum.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the reward function for this object to use.
 * @param rf the {@link RewardFunction} to use
 */
public void setRf(RewardFunction rf) {
	this.rf = rf;
}
 
Example #24
Source File: CartPoleDomain.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #25
Source File: BlockDude.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #26
Source File: ExampleOOGridWorld.java    From burlap_examples with MIT License 4 votes vote down vote up
/**
 * Builds the OO grid world domain: agent/location state classes, the four
 * movement actions, propositional functions, and a factored model whose
 * reward and termination are defined by the {@code PF_AT} proposition.
 * @return the generated {@link OOSADomain}
 */
@Override
public OOSADomain generateDomain() {

	OOSADomain domain = new OOSADomain();

	domain.addStateClass(CLASS_AGENT, ExGridAgent.class)
			.addStateClass(CLASS_LOCATION, EXGridLocation.class);

	// The four cardinal movement actions.
	domain.addActionTypes(
			new UniversalActionType(ACTION_NORTH),
			new UniversalActionType(ACTION_SOUTH),
			new UniversalActionType(ACTION_EAST),
			new UniversalActionType(ACTION_WEST));

	OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

	// Task keyed to PF_AT (100 and -1 per SingleGoalPFRF's constructor);
	// the episode terminates when PF_AT holds.
	OOGridWorldStateModel transitionModel = new OOGridWorldStateModel();
	RewardFunction goalReward = new SingleGoalPFRF(domain.propFunction(PF_AT), 100, -1);
	TerminalFunction goalTermination = new SinglePFTF(domain.propFunction(PF_AT));
	domain.setModel(new FactoredModel(transitionModel, goalReward, goalTermination));

	return domain;
}
 
Example #27
Source File: MinecraftEnvironment.java    From burlapcraft with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Sets the reward function this environment uses.
 * @param rf the {@link RewardFunction} to use
 */
public void setRewardFunction(RewardFunction rf) {
	this.rewardFunction = rf;
}
 
Example #28
Source File: MinecraftDomainGenerator.java    From burlapcraft with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Returns the reward function currently set on this object.
 * May be null if none has been set.
 * @return the current {@link RewardFunction}
 */
public RewardFunction getRf() {
	return rf;
}
 
Example #29
Source File: MinecraftDomainGenerator.java    From burlapcraft with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Sets the reward function for this object to use.
 * @param rf the {@link RewardFunction} to use
 */
public void setRf(RewardFunction rf) {
	this.rf = rf;
}
 
Example #30
Source File: CustomRewardModel.java    From burlap with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes with the model to wrap and the reward function to apply
 * (presumably in place of the wrapped model's rewards — per the class name).
 * @param model the {@link SampleModel} to delegate to
 * @param rewardFunction the {@link RewardFunction} this model uses
 */
public CustomRewardModel(SampleModel model, RewardFunction rewardFunction) {
	this.rewardFunction = rewardFunction;
	this.model = model;
}