burlap.behavior.policy.EpsilonGreedy Java Examples

The following examples show how to use burlap.behavior.policy.EpsilonGreedy, BURLAP's epsilon-greedy action-selection policy: with probability epsilon it chooses an action uniformly at random, and otherwise it chooses a greedy action with respect to the Q-values of the solver it is attached to. Each example is drawn from the project and source file noted in its header.
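Before the project examples, here is a minimal, self-contained sketch of the typical pattern: construct a Q-learning agent, give it an EpsilonGreedy policy over its own Q-values, and run learning episodes. The grid-world setup, the class name EpsilonGreedyDemo, and the epsilon of 0.2 are illustrative choices, not taken from any of the projects below.

import burlap.behavior.policy.EpsilonGreedy;
import burlap.behavior.singleagent.learning.tdmethods.QLearning;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.GridWorldTerminalFunction;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;
import burlap.statehashing.simple.SimpleHashableStateFactory;

public class EpsilonGreedyDemo {

	public static void main(String[] args) {

		// A small grid world domain to learn in, with the top-right corner as the goal
		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		gwd.setMapToFourRooms();
		gwd.setTf(new GridWorldTerminalFunction(10, 10));
		SADomain domain = gwd.generateDomain();

		// Environment that starts the agent in the bottom-left corner
		SimulatedEnvironment env = new SimulatedEnvironment(domain,
				new GridWorldState(new GridAgent(0, 0)));

		// Tabular Q-learning: gamma = 0.99, Q-values initialized to 0, learning rate 0.1
		QLearning agent = new QLearning(domain, 0.99, new SimpleHashableStateFactory(), 0., 0.1);

		// Act epsilon-greedily with respect to the agent's own Q-values:
		// pick a random action with probability 0.2, otherwise a greedy one
		agent.setLearningPolicy(new EpsilonGreedy(agent, 0.2));

		// Run learning episodes, each capped at 200 steps
		for(int i = 0; i < 100; i++){
			agent.runLearningEpisode(env, 200);
			env.resetEnvironment();
		}
	}
}

If you want to anneal exploration yourself, EpsilonGreedy also exposes getEpsilon()/setEpsilon(double) accessors that can be called between episodes; the burlap_caffe examples below (#9 and #10) instead use AnnealedEpsilonGreedy, which decays epsilon from a start value to an end value over a fixed number of steps.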
Example #1
Source File: SGQWActionHistoryFactory.java    From burlap with Apache License 2.0
@Override
public SGAgent generateAgent(String agentName, SGAgentType type) {
	SGQWActionHistory agent = new SGQWActionHistory(domain, discount, learningRate, stateHash, historySize)
			.setAgentDetails(agentName, type);

	if(this.qinit != null){
		agent.setQValueInitializer(qinit);
	}
	if(this.epsilon >= 0.){
		EpsilonGreedy egreedy = new EpsilonGreedy(agent, this.epsilon);
		agent.setStrategy(egreedy);
	}
	
	return agent;
	
}
 
Example #2
Source File: QLTutorial.java    From burlap_examples with MIT License
public QLTutorial(SADomain domain, double gamma, HashableStateFactory hashingFactory,
				  QFunction qinit, double learningRate, double epsilon){

	this.solverInit(domain, gamma, hashingFactory);
	this.qinit = qinit;
	this.learningRate = learningRate;
	this.qValues = new HashMap<HashableState, List<QValue>>();
	this.learningPolicy = new EpsilonGreedy(this, epsilon);

}
 
Example #3
Source File: SGNaiveQLAgent.java    From burlap with Apache License 2.0
/**
 * Initializes with a default Q-value of 0 and a 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory) {
	this.init(d);
	this.discount = discount;
	this.learningRate = new ConstantLR(learningRate);
	this.hashFactory = hashFactory;
	this.qInit = new ConstantValueFunction(0.);
	
	this.qMap = new HashMap<HashableState, List<QValue>>();
	stateRepresentations = new HashMap<HashableState, State>();
	this.policy = new EpsilonGreedy(this, 0.1);
	
	this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
 
Example #4
Source File: SGNaiveQLAgent.java    From burlap with Apache License 2.0
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param defaultQ the default to which all Q-values will be initialized
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, double defaultQ, HashableStateFactory hashFactory) {
	this.init(d);
	this.discount = discount;
	this.learningRate = new ConstantLR(learningRate);
	this.hashFactory = hashFactory;
	this.qInit = new ConstantValueFunction(defaultQ);
	
	this.qMap = new HashMap<HashableState, List<QValue>>();
	stateRepresentations = new HashMap<HashableState, State>();
	this.policy = new EpsilonGreedy(this, 0.1);
	
	this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
 
Example #5
Source File: SGNaiveQLAgent.java    From burlap with Apache License 2.0
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param qInitializer the Q-value initialization method
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, QFunction qInitializer, HashableStateFactory hashFactory) {
	this.init(d);
	this.discount = discount;
	this.learningRate = new ConstantLR(learningRate);
	this.hashFactory = hashFactory;
	this.qInit = qInitializer;
	
	this.qMap = new HashMap<HashableState, List<QValue>>();
	stateRepresentations = new HashMap<HashableState, State>();
	this.policy = new EpsilonGreedy(this, 0.1);
	
	this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
 
Example #6
Source File: ApproximateQLearning.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the learning domain
 * @param gamma the discount factor
 * @param vfa the value function approximation to use
 * @param stateMapping the state mapping to use to process a state observation from the environment
 */
public ApproximateQLearning(SADomain domain, double gamma, ParametricFunction.ParametricStateActionFunction vfa, StateMapping stateMapping) {
	this.vfa = vfa;
	this.staleVfa = vfa;
	this.learningPolicy = new EpsilonGreedy(this, 0.1);
	this.stateMapping = stateMapping;

	this.solverInit(domain, gamma, null);
}
 
Example #7
Source File: LSPI.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the problem domain
 * @param gamma the discount factor
 * @param saFeatures the state-action features to use
 */
public LSPI(SADomain domain, double gamma, DenseStateActionFeatures saFeatures){
	this.solverInit(domain, gamma, null);
	this.saFeatures = saFeatures;
	this.vfa = new DenseStateActionLinearVFA(saFeatures, 0.);
	this.learningPolicy = new EpsilonGreedy(this, 0.1);
}
 
Example #8
Source File: LSPI.java    From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the problem domain
 * @param gamma the discount factor
 * @param saFeatures the state-action features
 * @param dataset the dataset of transitions to use
 */
public LSPI(SADomain domain, double gamma, DenseStateActionFeatures saFeatures, SARSData dataset){
	this.solverInit(domain, gamma, null);
	this.saFeatures = saFeatures;
	this.vfa = new DenseStateActionLinearVFA(saFeatures, 0.);
	this.learningPolicy = new EpsilonGreedy(this, 0.1);
	this.dataset = dataset;
}
 
Example #9
Source File: AtariDQN.java    From burlap_caffe with Apache License 2.0
public static void main(String[] args) {

        // Learning constants defined in the DeepMind Nature paper
        // (http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
        int experienceMemoryLength = 1000000;
        int maxHistoryLength = 4;
        int staleUpdateFreq = 10000;
        double gamma = 0.99;
        int frameSkip = 4;
        int updateFreq = 4;
        double rewardClip = 1.0;
        float gradientClip = 1.0f;
        double epsilonStart = 1;
        double epsilonEnd = 0.1;
        int epsilonAnnealDuration = 1000000;
        int replayStartSize = 50000;
        int noopMax = 30;
        int totalTrainingSteps = 50000000;
        double testEpsilon = 0.05;

        // Testing and recording constants
        int testInterval = 250000;
        int totalTestSteps = 125000;
        int maxEpisodeSteps = 100000;
        int snapshotInterval = 1000000;
        String snapshotPrefix = "snapshots/experiment1";
        String resultsDirectory = "results/experiment1";

        // ALE Paths
        // TODO: Set to appropriate paths for your machine
        String alePath = "/path/to/atari/executable";
        String romPath = "/path/to/atari/rom/file";

        // Caffe solver file
        String solverFile = "example_models/atari_dqn_solver.prototxt";

        // Load Caffe
        Loader.load(Caffe.class);

        // Create the domain
        ALEDomainGenerator domGen = new ALEDomainGenerator(ALEDomainGenerator.saActionSet());
        SADomain domain = domGen.generateDomain();

        // Create the ALEEnvironment and visualizer
        ALEEnvironment env = new ALEEnvironment(alePath, romPath, frameSkip, PoolingMethod.POOLING_METHOD_MAX);
        env.setRandomNoopMax(noopMax);
        ALEVisualExplorer exp = new ALEVisualExplorer(domain, env, ALEVisualizer.create());
        exp.initGUI();
        exp.startLiveStatePolling(1000/60);

        // Setup the ActionSet from the ALEDomain to use the ALEActions
        ActionSet actionSet = new ActionSet(domain);

        // Setup the training and test memory
        FrameExperienceMemory trainingExperienceMemory =
                new FrameExperienceMemory(experienceMemoryLength, maxHistoryLength, new ALEPreProcessor(), actionSet);
        // The size of the test memory is arbitrary but should be significantly greater than 1 to minimize copying
        FrameExperienceMemory testExperienceMemory =
                new FrameExperienceMemory(10000, maxHistoryLength, new ALEPreProcessor(), actionSet);


        // Initialize the DQN with the solver file.
        // NOTE: this Caffe architecture is made for 3 actions (the number of actions in Pong)
        DQN dqn = new DQN(solverFile, actionSet, trainingExperienceMemory, gamma);
        dqn.setRewardClip(rewardClip);
        dqn.setGradientClip(gradientClip);

        // Create the policies
        SolverDerivedPolicy learningPolicy =
                new AnnealedEpsilonGreedy(dqn, epsilonStart, epsilonEnd, epsilonAnnealDuration);
        SolverDerivedPolicy testPolicy = new EpsilonGreedy(dqn, testEpsilon);

        // Setup the learner
        DeepQLearner deepQLearner = new DeepQLearner(domain, gamma, replayStartSize, learningPolicy, dqn, trainingExperienceMemory);
        deepQLearner.setExperienceReplay(trainingExperienceMemory, dqn.batchSize);
        deepQLearner.useStaleTarget(staleUpdateFreq);
        deepQLearner.setUpdateFreq(updateFreq);

        // Setup the tester
        DeepQTester tester = new DeepQTester(testPolicy, testExperienceMemory, testExperienceMemory);

        // Setup helper
        TrainingHelper helper =
                new AtariDQN(deepQLearner, tester, dqn, actionSet, env, trainingExperienceMemory, testExperienceMemory);
        helper.setTotalTrainingSteps(totalTrainingSteps);
        helper.setTestInterval(testInterval);
        helper.setTotalTestSteps(totalTestSteps);
        helper.setMaxEpisodeSteps(maxEpisodeSteps);
        helper.enableSnapshots(snapshotPrefix, snapshotInterval);
        helper.recordResultsTo(resultsDirectory);
        //helper.verbose = true;

        // Uncomment this line to load learning state if resuming
        //helper.loadLearningState(snapshotDirectory);

        // Run helper
        helper.run();
    }
 
Example #10
Source File: GridWorldDQN.java    From burlap_caffe with Apache License 2.0
public static void main(String args[]) {

        // Learning constants
        double gamma = 0.99;
        int replayStartSize = 50000;
        int memorySize = 1000000;
        double epsilonStart = 1;
        double epsilonEnd = 0.1;
        double testEpsilon = 0.05;
        int epsilonAnnealDuration = 1000000;
        int staleUpdateFreq = 10000;

        // Caffe solver file
        String solverFile = "example_models/grid_world_dqn_solver.prototxt";

        // Load Caffe
        Loader.load(caffe.Caffe.class);

        // Setup the network
        GridWorldDQN gridWorldDQN = new GridWorldDQN(solverFile, gamma);

        // Create the policies
        SolverDerivedPolicy learningPolicy =
                new AnnealedEpsilonGreedy(epsilonStart, epsilonEnd, epsilonAnnealDuration);
        SolverDerivedPolicy testPolicy = new EpsilonGreedy(testEpsilon);

        // Setup the learner
        DeepQLearner deepQLearner =
                new DeepQLearner(gridWorldDQN.domain, gamma, replayStartSize, learningPolicy, gridWorldDQN.dqn);
        deepQLearner.setExperienceReplay(new FixedSizeMemory(memorySize), gridWorldDQN.dqn.batchSize);
        deepQLearner.useStaleTarget(staleUpdateFreq);

        // Setup the tester
        Tester tester = new SimpleTester(testPolicy);

        // Set the QProvider for the policies
        learningPolicy.setSolver(deepQLearner);
        testPolicy.setSolver(deepQLearner);

        // Setup the visualizer
        VisualExplorer exp = new VisualExplorer(
                gridWorldDQN.domain, gridWorldDQN.env, GridWorldVisualizer.getVisualizer(gridWorldDQN.gwdg.getMap()));
        exp.initGUI();
        exp.startLiveStatePolling(33);

        // Setup helper
        TrainingHelper helper = new TrainingHelper(
                deepQLearner, tester, gridWorldDQN.dqn, actionSet, gridWorldDQN.env);
        helper.setTotalTrainingSteps(50000000);
        helper.setTestInterval(500000);
        helper.setTotalTestSteps(125000);
        helper.setMaxEpisodeSteps(10000);

        // Run helper
        helper.run();
    }
 
Example #11
Source File: QLearning.java    From burlap with Apache License 2.0
/**
 * Initializes Q-learning with a 0.1 epsilon greedy policy, the same Q-value initialization everywhere, and places no limit on the number of steps the 
 * agent can take in an episode. By default the agent will only save the last learning episode and a call to the {@link #planFromState(State)} method
 * will cause the valueFunction to use only one episode for planning; this should probably be changed to a much larger value if you plan on using this
 * algorithm as a planning algorithm.
 * @param domain the domain in which to learn
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use for Q-lookups
 * @param qInit the initial Q-value to use everywhere
 * @param learningRate the learning rate
 */
public QLearning(SADomain domain, double gamma, HashableStateFactory hashingFactory,
		double qInit, double learningRate) {
	this.QLInit(domain, gamma, hashingFactory, new ConstantValueFunction(qInit), learningRate, new EpsilonGreedy(this, 0.1), Integer.MAX_VALUE);
}
 
Example #12
Source File: QLearning.java    From burlap with Apache License 2.0
/**
 * Initializes Q-learning with a 0.1 epsilon greedy policy and the same Q-value initialization everywhere. By default the agent will only save the last learning episode and a call to the {@link #planFromState(State)} method
 * will cause the valueFunction to use only one episode for planning; this should probably be changed to a much larger value if you plan on using this
 * algorithm as a planning algorithm.
 * @param domain the domain in which to learn
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use for Q-lookups
 * @param qInit the initial Q-value to use everywhere
 * @param learningRate the learning rate
 * @param maxEpisodeSize the maximum number of steps the agent will take in a learning episode before the episode terminates
 */
public QLearning(SADomain domain, double gamma, HashableStateFactory hashingFactory,
		double qInit, double learningRate, int maxEpisodeSize) {
	this.QLInit(domain, gamma, hashingFactory, new ConstantValueFunction(qInit), learningRate, new EpsilonGreedy(this, 0.1), maxEpisodeSize);
}
 
Example #13
Source File: GradientDescentSarsaLam.java    From burlap with Apache License 2.0
/**
 * Initializes SARSA(\lambda) with a 0.1 epsilon greedy policy and places no limit on the number of steps the 
 * agent can take in an episode. By default the agent will only save the last learning episode and a call to the {@link #planFromState(State)} method
 * will cause the valueFunction to use only one episode for planning; this should probably be changed to a much larger value if you plan on using this
 * algorithm as a planning algorithm.
 * @param domain the domain in which to learn
 * @param gamma the discount factor
 * @param vfa the value function approximation method to use for estimating Q-values
 * @param learningRate the learning rate
 * @param lambda specifies the strength of eligibility traces (0 for one step, 1 for full propagation)
 */
public GradientDescentSarsaLam(SADomain domain, double gamma, DifferentiableStateActionValue vfa,
		double learningRate, double lambda) {
	
	this.GDSLInit(domain, gamma, vfa, learningRate, new EpsilonGreedy(this, 0.1), Integer.MAX_VALUE, lambda);
	
}
 
Example #14
Source File: GradientDescentSarsaLam.java    From burlap with Apache License 2.0
/**
 * Initializes SARSA(\lambda) with a 0.1 epsilon greedy policy. By default the agent will only save the last learning episode and a call to the {@link #planFromState(State)} method
 * will cause the valueFunction to use only one episode for planning; this should probably be changed to a much larger value if you plan on using this
 * algorithm as a planning algorithm.
 * @param domain the domain in which to learn
 * @param gamma the discount factor
 * @param vfa the value function approximation method to use for estimating Q-values
 * @param learningRate the learning rate
 * @param maxEpisodeSize the maximum number of steps the agent will take in an episode before terminating
 * @param lambda specifies the strength of eligibility traces (0 for one step, 1 for full propagation)
 */
public GradientDescentSarsaLam(SADomain domain, double gamma, DifferentiableStateActionValue vfa,
		double learningRate, int maxEpisodeSize, double lambda) {
	
	this.GDSLInit(domain, gamma, vfa, learningRate, new EpsilonGreedy(this, 0.1), maxEpisodeSize, lambda);
	
}