package edu.brown.cs.burlap.policies;

import burlap.behavior.policy.EpsilonGreedy;
import burlap.behavior.valuefunction.QProvider;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import edu.brown.cs.burlap.ALEAction;

/**
 * An {@link EpsilonGreedy} policy whose epsilon is linearly annealed from
 * {@code epsilonStart} to {@code epsilonEnd}.
 *
 * <p>Each call to {@link #action(State)} moves epsilon one {@code epsilonStep} towards
 * {@code epsilonEnd}; once {@code epsilonEnd} is reached epsilon stays there. The schedule
 * is written for a decreasing epsilon ({@code epsilonStart >= epsilonEnd}) and a positive
 * {@code annealingTime}; other configurations are not validated.
 *
 * @author Melrose Roderick.
 */
public class AnnealedEpsilonGreedy extends EpsilonGreedy implements StatefulPolicy {

    /** Epsilon used before any annealing step has been applied. */
    protected double epsilonStart;

    /** Lower bound at which annealing stops. */
    protected double epsilonEnd;

    /** Amount added to epsilon per action; negative for a decreasing schedule. */
    protected double epsilonStep;

    /** Number of actions over which epsilon moves from {@code epsilonStart} to {@code epsilonEnd}. */
    protected int annealingTime;

    /**
     * Creates an annealed policy without a Q-value source (a {@code null} planner is passed on).
     *
     * @param epsilonStart  initial epsilon
     * @param epsilonEnd    final epsilon
     * @param annealingTime number of actions over which epsilon is annealed
     */
    public AnnealedEpsilonGreedy(double epsilonStart, double epsilonEnd, int annealingTime) {
        this(null, epsilonStart, epsilonEnd, annealingTime);
    }

    /**
     * Creates an annealed policy backed by the given Q-value source.
     *
     * @param planner       the Q-value source handed to {@link EpsilonGreedy}
     * @param epsilonStart  initial epsilon
     * @param epsilonEnd    final epsilon
     * @param annealingTime number of actions over which epsilon is annealed
     */
    public AnnealedEpsilonGreedy(QProvider planner, double epsilonStart, double epsilonEnd, int annealingTime) {
        super(planner, epsilonStart);

        this.epsilonStart = epsilonStart;
        this.epsilonEnd = epsilonEnd;
        // Floating-point division: an annealingTime of 0 yields an infinite (or NaN) step
        // rather than an exception. loadStateAt() is written to tolerate that.
        this.epsilonStep = (epsilonEnd - epsilonStart) / annealingTime;
        this.annealingTime = annealingTime;
    }

    /**
     * Selects an action with the current epsilon, then advances the annealing schedule by one step.
     *
     * @param s the state in which to act
     * @return the action chosen by {@link EpsilonGreedy#action(State)}
     */
    @Override
    public Action action(State s) {
        // Choose first so this action still uses the pre-step epsilon.
        Action action = super.action(s);

        if (epsilon > epsilonEnd) {
            epsilon += epsilonStep;

            // Do not overshoot the final value.
            if (epsilon < epsilonEnd) {
                epsilon = epsilonEnd;
            }
        }

        return action;
    }

    /**
     * Sets epsilon to the value the schedule has after {@code steps} actions.
     *
     * <p>The value is computed from {@code epsilonStart}, not from the current epsilon, so the
     * result depends only on {@code steps}: repeated calls, or calls made after some actions
     * have already been taken, do not compound the decay.
     *
     * @param steps number of actions already taken; values {@code <= 0} restore {@code epsilonStart}
     */
    @Override
    public void loadStateAt(int steps) {
        epsilon = epsilonAfter(steps);
    }

    /** Returns epsilon after {@code steps} annealing steps, never dropping below {@code epsilonEnd}. */
    private double epsilonAfter(int steps) {
        if (steps <= 0) {
            // Nothing annealed yet. This also avoids (infinite step * 0) = NaN.
            return epsilonStart;
        }

        double annealed = epsilonStart + epsilonStep * steps;

        // Written as "greater than, else end" so that a NaN result (possible when the
        // step itself is NaN) also resolves to epsilonEnd instead of leaking through.
        return annealed > epsilonEnd ? annealed : epsilonEnd;
    }
}