package burlap.behavior.singleagent.learnfromdemo.mlirl.differentiableplanners.diffvinit;

import burlap.behavior.functionapproximation.FunctionGradient;
import burlap.behavior.functionapproximation.ParametricFunction;
import burlap.behavior.singleagent.learnfromdemo.mlirl.support.DifferentiableRF;
import burlap.behavior.valuefunction.ValueFunction;
import burlap.mdp.core.state.State;

/**
 * Adapter that lets a plain, unparameterized {@link ValueFunction} be used wherever a
 * {@link DifferentiableVInit} is required, such as by a differentiable finite horizon planner.
 * <p>
 * State values are read straight from the wrapped value function. All parameter accessors are
 * forwarded to the supplied {@link DifferentiableRF}, so this object reports the same parameter
 * space as that reward function. The gradient returned for any state is a newly constructed
 * {@link FunctionGradient.SparseGradient} to which this class adds no entries.
 * @author James MacGlashan.
 */
public class VanillaDiffVinit implements DifferentiableVInit {


	/**
	 * The wrapped value function initialization that supplies the value of each state.
	 */
	protected ValueFunction vinit;

	/**
	 * The differentiable reward function whose parameters this object exposes as its own;
	 * every parameter query and update is forwarded to it.
	 */
	protected DifferentiableRF rf;


	/**
	 * Creates an instance around the given value function initialization and reward function.
	 * Both references are stored as given; no copies are made.
	 * @param vinit the unparameterized value function initialization to read state values from
	 * @param rf the differentiable reward function that supplies the parameter space
	 */
	public VanillaDiffVinit(ValueFunction vinit, DifferentiableRF rf) {
		this.rf = rf;
		this.vinit = vinit;
	}

	/**
	 * Returns the number of parameters reported by the reward function.
	 * @return the parameter count of the reward function
	 */
	@Override
	public int numParameters() {
		return rf.numParameters();
	}

	/**
	 * Returns the value of the reward function's parameter at the given index.
	 * @param i the parameter index
	 * @return the value of the reward function's i-th parameter
	 */
	@Override
	public double getParameter(int i) {
		return rf.getParameter(i);
	}

	/**
	 * Sets the reward function's parameter at the given index.
	 * @param i the parameter index
	 * @param p the value to assign to the reward function's i-th parameter
	 */
	@Override
	public void setParameter(int i, double p) {
		rf.setParameter(i, p);
	}

	/**
	 * Asks the reward function to reset its parameters.
	 */
	@Override
	public void resetParameters() {
		rf.resetParameters();
	}

	/**
	 * Creates a new {@link VanillaDiffVinit} that refers to the same value function initialization
	 * and the same reward function instance as this object. Because the reward function is shared
	 * rather than duplicated, parameter changes made through either object go to that same instance.
	 * @return a new instance sharing this object's value function initialization and reward function
	 */
	@Override
	public ParametricFunction copy() {
		ValueFunction sharedVinit = this.vinit;
		DifferentiableRF sharedRf = this.rf;
		return new VanillaDiffVinit(sharedVinit, sharedRf);
	}

	/**
	 * Returns a newly constructed sparse gradient for the given state. No partial derivatives are
	 * set on it here; the state argument is not consulted.
	 * @param s the state for which the gradient is requested
	 * @return a new {@link FunctionGradient.SparseGradient} with no entries added by this method
	 */
	@Override
	public FunctionGradient valueGradient(State s) {
		FunctionGradient gradient = new FunctionGradient.SparseGradient();
		return gradient;
	}

	/**
	 * Returns the value that the wrapped value function initialization assigns to the given state.
	 * @param s the state to evaluate
	 * @return the wrapped value function's value for the state
	 */
	@Override
	public double value(State s) {
		double initialValue = vinit.value(s);
		return initialValue;
	}


}