home edit +

Learning to Predict Utility Across Episodes

We start with a dataset generated by the model given in the previous section:

///fold:
// Like the built-in `repeat`, but passes the index to `fn`:
// returns [fn(0), fn(1), ..., fn(n-1)]. Built by divide and conquer
// so the recursion depth stays O(log n).
var repeatIndexed = function(n, fn) {
  var helper = function(m, offset) {
    if (m === 0) {
      return [];
    } else if (m === 1) {
      return [fn(offset)]; // Pass the offset to fn, this is the difference with the built-in repeat
    } else {
      var m1 = Math.ceil(m / 2),
          m2 = m - m1;
      return helper(m1, offset).concat(helper(m2, offset + m1));
    }
  }

  return helper(n, 0);
};

// Final element of a non-empty array (undefined for an empty array).
var last = function(xs) {
  var n = xs.length;
  return xs[n - 1];
};

// Print an episode header followed by one line per step.
var printEpisode = function(episode) {
  print("Episode " + episode.index + ":");
  map(function(s){ print(s); }, episode.steps);
  return;
};

// Print the shared global state, then each episode in order.
var printEpisodes = function(dataset) {
  print('globalState: ' + JSON.stringify(dataset.globalState));
  map(printEpisode, dataset.episodes);
  return;
};

// Build an episode sampler from caller-supplied model components.
// Returns a function taking {numEpisodes, stepsPerEpisode} and producing
// {globalState, episodes}, where every episode shares one global state.
var makeDataGenerator = function(options) {

  // Model components supplied by the caller.
  var sampleGlobalState = options.sampleGlobalState;
  var sampleEpisodeState = options.sampleEpisodeState;
  var sampleAction = options.sampleAction;
  var transition = options.transition;
  var utility = options.utility;

  // Sample a single action, apply it, and record the resulting state
  // together with the change in utility caused by the action.
  var sampleStep = function(globalState, episodeState) {
    var action = sampleAction(globalState, episodeState);
    var nextState = transition(globalState, episodeState, action);
    return {
      action: action,
      stateAfterAction: nextState,
      actionUtility: utility(globalState, nextState) - utility(globalState, episodeState)
    };
  };

  // Recursively append steps until `remaining` reaches zero.
  var sampleSteps = function(globalState, remaining, stepsSoFar) {
    if (remaining === 0) {
      return stepsSoFar;
    } else {
      var currentState = last(stepsSoFar).stateAfterAction;
      var nextStep = sampleStep(globalState, currentState);
      return sampleSteps(globalState, remaining - 1, stepsSoFar.concat([nextStep]));
    }
  };

  // An episode begins with a synthetic, system-authored "initialize" step
  // that records the sampled initial state and zero utility.
  var sampleEpisode = function(globalState, numSteps, episodeIndex) {
    var initialStep = {
      action: {author: 'system', value: 'initialize'},
      stateAfterAction: sampleEpisodeState(globalState),
      actionUtility: 0
    };
    return {
      steps: sampleSteps(globalState, numSteps, [initialStep]),
      index: episodeIndex
    };
  };

  // All episodes are generated under a single draw of the global state.
  var sampleEpisodes = function(options) {
    var numEpisodes = options.numEpisodes || 1;
    var stepsPerEpisode = options.stepsPerEpisode || 5;
    var globalState = sampleGlobalState();
    return {
      globalState: globalState,
      episodes: repeatIndexed(numEpisodes, function(i) {
        return sampleEpisode(globalState, stepsPerEpisode, i);
      })
    };
  };

  return sampleEpisodes;

};

// Concrete model: two agents (alice, bob) take turns adding or subtracting
// one unit of utility; each agent's helpfulness is fixed across episodes.
var sampleEpisodes = makeDataGenerator({
  // Persistent across episodes: whether each agent tends to be helpful.
  sampleGlobalState: function() {
    return {
      aliceIsHelpful: flip(.5),
      bobIsHelpful: flip(.5)
    };
  },
  // Each episode starts at a uniformly random state in {0, ..., 4}.
  sampleEpisodeState: function(globalState) {
    return randomInteger(5);
  },
  // A uniformly chosen agent acts; helpful agents mostly increment.
  sampleAction: function(globalState, episodeState) {
    var author = flip(.5) ? 'alice' : 'bob';
    var helpProb = globalState[author + 'IsHelpful'] ? .8 : .2;
    return {
      author: author,
      value: flip(helpProb) ? 'plusone' : 'minusone'
    };
  },
  transition: function(globalState, episodeState, action) {
    if (action.value === 'plusone') {
      return episodeState + 1;
    } else if (action.value === 'minusone') {
      return episodeState - 1;
    } else {
      print("error: unknown action");
    }
  },
  // Utility is simply the current episode state.
  utility: function(globalState, episodeState) {
    return episodeState;
  }
});
///

// Generate a small synthetic dataset: 3 episodes of 5 steps each
// (plus one system-authored "initialize" step per episode).
var data = sampleEpisodes({
  numEpisodes: 3,
  stepsPerEpisode: 5
});

// Store the dataset so later code boxes can retrieve it.
wpEditor.put('data', data);

printEpisodes(data);

We’ll observe the first two episodes and try to predict utility for the third. During training, for each step, we observe some features — suppose we observe just the author and the actionUtility. At test time, we observe only the author. This isn’t sufficient for perfect prediction, but it is quite informative.

var data = wpEditor.get('data');

// One-hot encodings for the three possible step authors.
var authorEncoding = {
  alice: [1, 0, 0],
  bob: [0, 1, 0],
  system: [0, 0, 1]
};

// This will select a subset of features to observe, and also map them to a vector.
var encodeStep = function(step) {
  return {
    in: Vector(authorEncoding[step.action.author]),
    out: Vector([step.actionUtility])
  }
};

// Store the encoder so the training and test code boxes can reuse it.
wpEditor.put('encodeStep', encodeStep);

// Show the encodings of the first two steps of the first episode.
print(encodeStep(data.episodes[0].steps[0]))
print(encodeStep(data.episodes[0].steps[1]))

We can treat this as a regression problem:

///fold:
// Sample a Gaussian random matrix of shape `dims`, guided by a mean-field
// Gaussian whose mean is a learnable parameter and whose tiny stddev makes
// the guide near-deterministic.
// NOTE(review): the `variance` argument is passed to DiagCovGaussian as
// `sigma`, so it appears to be used as a standard deviation — confirm intent.
var sampleGaussianMatrix = function(dims, mean, variance, guideMean){
  var length = dims[0] * dims[1];
  var dist = DiagCovGaussian({
    mu: Vector(repeat(length, constF(mean))),
    sigma: Vector(repeat(length, constF(variance)))
  });
  // Bug fix: the `guideMean` parameter was shadowed by a local variable and
  // the guide's initial mean was hard-coded to 0, ignoring the argument.
  // Use the argument instead (callers pass 0, so behavior is unchanged).
  var guideMu = param([length, 1], guideMean, 0.1);
  var guide = DiagCovGaussian({
    mu: guideMu,
    sigma: Vector(repeat(length, constF(0.001)))
  });
  var g = sample(dist, {guide: guide});
  return T.reshape(g, dims);
};

// Turn a weight matrix into a stochastic linear predictor: the input
// vector is augmented with a Uniform(0,1) noise coordinate and a constant
// bias coordinate before being multiplied by the matrix.
var matrixToFunction = function(matrix) {
  return function(x) {
    var noise = sample(Uniform({a: 0, b: 1}), { guide: Uniform({a: 0, b: 1}) });
    var augmented = T.concat(x, Vector([noise, 1]));
    var column = T.reshape(augmented, [x.length + 2, 1]);
    return T.dot(T.transpose(column), matrix);
  };
};

// Prior over (noisy, affine) linear functions with input/output sizes given
// by `dims`; the two extra weight rows cover the noise and bias coordinates.
var functionPrior = function(dims) {
  var weightDims = [dims[0] + 2, dims[1]];
  var weights = sampleGaussianMatrix(weightDims, 0, 1, 0);
  var f = matrixToFunction(weights);
  // Expose the sampled matrix on the function object for later retrieval.
  _.assign(f, {'matrix': weights});
  return f;
};

// Sum of squared elementwise differences between two tensors.
var squaredError = function(m1, m2) {
  var diff = T.add(m1, T.mul(m2, -1)); // m1 - m2
  return T.sumreduce(T.mul(diff, diff));
};
///

var data = wpEditor.get('data');
var encodeStep = wpEditor.get('encodeStep');

// Train on the first two episodes; hold out the third for testing.
var trainingEpisodes = [data.episodes[0], data.episodes[1]];
var testEpisodes = [data.episodes[2]];

// Bayesian linear regression: sample a predictor from the prior, then
// softly condition on it achieving low squared error on the training data.
var train = function() {

  var f = functionPrior([3, 1]);

  // Squared prediction error summed over every step of one episode.
  var episodeError = function(episode) {
    return sum(map(function(step) {
      var datum = encodeStep(step);
      return squaredError(f(datum.in), datum.out);
    }, episode.steps));
  };

  var totalError = sum(map(episodeError, trainingEpisodes));
  factor(-totalError);

  return f.matrix;
};

// Fit the guide parameters with 500 steps of gradient descent on the ELBO
// (20 samples per gradient estimate).
var params = Optimize(train, {
  steps: 500,
  method: {
    gd: {stepSize: 0.01}
  },
  estimator: {ELBO: {samples: 20}}});

// Draw a single sample from the trained guide. NOTE(review): one sample is
// a point estimate and discards posterior uncertainty — see "Next steps"
// item 3 at the bottom of this page.
var trainDist = SampleGuide(train, {params: params, samples: 1});

var trainedMatrix = trainDist.support()[0]

// Store the learned weights and the decoder for the test code box.
wpEditor.put('trainedMatrix', trainedMatrix);
wpEditor.put('matrixToFunction', matrixToFunction);

// Display the learned weight matrix.
trainedMatrix

We can now test our learned predictor on the unseen episodes:

// Retrieve everything the previous code boxes stored.
var data = wpEditor.get('data');
var encodeStep = wpEditor.get('encodeStep');
var matrixToFunction = wpEditor.get('matrixToFunction');
var trainedMatrix = wpEditor.get('trainedMatrix');
var testEpisodes = [data.episodes[2]];

// Reconstruct the (stochastic) predictor from the learned weights.
var predict = matrixToFunction(trainedMatrix);

// Evaluate the learned predictor on the held-out episodes: for each step,
// print the expected prediction next to the true utility change, and
// visualize the prediction distribution.
var test = function() {

  map(function(episode){
    map(function(step){
      var datum = encodeStep(step);
      // Marginalize over the uniform noise input via rejection sampling.
      var predictDist = Infer({method: 'rejection', samples: 1000}, function() {
        return predict(datum.in).data[0];
      });
      print({
        expected: expectation(predictDist),
        real: datum.out.data[0]
      });
      viz.auto(predictDist);
    }, episode.steps);
  }, testEpisodes);

  return;
};

// Run the evaluation.
test();

Next steps:

  1. Make the evaluation less noisy: This evaluation is very noisy, since we only have five data points to test on (but we could use arbitrarily more test data to improve on this).

  2. Write a more systematic framework for evaluating utility predictors; refactor what we have so far; define a clean interface so that this framework can be used with and tested on external data in addition to synthesized data.

  3. Handle uncertainty correctly: Due to the use of variational inference, and the selection of a single matrix/function above, we’re also not correctly capturing uncertainty in our distribution over functions when we’re doing prediction. We could address this by (1) computing a distribution on matrices using HMC (with independent random variable for each matrix element) and (2) storing this distribution so that we can use it for prediction.