Learning to Predict Utility Across Episodes
We start with a dataset generated by the model given in the previous section:
///fold:
var repeatIndexed = function(n, fn) {
  // Like the built-in repeat, but passes the element's index to fn:
  // returns [fn(0), fn(1), ..., fn(n-1)].
  // Uses the same divide-and-conquer recursion (and therefore the same
  // left-to-right call order for fn) as repeat itself.
  var build = function(count, start) {
    if (count == 0) {
      return [];
    }
    if (count == 1) {
      return [fn(start)];
    }
    var leftSize = Math.ceil(count / 2);
    var rightSize = count - leftSize;
    var left = build(leftSize, start);
    var right = build(rightSize, start + leftSize);
    return left.concat(right);
  };
  return build(n, 0);
};
var last = function(xs) {
  // Final element of the array (undefined when xs is empty).
  return xs.slice(-1)[0];
};
// Print one episode: a header with its index, then one line per step.
// Note: WebPPL's map takes the function first, the array second.
var printEpisode = function(episode) {
  print("Episode " + episode.index + ":");
  map(function(step){print(step);}, episode.steps);
  return;
};
// Print a whole dataset: the shared global state, then every episode.
var printEpisodes = function(data) {
  print('globalState: ' + JSON.stringify(data.globalState));
  map(printEpisode, data.episodes);
  return;
};
// Factory for an episodic data generator. The caller supplies the model
// pieces (priors, action policy, transition, utility) via `options`, and
// gets back a `sampleEpisodes` function that draws a dataset: one shared
// global state plus a list of episodes, each a sequence of steps.
var makeDataGenerator = function(options) {
  var sampleAction = options.sampleAction;
  var transition = options.transition;
  var sampleEpisodeState = options.sampleEpisodeState;
  var sampleGlobalState = options.sampleGlobalState;
  var utility = options.utility;
  // Sample one step: pick an action, apply the transition, and record the
  // per-step utility gain (utility after the action minus utility before).
  var sampleStep = function(globalState, episodeState) {
    var action = sampleAction(globalState, episodeState);
    var newEpisodeState = transition(globalState, episodeState, action);
    var actionUtility = utility(globalState, newEpisodeState) - utility(globalState, episodeState);
    var step = {
      action: action,
      stateAfterAction: newEpisodeState,
      actionUtility: actionUtility
    };
    return step;
  };
  // Recursively extend stepsSoFar by numSteps further steps, threading the
  // episode state through via the last step's stateAfterAction.
  // (stepsSoFar must be non-empty; sampleEpisode seeds it below.)
  var sampleSteps = function(globalState, numSteps, stepsSoFar) {
    if (numSteps === 0) {
      return stepsSoFar;
    } else {
      var prevState = last(stepsSoFar).stateAfterAction;
      var step = sampleStep(globalState, prevState);
      var steps = stepsSoFar.concat([step]);
      return sampleSteps(globalState, numSteps - 1, steps);
    }
  };
  // Sample one episode: a synthetic 'initialize' step (carrying the sampled
  // initial episode state, with zero utility gain) followed by numSteps
  // real steps. episodeIndex is only used for labeling.
  var sampleEpisode = function(globalState, numSteps, episodeIndex) {
    var initialStep = {
      action: {author: 'system', value: 'initialize'},
      stateAfterAction: sampleEpisodeState(globalState),
      actionUtility: 0
    };
    var steps = sampleSteps(globalState, numSteps, [initialStep]);
    return {
      steps: steps,
      index: episodeIndex
    };
  };
  // Entry point returned to the caller. All episodes share a single draw of
  // the global state. NOTE: the || defaults mean numEpisodes: 0 or
  // stepsPerEpisode: 0 fall back to 1 and 5 respectively.
  var sampleEpisodes = function(options) {
    var numEpisodes = options.numEpisodes || 1;
    var stepsPerEpisode = options.stepsPerEpisode || 5;
    var globalState = sampleGlobalState();
    var episodes = repeatIndexed(numEpisodes, function(i){
      return sampleEpisode(globalState, stepsPerEpisode, i);
    });
    return {
      globalState: globalState,
      episodes: episodes
    };
  };
  return sampleEpisodes;
};
// Concrete generator: two agents (alice, bob) whose helpfulness is a latent
// global coin flip; helpful agents mostly increment a scalar episode state,
// unhelpful ones mostly decrement it. Utility is the state itself, so each
// step's actionUtility is +1 or -1.
var sampleEpisodes = makeDataGenerator({
  sampleGlobalState: function() {
    // Latent per-agent helpfulness, shared across all episodes.
    return {
      aliceIsHelpful: flip(.5),
      bobIsHelpful: flip(.5)
    };
  },
  sampleEpisodeState: function(globalState) {
    // Initial episode state: uniform on {0, 1, 2, 3, 4}.
    return randomInteger(5);
  },
  sampleAction: function(globalState, episodeState) {
    // Pick an author uniformly; a helpful author emits 'plusone' with
    // probability .8, an unhelpful one with probability .2.
    var author = flip(.5) ? 'alice' : 'bob';
    var isHelpful = globalState[author + 'IsHelpful'];
    var p_help = isHelpful ? .8 : .2;
    var value = flip(p_help) ? 'plusone' : 'minusone';
    return {
      author: author,
      value: value
    };
  },
  transition: function(globalState, episodeState, action) {
    var value = action.value;
    if (value === 'plusone') {
      return episodeState + 1;
    } else if (value === 'minusone') {
      return episodeState - 1;
    } else {
      // NOTE(review): on an unknown action this only prints and implicitly
      // returns undefined, which would corrupt subsequent steps; unreachable
      // with the sampleAction above, but worth hardening.
      print("error: unknown action");
    }
  },
  utility: function(globalState, episodeState) {
    // Utility is just the scalar episode state.
    return episodeState;
  }
});
///
// Draw a small dataset (3 episodes of 5 steps each), store it for the
// later code boxes via the wpEditor key-value store, and pretty-print it.
var data = sampleEpisodes({
  numEpisodes: 3,
  stepsPerEpisode: 5
});
wpEditor.put('data', data);
printEpisodes(data);
We’ll observe the first two episodes, and will try to predict utility for the third. During training, for each step, we’ll observe some features. Suppose we observe just `author` and `actionUtility`. During test, we only observe `author`. This isn’t sufficient to predict perfectly, but it is fairly informative.
// Retrieve the dataset produced by the previous code box.
var data = wpEditor.get('data');
// One-hot encoding for the three possible step authors.
var authorEncoding = {
  alice: [1, 0, 0],
  bob: [0, 1, 0],
  system: [0, 0, 1]
};
// This will select a subset of features to observe, and also map them to a vector.
// Encode a step as a supervised training pair: the one-hot author vector as
// the input, and the step's utility gain as the (1-dimensional) output.
var encodeStep = function(step) {
  var authorVec = authorEncoding[step.action.author];
  var utilityVec = [step.actionUtility];
  return {
    in: Vector(authorVec),
    out: Vector(utilityVec)
  };
};
// Store the encoder for the later code boxes, and show two example encodings.
wpEditor.put('encodeStep', encodeStep);
print(encodeStep(data.episodes[0].steps[0]))
print(encodeStep(data.episodes[0].steps[1]))
We can treat this as a regression problem:
///fold:
// Sample a dims[0] x dims[1] matrix with iid Gaussian entries
// (mean `mean`, variance parameter `variance`) under the model prior.
// The variational guide is a near-deterministic Gaussian (sigma 0.001)
// centered on a learnable parameter vector, so optimizing the guide
// effectively does MAP-style point estimation of the matrix.
// Fix: the `guideMean` argument was previously shadowed by a hard-coded 0
// (`var guideMean = param([length,1], 0, 0.1)`) and silently ignored; it now
// seeds the guide parameter's initialization. The existing caller passes 0,
// so behavior there is unchanged.
var sampleGaussianMatrix = function(dims, mean, variance, guideMean){
  var length = dims[0] * dims[1];
  var dist = DiagCovGaussian({
    mu: Vector(repeat(length, constF(mean))),
    sigma: Vector(repeat(length, constF(variance)))
  });
  // param(dims, initMean, initSd): learnable guide location, initialized
  // around the caller-supplied guideMean.
  var guideMu = param([length, 1], guideMean, 0.1);
  var guide = DiagCovGaussian({
    mu: guideMu,
    sigma: Vector(repeat(length, constF(0.001)))
  });
  var g = sample(dist, {guide: guide});
  return T.reshape(g, dims);
};
// Wrap a weight matrix as a stochastic linear predictor. Each call appends
// a fresh Uniform(0,1) draw u (a noise feature) and a constant 1 (a bias
// feature) to the input x, then returns the 1 x dims[1] row vector
// [x, u, 1] * matrix. Because u is resampled on every call, the returned
// function is stochastic even for a fixed matrix.
var matrixToFunction = function(matrix) {
  return function(x) {
    // Guide equals prior for u, so inference leaves the noise term alone.
    var u = sample(Uniform({a: 0, b: 1}), { guide: Uniform({a: 0, b: 1}) });
    var a = T.concat(x, Vector([u, 1]));
    var b = T.reshape(a, [x.length+2, 1]);
    var input = T.transpose(b);
    return T.dot(input, matrix);
  };
}
// Prior over noisy linear functions from R^dims[0] to R^dims[1]: sample a
// Gaussian weight matrix with two extra input rows (for the noise and bias
// features that matrixToFunction appends) and wrap it as a callable. The
// matrix is attached to the function via _.assign (WebPPL forbids direct
// property assignment) so callers can recover the raw weights.
var functionPrior = function(dims) {
  var matrixDims = [dims[0] + 2, dims[1]];
  var matrix = sampleGaussianMatrix(matrixDims, 0, 1, 0);
  var f = matrixToFunction(matrix);
  _.assign(f, {'matrix': matrix});
  return f;
};
// Sum of squared differences between two tensors of the same shape,
// computed as sumreduce((m1 - m2)^2); m1 - m2 is spelled m1 + (-1 * m2).
var squaredError = function(m1, m2) {
  var x = T.add(m1, T.mul(m2, -1));
  return T.sumreduce(T.mul(x, x));
};
///
// Load the stored dataset and encoder; hold out the third episode for test.
var data = wpEditor.get('data');
var encodeStep = wpEditor.get('encodeStep');
var trainingEpisodes = [data.episodes[0], data.episodes[1]];
// NOTE(review): testEpisodes is unused in this code box (the test box
// rebuilds it from the stored data).
var testEpisodes = [data.episodes[2]];
// Regression as inference: sample a predictor from the function prior,
// penalize it by the total squared error over every training step
// (factor(-score) is a Gaussian-noise likelihood up to a constant), and
// return the underlying weight matrix so it can be extracted after
// optimizing the variational guide.
var train = function() {
  var f = functionPrior([3, 1]);
  var score = sum(map(function(episode){
    return sum(map(function(step){
      var stepDatum = encodeStep(step);
      return squaredError(f(stepDatum.in), stepDatum.out);
    }, episode.steps));
  }, trainingEpisodes));
  factor(-score);
  return f.matrix;
};
// Fit the guide parameters by stochastic gradient descent on the ELBO,
// then draw a single matrix from the trained guide (the guide is nearly
// deterministic, so one sample is effectively a point estimate) and store
// it for the test code box.
var params = Optimize(train, {
  steps: 500,
  method: {
    gd: {stepSize: 0.01}
  },
  estimator: {ELBO: {samples: 20}}});
var trainDist = SampleGuide(train, {params: params, samples: 1});
var trainedMatrix = trainDist.support()[0]
wpEditor.put('trainedMatrix', trainedMatrix);
wpEditor.put('matrixToFunction', matrixToFunction);
trainedMatrix
We can now test our learned predictor on the unseen episodes:
// Reload the stored dataset, encoder, and trained weights, and rebuild the
// predictor from the single matrix selected during training.
var data = wpEditor.get('data');
var encodeStep = wpEditor.get('encodeStep');
var matrixToFunction = wpEditor.get('matrixToFunction');
var trainedMatrix = wpEditor.get('trainedMatrix');
var testEpisodes = [data.episodes[2]];
var predict = matrixToFunction(trainedMatrix);
// Evaluate on the held-out episode: for each step, approximate the
// predictor's output distribution by forward sampling (predict is
// stochastic because of its injected noise feature; 'rejection' with no
// conditioning is plain forward sampling), then print the predicted mean
// next to the true actionUtility and plot the distribution.
var test = function() {
  map(function(episode){
    map(function(step){
      var stepDatum = encodeStep(step);
      var predictDist = Infer({method: 'rejection', samples: 1000}, function() {
        return predict(stepDatum.in).data[0];
      });
      print({
        expected: expectation(predictDist),
        real: stepDatum.out.data[0]
      });
      viz.auto(predictDist);
    }, episode.steps);
  }, testEpisodes);
  return;
};
test();
Next steps:

- Make the evaluation less noisy: this evaluation is very noisy, since we only have five data points to test on (but we could use arbitrarily more test data to improve on this).
- Write a more systematic framework for evaluating utility predictors; refactor what we have so far; define a clean interface so that this framework can be used with, and tested on, external data in addition to synthesized data.
- Handle uncertainty correctly: due to the use of variational inference, and the selection of a single matrix/function above, we’re also not correctly capturing uncertainty in our distribution over functions when we’re doing prediction. We could address this by (1) computing a distribution on matrices using HMC (with an independent random variable for each matrix element) and (2) storing this distribution so that we can use it for prediction.