Model 1---Utility directly observed, Bias side-info depends on utility
Introduction
Model 1 has the following form:
X is a vector random variable (as in supervised learning), the “context”. X is always fully observed and we don’t model its prior distribution.
u:X -> R, where u ~ prior(u)
b: R->R, where b ~ prior(b) and b is stochastic
Training data has form: (x_i, u(x_i), b(u(x_i))), where i is the index.
Test data has form: (x_i, ___, b(u(x_i))), where the goal is to infer u(x_i).
We are interested in cases with the following additional properties:
-
The training data is insufficient for us to learn
u. That is, on some regions of X, we will be quite uncertain about u(x). -
Our test data will include x-values from some such regions. If the training data shows
bto be generally informative aboutu, then conditioning onbwill improve our posteriors overuin those regions. (Ifbis not informative, then conditioning should not help us).
So we will compare algorithms that condition on the b-values at test time with algorithms that do not (and simply do supervised learning using the training data). We use examples where u is piecewise linear with different parameters in different regions. We begin with examples where b is not a function of X (i.e. the relation between the output of u and b does not depend directly on the context X).
Example 1
Here is the generative model. We make both u and b stochastic functions.
var getDatumGivenContext = function(x, params){
var u = params.u;
var b = params.b;
var ux = sample(u(x));
var bx = sample(b(ux));
return {x:x, u:ux, b: bx};
};
We pick some parameters for u and b and then generate some data. Note that u is a piecewise linear with boundaries at -1 and 1. The function b is adds a constant and some Gaussian noise to u(x).
var u = function(x){
var getCoefficient = function(x){
if (x<-1){return -1.5}
if (x<1){return 3}
if (x>=1){return -.3}
}
return Gaussian({mu:getCoefficient(x)*x, sigma:.1})
}
var b = function(ux){
var constant = .5
var sigma = .1
return Gaussian({mu:ux + constant, sigma:sigma});
}
var displayFunctions = function(u,b){
print('Display true functions u and b on range [-3,3]');
var xs = repeat(100,function(){return sample(Uniform({a:-3,b:3}))});
var uValues = map(function(x){return sample(u(x))}, xs);
viz.scatter(xs, uValues);
var bValues = map(function(ux){return sample(b(ux))}, uValues);
viz.scatter(xs, bValues);
};
displayFunctions(u,b)
var trueParams = {u:u, b:b};
wpEditor.put('trueParams', trueParams);
We generate some data. The training and test data have different distributions. This means that u is not learnable from the training data but its values can be inferred fairly accurately given that b is informative about u.
///fold:
var getDatumGivenContext = function(x, params){
var u = params.u;
var b = params.b;
var ux = sample(u(x));
var bx = sample(b(ux));
return {x:x, u:ux, b: bx};
};
///
var getAllData = function(numberTrainingData, numberTestData, params){
var generateData = function(numberDataPoints, priorX){
var xs = repeat(numberDataPoints, priorX);
return map(function(x){
return getDatumGivenContext(x, params);
}, xs);
};
var trainingPrior = function(){return sample(Uniform({a:-.5, b:.5}));}
var trainingData = generateData(numberTrainingData, trainingPrior)
var testPrior = function(){return sample(Uniform({a:-1.5, b:1.5}));}
var testData = generateData(numberTestData, testPrior)
var displayData = function(trainingData, testData){
print('Display Training and then Test Data')
map(function(data){
var xs = _.map(data,'x');
var bs = _.map(data,'b');
viz.scatter(xs,bs)
}, [trainingData, testData]);
};
displayData(trainingData, testData)
return {params: params,
trainingData: trainingData,
testData: testData};
};
var trueParams = wpEditor.get('trueParams');
var allData = getAllData(10, 20, trueParams);
wpEditor.put('allData', allData);
To perform inference, we need a prior on the functions u and b. We simply abstract out the parameters that we used to define the functions u and b above:
var getU = function(uParams){
return function(x){
var getCoefficient = function(x){
if (x<-1){return uParams.lessMinus1}
if (x<1){return uParams.less1}
if (x>=1){return uParams.greater1}
}
return Gaussian({mu:getCoefficient(x)*x, sigma:.1})
}
}
var getB = function(bParams){
return function(ux){
return Gaussian({mu:ux + bParams.constant, sigma:bParams.sigma});
}
};
var priorParams = function(){
var sampleG = function(){return sample(Gaussian({mu:0, sigma:2}));}
var uParams = {lessMinus1: sampleG(),
less1: sampleG(),
greater1: sampleG()}
var bParams = {constant: sampleG(), sigma: Math.abs(sampleG())}
return {
u: getU(uParams),
uParams: uParams,
b: getB(bParams),
bParams: bParams
}
}
var displayFunctions = function(u,b){
print('Display functions u and b on range [-3,3]');
var xs = repeat(100,function(){return sample(Uniform({a:-3,b:3}))});
var uValues = map(function(x){return sample(u(x))}, xs);
viz.scatter(xs, uValues);
var bValues = map(function(ux){return sample(b(ux))}, uValues);
viz.scatter(xs, bValues);
};
var paramsExample = priorParams()
displayFunctions(paramsExample.u, paramsExample.b)
Putting everything together, we do inference on the training set to learn the parameters of u and b. We first do this using likelihoods (via score) and then we do this using an error function.
///fold:
var u = function(x){
var getCoefficient = function(x){
if (x<-1){return -3}
if (x<1){return 3}
if (x>=1){return -4}
}
return Gaussian({mu:getCoefficient(x)*x, sigma:.1})
}
var b = function(ux){
var constant = .5
var sigma = .1
return Gaussian({mu:ux + constant, sigma:sigma});
}
var displayFunctions = function(u,b){
print('Display true functions u and b on range [-3,3]');
var xs = repeat(100,function(){return sample(Uniform({a:-3,b:3}))});
var uValues = map(function(x){return sample(u(x))}, xs);
viz.scatter(xs, uValues);
var bValues = map(function(ux){return sample(b(ux))}, uValues);
viz.scatter(xs, bValues);
};
displayFunctions(u,b)
var trueParams = {u:u, b:b};
var getDatumGivenContext = function(x, params){
var u = params.u;
var b = params.b;
var ux = sample(u(x));
var bx = sample(b(ux));
return {x:x, u:ux, b: bx};
};
var getAllData = function(numberTrainingData, numberTestData, params){
var generateData = function(numberDataPoints, priorX){
var xs = repeat(numberDataPoints, priorX);
return map(function(x){
return getDatumGivenContext(x, params);
}, xs);
};
var trainingPrior = function(){return sample(Uniform({a:-.5, b:.5}));}
var trainingData = generateData(numberTrainingData, trainingPrior)
var testPrior = function(){return sample(Uniform({a:-2, b:2}));}
var testData = generateData(numberTestData, testPrior)
var displayData = function(trainingData, testData){
print('Display Training and then Test Data')
map(function(data){
var xs = _.map(data,'x');
var bs = _.map(data,'b');
viz.scatter(xs,bs)
}, [trainingData, testData]);
};
displayData(trainingData, testData)
return {params: params,
trainingData: trainingData,
testData: testData};
};
var allData = getAllData(15, 20, trueParams);
var getU = function(uParams){
return function(x){
var getCoefficient = function(x){
if (x<-1){return uParams.lessMinus1}
if (x<1){return uParams.less1}
if (x>=1){return uParams.greater1}
}
return Gaussian({mu:getCoefficient(x)*x, sigma:.1})
}
}
var getB = function(bParams){
return function(ux){
return Gaussian({mu:ux + bParams.constant, sigma:bParams.sigma});
}
};
var priorParams = function(){
var sampleG = function(){return sample(Gaussian({mu:0, sigma:2}));}
var uParams = {lessMinus1: sampleG(),
less1: sampleG(),
greater1: sampleG()}
var bParams = {constant: sampleG(), sigma: Math.abs(sampleG())}
return {
u: getU(uParams),
uParams: uParams,
b: getB(bParams),
bParams: bParams
}
}
///
var trainingModel = function(){
var params = priorParams();
var u = params.u
var b = params.b
// factor on training set
map( function(datum){
factor( u(datum.x).score(datum.u) );
factor( b(datum.u).score(datum.b) );
}, allData.trainingData);
// factor on test set (b values only)
map( function(datum){
var predictU = sample(u(datum.x))
factor( b(predictU).score(datum.b) );
}, allData.testData);
return {
uParams: params.uParams,
bParams: params.bParams,
};
};
// Inference using absolute distance between predicted and observed points
var distance = function(x,y){return Math.abs(x-y)};
var factorBValues = false;
var trainingModelError = function(){
var params = priorParams();
map( function(datum){
var prDatum = getDatumGivenContext(datum.x,params)
var error = distance(prDatum.u, datum.u) + distance(prDatum.b, datum.b)
factor(-error)
}, allData.trainingData);
var datumToError = map( function(datum){
var predictDatum = getDatumGivenContext(datum.x, params)
if (factorBValues){factor(-distance(predictDatum.b, datum.b))}
//var error = distance(predictDatum.u, datum.u)
//return {x:datum.x, guessU: predictDatum.u, u: datum.u, error:error};
}, allData.testData);
return {
uParams: params.uParams,
bParams: params.bParams,
//datumToError: datumToError,
//totalError: sum( _.map(datumToError, 'error'))
};
};
var getPosterior = function(model){
var posterior = Infer(
{method:'MCMC',
kernel:{HMC: {steps:10, stepSize:.1}},
burn:1,
samples:1000,
},
model);
// print MAP value for params vs true params
var MAP = posterior.MAP().val
print( '\nTrue vs. MAP uParams:' +
JSON.stringify({lessMinus1: -3, less1:3, greater1:-4}) +
JSON.stringify(MAP.uParams) +
'\n\nTrue vs. MAP bParams:' +
JSON.stringify({constant: .5, sigma:.1}) +
JSON.stringify(MAP.bParams));
return posterior;
}
print('Model with likelihoods')
var posterior = getPosterior(trainingModel)
// These functions assume that inference returns "datumToError"
// which provides a distribution over the u(x) value for each test point x
var getMarginal = function(erp, index){
return Infer({method:'rejection',samples: 200},
function(){return sample(erp).datumToError[index].guessU})
}
var MAP = posterior.MAP().val;
print( 'datumToError: ');
map( function(datum){
print(datum);
}, MAP.datumToError)
var allPoints = map(function(i){
var marginal = getMarginal(posterior, i);
//viz.auto(marginal)
var x = allData.testData[i].x
var samples = repeat(60, function(){
return {x:x, u:sample(marginal)}})
return samples
}, _.range(allData.testData.length))
var trueValues = map(
function(datum){return {x:datum.x, u:datum.u}},
allData.testData);
viz.scatter(_.flatten(allPoints))
print('true values')
viz.scatter(trueValues)
The next codebox computes a posterior on u values given the corresponding b values. (This doesn’t work well because we don’t have a fine-grained enough representation of the distribution on functions u and b from running MCMC on the prior).
///fold:
var predictModelError = function(posteriorTraining, factorBValues){
return function(){
var trainingParams = sample(posteriorTraining)
var u = getU(trainingParams.uParams)
var b = getB(trainingParams.bParams)
var params = {u:u, b:b}
var datumToError = map( function(datum){
var predictDatum = getDatumGivenContext(datum.x, params)
if (factorBValues){
factor(-distance(predictDatum.b, datum.b))
}
var error = distance(predictDatum.u, datum.u)
return {x:datum.x, guessU: predictDatum.u, u: datum.u, error:error};
}, allData.testData);
var totalError = sum( _.map(datumToError, 'error'));
return {
uParams: trainingParams.uParams,
bParams: trainingParams.bParams,
datumToError: datumToError,
totalError: totalError
};
};
}
var runPosteriorPrediction = function(model){
var posteriorPredict = Infer(
{method:'MCMC',
kernel:{HMC: {steps:10, stepSize:.1}},
burn:20,
samples:2000,
},
model);
print( 'MAP totalError: ' +
JSON.stringify(posteriorPredict.MAP().val.totalError ))
var getMarginal = function(erp, index){
return Infer({
method:'rejection',
samples: 200},
function(){
return sample(erp).datumToError[index].guessU
})
}
var MAP = posteriorPredict.MAP().val;
print( 'datumToError: ');
map( function(datum){
print(datum);
}, MAP.datumToError)
var allPoints = map(function(i){
var marginal = getMarginal(posteriorPredict, i);
//viz.auto(marginal)
var x = allData.testData[i].x
var samples = repeat(60, function(){
return {x:x, u:sample(marginal)}})
return samples
}, _.range(allData.testData.length))
var trueValues = map(
function(datum){
return {x:datum.x, u:datum.u}},
allData.testData);
viz.scatter(_.flatten(allPoints))
print('true values')
viz.scatter(trueValues)
}
var predictModelFactorBValues = predictModelError(posteriorTraining,true);
runPosteriorPrediction(predictModelFactorBValues)
var predictModelNoBValues = predictModelError(posteriorTraining,false);
runPosteriorPrediction(predictModelNoBValues)
///
The results show that the model does not infer the correct parameters for x<-1 and x>1. There’s no way it could: the training data doesn’t cover these regions.
Another way of looking at this:
///fold:
var utility = function(x) {
var getCoefficient = function(x){
if (x<-1){
return -3;
} else if (x<1){
return 3;
} else {
return -4;
}
};
return Gaussian({
mu: getCoefficient(x)*x,
sigma: .1
});
};
var biasedSignal = function(u) {
var constant = .5;
var sigma = .1;
return Gaussian({
mu: u + constant,
sigma: sigma
});
};
var displayFunctions = function() {
var xs = repeat(100, function(){return sample(Uniform({a:-3,b:3}))});
var uValues = map(function(x){return sample(utility(x))}, xs);
viz.scatter(xs, uValues);
var bValues = map(function(ux){return sample(biasedSignal(ux))}, uValues);
viz.scatter(xs, bValues);
};
var trueParams = {
utility: utility,
biasedSignal: biasedSignal
};
var getDatumGivenContext = function(x, params) {
var utility = params.utility;
var biasedSignal = params.biasedSignal;
var u = sample(utility(x));
var b = sample(biasedSignal(u));
return {
x: x,
u: u,
b: b
};
};
var getAllData = function(numberTrainingData, numberTestData, params){
var generateData = function(numberDataPoints, priorX){
var xs = repeat(numberDataPoints, priorX);
return map(function(x){
return getDatumGivenContext(x, params);
}, xs);
};
var trainingPrior = function(){return sample(Uniform({a:-.5, b:.5}));}
var trainingData = generateData(numberTrainingData, trainingPrior)
var testPrior = function(){return sample(Uniform({a:-2, b:2}));}
var testData = generateData(numberTestData, testPrior)
var displayData = function(trainingData, testData){
print('Display Training and then Test Data')
map(function(data){
var xs = _.map(data,'x');
var bs = _.map(data,'b');
viz.scatter(xs,bs)
}, [trainingData, testData]);
};
return {
params: params,
trainingData: trainingData,
testData: testData
};
};
var allData = getAllData(15, 20, trueParams);
var utilityFromParams = function(uParams){
return function(x){
var getCoefficient = function(x){
if (x<-1){
return uParams.lessMinus1;
} else if (x<1){
return uParams.less1;
} else {
return uParams.greater1;
}
}
return Gaussian({
mu: getCoefficient(x)*x,
sigma: .1
});
}
}
var biasedSignalFromParams = function(bParams){
return function(u){
return Gaussian({
mu: u + bParams.constant,
sigma: bParams.sigma
});
}
};
var paramPrior = function(){
var sampleG = function(){
return sample(Gaussian({mu: 0, sigma: 2}));
};
var uParams = {
lessMinus1: sampleG(),
less1: sampleG(),
greater1: sampleG()
};
var bParams = {
constant: sampleG(),
sigma: Math.abs(sampleG())
};
return {
uParams: uParams,
bParams: bParams
};
};
///
var model = function(){
var params = paramPrior();
var utility = utilityFromParams(params.uParams);
var biasedSignal = biasedSignalFromParams(params.bParams);
map(
function(datum){
factor(utility(datum.x).score(datum.u));
factor(biasedSignal(datum.u).score(datum.b));
},
allData.trainingData);
return _.extend({}, params.uParams, params.bParams);
};
var posterior = Infer(
{
method: 'MCMC',
kernel: {HMC: {steps:5, stepSize:.01}},
burn: 500,
samples: 50000 // <- takes a while time to run!
},
model);
var getMarginals = function(dist, keys){
return Infer(
{
method: 'enumerate'
},
function(){
return _.pick(sample(dist), keys);
});
};
viz.auto(getMarginals(posterior, ['lessMinus1'])); // -3
viz.auto(getMarginals(posterior, ['less1'])); // 3
viz.auto(getMarginals(posterior, ['greater1'])); // -4
viz.auto(getMarginals(posterior, ['constant'])); // .5
viz.auto(getMarginals(posterior, ['sigma'])); // .1