Learning Multivariate Linear Functions
First, we need a loss function that compares the actual function output with the desired output. We use the squared-error loss:
var squaredError = function(m1, m2) {
  // Elementwise difference m1 - m2, then sum of squared entries
  var x = T.add(m1, T.mul(m2, -1));
  return T.sumreduce(T.mul(x, x));
};

var f = function(x){
  return squaredError(Vector([1, 2]), Vector([x, 2]));
};

map(f, [1, 2, 3, 4]);  // (1 - x)^2 for each x: [0, 1, 4, 9]
This works for transposed (row) vectors as well:
var squaredError = function(m1, m2) {
  var x = T.add(m1, T.mul(m2, -1));
  return T.sumreduce(T.mul(x, x));
};

var f = function(x){
  return squaredError(
    T.transpose(Vector([1, 2])),
    T.transpose(Vector([x, 2])));
};

map(f, [1, 2, 3, 4]);
Putting these pieces together, we can try to learn multivariate linear functions from data. We'll try to learn a 2×3 matrix that maps two input numbers to those same two numbers together with their sum. That is, the matrix we want to recover looks like this:
Matrix([
  [1, 0, 1],
  [0, 1, 1]
]);
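As a quick sanity check (illustrative only, not part of the original code), multiplying a row-vector input by this matrix reproduces the two inputs followed by their sum:

var target = Matrix([
  [1, 0, 1],
  [0, 1, 1]
]);
// A 1x2 row vector times a 2x3 matrix yields a 1x3 row vector:
// [x1, x2, x1 + x2]
T.dot(T.transpose(Vector([1, 2])), target);  // [1, 2, 3]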
Let’s first try to do this using MCMC (with proposals from the prior):
var sampleGaussianMatrix = function(dims, mean, sd){
  var length = dims[0] * dims[1];
  // DiagCovGaussian's sigma is a vector of standard deviations,
  // one per matrix entry. constF is assumed from an earlier section:
  // constF(x) returns function(){ return x; }.
  var dist = DiagCovGaussian({
    mu: Vector(repeat(length, constF(mean))),
    sigma: Vector(repeat(length, constF(sd)))
  });
  // Sample a flat vector of independent Gaussians, then reshape
  var g = sample(dist);
  return T.reshape(g, dims);
};

// Store the definition for reuse in later code boxes
wpEditor.put('sampleGaussianMatrix', sampleGaussianMatrix);
var data = [
  {input: [1, 2], output: [1, 2, 3]},
  {input: [4, 5], output: [4, 5, 9]},
  {input: [0, 0], output: [0, 0, 0]},
  {input: [-2, 2], output: [-2, 2, 0]}
];

var squaredError = function(m1, m2) {
  var x = T.add(m1, T.mul(m2, -1));
  return T.sumreduce(T.mul(x, x));
};
var model = function() {
  var M = sampleGaussianMatrix([2, 3], 0, 1);
  var f = function(x) {
    return T.dot(x, M);
  };
  // Condition on data
  var totalError = sum(map(function(datum){
    var x = T.transpose(Vector(datum.input));
    return squaredError(f(x), Vector(datum.output));
  }, data));
  factor(-totalError);
  // Test
  return f(T.transpose(Vector([1, 2])));
};

var testDist = Infer(
  {method: 'MCMC', samples: 10, burn: 10000, verbose: true},
  model);

testDist.support();
Unsurprisingly, the acceptance ratio is really small (0.0036): independent proposals from the prior almost never land in the narrow region of weight matrices that fits all four data points, so nearly every proposal is rejected.
We can approach the problem using variational inference as well. To do so, we now provide a guide distribution for the matrix prior and optimize its parameters.
var sampleGaussianMatrix = function(dims, mean, sd){
  var length = dims[0] * dims[1];
  var dist = DiagCovGaussian({
    mu: Vector(repeat(length, constF(mean))),
    sigma: Vector(repeat(length, constF(sd)))
  });
  // Guide: a Gaussian with a learnable mean and a small fixed standard
  // deviation, so the guide concentrates near a point estimate
  var guideMean = param([length, 1], 0, 0.1);
  var guide = DiagCovGaussian({
    mu: guideMean,
    sigma: Vector(repeat(length, constF(0.001)))
  });
  var g = sample(dist, {guide: guide});
  return T.reshape(g, dims);
};
var data = [
  {input: [1, 2], output: [1, 2, 3]},
  {input: [4, 5], output: [4, 5, 9]},
  {input: [0, 0], output: [0, 0, 0]},
  {input: [-2, 2], output: [-2, 2, 0]}
];

var squaredError = function(m1, m2) {
  var x = T.add(m1, T.mul(m2, -1));
  return T.sumreduce(T.mul(x, x));
};
var model = function() {
  var M = sampleGaussianMatrix([2, 3], 0, 1);
  var f = function(x) {
    return T.dot(x, M);
  };
  // Condition on data
  var totalError = sum(map(function(datum){
    var x = T.transpose(Vector(datum.input));
    return squaredError(f(x), Vector(datum.output));
  }, data));
  factor(-totalError);
  // Test
  return {
    test: f(T.transpose(Vector([1, 3]))).data,
    matrix: M.data
  };
};
var params = Optimize(model, {
  steps: 1000,
  method: {gd: {stepSize: 0.001}},
  estimator: {ELBO: {samples: 20}}
});

var modelDist = SampleGuide(model, {params: params, samples: 500});

map(function(i){
  viz.auto(Enumerate(function(){return sample(modelDist).test[i];}));
}, [0, 1, 2]);

modelDist.support()[0].matrix;
The resulting matrix is close to the target matrix we set out to learn.
Instead of using a factor based on the squared error, we could also condition using a Gaussian likelihood.
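Here is a minimal sketch of that variant, assuming the definitions above; the observation noise obsSigma is an assumed value, not one fixed by anything here. (In fact, factor(-totalError) is already equivalent, up to an additive constant, to a Gaussian likelihood with standard deviation 1/√2.)

var obsSigma = 0.5;  // assumed observation noise, chosen for illustration

var model = function() {
  var M = sampleGaussianMatrix([2, 3], 0, 1);
  var f = function(x) {
    return T.dot(x, M);
  };
  // Condition on data via a Gaussian likelihood centered on the prediction
  map(function(datum){
    var x = T.transpose(Vector(datum.input));
    var likelihood = DiagCovGaussian({
      mu: T.transpose(f(x)),  // predictions as a column vector
      sigma: Vector(repeat(3, constF(obsSigma)))
    });
    factor(likelihood.score(Vector(datum.output)));
  }, data);
  // Test
  return f(T.transpose(Vector([1, 2])));
};

Smaller values of obsSigma weight the data more heavily relative to the prior, playing the same role as scaling the squared-error factor.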