Sequential Planning as Inference

Let’s see if we can infer actions that lead to a specific outcome.

///fold:
var moveFunctions = {
  push: function(state, s) {
    return state.concat([s]);
  },
  pop: function(state) {
    return state.slice(1);
  },
  swap: function(state, i) {
    var a = state[i];
    var b = state[i + 1];
    return state.slice(0, i).concat([b, a]).concat(state.slice(i + 2));    
  }
};

var randomSymbol = function() {
  return uniformDraw(['a', 'b', 'c', 'd', 'e']);
};

var samplePush = function(state) {
  return {
    name: 'push',
    data: randomSymbol()
  }
};

var samplePop = function(state) {
  return {
    name: 'pop',
    data: null
  }
};

var sampleSwap = function(state) {
  return {
    name: 'swap',
    data: randomInteger(state.length - 1)
  }
};

var availableMoves = function(state) {
  if (state.length >= 2) {
    return [samplePush, samplePop, sampleSwap];
  } else if (state.length >= 1) {
    return [samplePush, samplePop];
  } else {
    return [samplePush];
  }
};

var sampleMove = function(state) {
  var sampler = uniformDraw(availableMoves(state));
  return sampler(state);
};

var applyMove = function(state, move) {
  var moveFunc = moveFunctions[move.name];
  return moveFunc(state, move.data);
};

var applyMoves = function(state, moves) {
  if (moves.length === 0) {
    return state;
  } else {
    return applyMoves(applyMove(state, moves[0]), moves.slice(1));
  }
};

var sampleMoves = function(state, n) {
  if (n === 0) {
    return [];
  } else {
    var move = sampleMove(state);
    var newState = applyMove(state, move);
    return [move].concat(sampleMoves(newState, n - 1));
  }
};

var showMove = function(state, move) {
  var nextState = applyMove(state, move);
  print(
    JSON.stringify(state) + 
    " --{" + move.name + 
    ((move.data !== null) ? " " + move.data : "") + "}--> " + 
    JSON.stringify(nextState));
}

var showMoves = function(state, moves) {
  if (moves.length === 0) {
    return;
  } else {    
    var move = moves[0];
    showMove(state, move);
    var nextState = applyMove(state, move);
    return showMoves(nextState, moves.slice(1));
  }
};

wpEditor.put('sampleMoves', sampleMoves);
wpEditor.put('applyMoves', applyMoves);
wpEditor.put('showMoves', showMoves);
///

var initState = [];
var targetState = ['a', 'b', 'c', 'd'];

var model = function() {
  var i = randomInteger(targetState.length * 2);
  var moves = sampleMoves(initState, i);
  var finalState = applyMoves(initState, moves);
  condition(_.isEqual(finalState, targetState));
  return moves;
}

var out = Infer({method: 'rejection', samples: 1}, model);

var moves = out.support()[0];

showMoves(initState, moves);

This works, but gets slow very quickly as the state space grows. Maybe we can do better using MCMC? For this to work well, we’ll need a soft comparison function that can tell us how far off our sequence of actions is from accomplishing the goal.

var distance = dp.cache(function(s, t) {
  // This needs to be quick to compute...
  if ((s.length === 0) && (t.length === 0)) {
    return 0;
  } else if (s.length === 0) {
    return t.length;
  } else if (t.length === 0) {
    return s.length;
  } else if (s[0] === t[0]) {
    return distance(s.slice(1), t.slice(1));
  } else {
    return Math.min(
      distance(s, t.slice(1)) + 1,
      distance(s.slice(1), t) + 1
    );    
  }
});

wpEditor.put('distance', distance);

distance(['a', 'b', 'c', 'e', 'f'], ['b', 'c', 'd', 'f', 'e']);

We can now infer somewhat longer sequences of actions:

var sampleMoves = wpEditor.get('sampleMoves');
var applyMoves = wpEditor.get('applyMoves');
var showMoves = wpEditor.get('showMoves');
var distance = wpEditor.get('distance');


var initState = [];
var targetState = ['a', 'b', 'c', 'd', 'c', 'b'];

var model = function() {
  var i = randomInteger(targetState.length * 2);
  var moves = sampleMoves(initState, i);
  var finalState = applyMoves(initState, moves);
  factor(-10 * distance(finalState, targetState));
  return moves;
};

var out = Infer({method: 'MCMC', samples: 1, burn: 50000}, model);

var moves = out.support()[0];

showMoves(initState, moves);

A further significant improvement here could be to sample moves one by one instead of all at once (assuming a score function that can give informative partial credit to such sequences of moves).