On GitHub: GatorLUG / js-language-tooling-talk
Code → Tokens → AST → Constraints → Transformer → Interpreter
(Some of these are optional)
Code → Tokens → AST → Constraints → Transformer → Interpreter
/\s+/ => whitespace /"[^"]*"/ => string /#.+$/ => comment /[_a-z][_a-z0-9]*/i => identifier /[0-9]+/ => integer⇩
_"foo" 42bar#buzz⇩
identifier string whitespace integer identifier comment⇩
identifier string integer identifier
/**
 * Parse a whole token stream into a Program node.
 *
 * Statements are read with parseStatement() for as long as the stream
 * reports that more tokens are available.
 *
 * @param {object} tokens - token stream exposing hasNext()
 * @returns {{type: string, body: Array}} the root `Program` node
 */
function parse(tokens) {
  const program = {type: 'Program', body: []};
  while (tokens.hasNext()) {
    const statement = parseStatement(tokens);
    program.body.push(statement);
  }
  return program;
}
/**
 * Parse one statement: either a function declaration or a call expression.
 *
 * The upcoming token is inspected once and the matching sub-parser is
 * invoked with the same token stream, so it can consume the tokens itself.
 * NOTE(review): this relies on tokens.next() being a non-consuming
 * look-ahead, which is how parseFunction uses it — confirm against the
 * token stream implementation.
 *
 * @param {object} tokens - token stream exposing next()
 * @returns {object} the node produced by parseFunction or parseCallExpression
 * @throws {SyntaxError} when the upcoming token starts neither construct
 */
function parseStatement(tokens) {
  const {type} = tokens.next();
  if (type === 'FunctionKeyword') {
    return parseFunction(tokens);
  }
  if (type === 'Identifier') {
    return parseCallExpression(tokens);
  }
  throw new SyntaxError(`Unexpected token: ${type}`);
}
/**
 * Parse a function declaration: the function keyword, a parenthesised
 * argument list, and a brace-delimited body.
 *
 * @param {object} tokens - token stream exposing next() (look-ahead) and
 *   consume(type)
 * @returns {{type: string, args: Array, body: Array}} a
 *   `FunctionDeclaration` node
 */
function parseFunction(tokens) {
  const args = [];
  const body = [];
  tokens.consume('FunctionKeyword');
  tokens.consume('(');
  // next() yields a token object (parseStatement reads its `.type`), so the
  // closing delimiter must be detected via `.type`; comparing the object
  // itself to a string never matches and the loop would not terminate.
  while (tokens.next().type !== ')') {
    args.push(parseExpression(tokens));
  }
  tokens.consume(')');
  tokens.consume('{');
  while (tokens.next().type !== '}') {
    body.push(parseExpression(tokens));
  }
  tokens.consume('}');
  return {type: 'FunctionDeclaration', args, body};
}
// ...
          Recursive tree traversal!
// Start rule: optional whitespace followed by one or more statements, each
// optionally followed by whitespace. Produces the root `Program` node.
program
  = _ body:(s:statement _ {return s;})+ {
    return {type: 'Program', body: body};
  }
// Zero or more spaces, tabs, carriage returns or newlines; because it may
// match nothing, it can be placed anywhere whitespace is merely allowed.
_ 'whitespace'
  = [ \t\r\n]*
// A statement is a function declaration or, failing that, a call expression.
statement
  = function
  / callExpression
// 'function' '(' expression* ')' '{' statement* '}' with whitespace allowed
// between the pieces. Produces a `FunctionDeclaration` node holding the
// collected args and body.
function
  = 'function' _ '('
  _ args:(expr:expression _ {return expr;})*
  _ ')' _ '{'
  _ body:(s:statement _ {return s;})*
  _ '}' {
    return {
      type: 'FunctionDeclaration',
      args: args,
      body: body
    };
  }
// Matches only the literal text 'expr' (appears to be a placeholder for a
// real expression rule).
expression
  = 'expr'
// Matches the literal text 'fname', then a parenthesised list of
// expressions. Produces a `CallExpression` node holding only the args.
callExpression
  = 'fname' _ '('
  _ args:(expr:expression _ {return expr;})*
  _ ')' {
    return {
      type: 'CallExpression',
      args: args
    };
  }
        Some constructs aren't "context free", and can't be parsed this way.
def foo():
    bar()
          Example: Indentation-based languages don't have clear start/end tokens. How do you tell when to stop parsing a construct?
Extend the lexer, and insert indent/dedent tokens!
def foo():
    bar()
          1,0-1,3:    NAME    u'def'
1,4-1,7:    NAME    u'foo'
1,7-1,8:    OP      u'('
1,8-1,9:    OP      u')'
1,9-1,10:   OP      u':'
1,10-1,11:  NEWLINE u'\n'
2,0-2,4:    INDENT  u'    '
2,4-2,7:    NAME    u'bar'
2,7-2,8:    OP      u'('
2,8-2,9:    OP      u')'
3,0-3,0:    DEDENT  ''
3,0-3,0:    ENDMARKER   ''
          Try this at home using import tokenize!
Code → Tokens → AST → Constraints → Transformer → Interpreter
JavaScript doesn't do any of this. But a linter does!
Static analysis of dynamic languages is hard.
// The property name read below comes from prompt() at run time, so which key
// of `obj` is accessed cannot be determined by reading the source alone.
var obj = {foo: 1, bar: 2};
console.log(obj[prompt('What property should I access?')]);
          Akin to the halting problem
module.exports = function(context) {
    return {
        "MemberExpression": function(node) {
            if (node.object.name === "console") {
                context.report(node, "Unexpected console statement.");
            }
        }
    };
};
        /* @flow */
function foo(x) {
  return x * 10; // x is multiplied, so the checker expects a number here
}
foo('Hello, world!'); // deliberate mistake: a string is passed where a number is expected
          hello.js:5:5,19: string This type is incompatible with hello.js:3:10,15: number
/* @flow */
function foo(x: string, y: number): string {
  return x.length * y; // deliberate mistake: the product is a number, but the signature declares a string return
}
foo('Hello', 42);
          hello.js:3:10,21: number This type is incompatible with hello.js:2:37,42: string
Code → Tokens → AST → Constraints → Transformer → Interpreter
/**
 * Callback-style version: issue a GET request to 'example.com' through
 * $.ajax and, on success, log `data.something` and hand the data to
 * `callback`.
 *
 * @param {function} callback - invoked with the response data on success
 */
function update(callback) {
  const onSuccess = function(data) {
    console.log(data.something);
    callback(data);
  };
  const request = {url: 'example.com', type: 'GET', success: onSuccess};
  $.ajax(request);
}
          function update() {
  // Promise-style version: request 'example.com', log `data.something`, and
  // resolve the returned promise with the data so callers can chain on it.
  return $.ajax('example.com').then((data) => {
    console.log(data.something);
    return data;
  });
}
        function update() {
  // Promise-style version: request 'example.com', log `data.something`, and
  // resolve the returned promise with the data so callers can chain on it.
  return $.ajax('example.com').then((data) => {
    console.log(data.something);
    return data;
  });
}
          async function update() {
  // async/await version: wait for the 'example.com' response, log its
  // `something` property, then resolve with the whole response.
  const response = await $.ajax('example.com');
  const {something} = response;
  console.log(something);
  return response;
}
        function GeometricMean(stdlib, foreign, buffer) {
  // asm.js module computing the geometric mean of a range of doubles stored
  // in `buffer`. `stdlib` supplies Math.exp, Math.log and Float64Array;
  // `foreign` is not used by this module. Returns an object exposing only
  // geometricMean(start, end).
  "use asm";
  var exp = stdlib.Math.exp;
  var log = stdlib.Math.log;
  // View the heap buffer as an array of 64-bit floats.
  var values = new stdlib.Float64Array(buffer);
  // Sum of log(values[i]) for element indices start <= i < end.
  function logSum(start, end) {
    // `x|0` coerces to a 32-bit integer, marking the parameters as ints.
    start = start|0;
    end = end|0;
    var sum = 0.0, p = 0, q = 0;
    // asm.js forces byte addressing of the heap by requiring shifting by 3
    // p and q are byte offsets (index << 3); p advances 8 bytes per element.
    for (p = start << 3, q = end << 3; (p|0) < (q|0); p = (p + 8)|0) {
      // Unary + coerces the call result to a double.
      sum = sum + +log(values[p>>3]);
    }
    return +sum;
  }
  // exp(logSum / count), where count = end - start: the geometric mean of
  // values[start..end).
  function geometricMean(start, end) {
    start = start|0;
    end = end|0;
    return +exp(+logSum(start, end) / +((end - start)|0));
  }
  // Only geometricMean is exported; logSum stays internal to the module.
  return { geometricMean: geometricMean };
}
      "usually within a factor of 2 slowdown over native compilation with clang"
// Print the AST with recast and keep the `code` property of the result.
var output = recast.print(ast).code;
See Also: Effective JavaScript Codemods by @cpojer