Add HIR "tree" visitor and use it for codegen, alternate printer

Refactors Codegen to extract the core "visit IR as a tree" logic separately from 
the code to emit JS: 

* `HIRTreeVisitor` is a new helper that visits the HIR as a tree. You call 
`visitTree(ir, yourVisitor)` and it drives visiting of the IR, tracking blocks 
and scopes and calling methods as appropriate. 

* `Codegen` is now implemented as a Visitor implementation. For example 
`enterBlock()` creates an empty `Array<t.Statement>`, `leaveBlock()` wraps that 
in a `t.BlockStatement`, etc. 

* `printHIRTree()` is a new IR printer (implemented as a visitor) that prints 
the HIR in tree form, so it retains the original shape of the code but with each 
block replaced with its IR equivalent.
This commit is contained in:
Joe Savona
2022-11-18 17:15:16 -08:00
parent 0c1f88db6c
commit 5f2833bd5c
7 changed files with 1009 additions and 567 deletions
+158 -554
View File
@@ -9,18 +9,16 @@ import * as t from "@babel/types";
import { assertExhaustive } from "../Common/utils";
import { invariant } from "../CompilerError";
import {
BasicBlock,
BlockId,
GotoVariant,
HIR,
HIRFunction,
Identifier,
IdentifierId,
Instruction,
InstructionKind,
InstructionValue,
LValue,
Place,
} from "./HIR";
import { BlockTerminal, Visitor, visitTree } from "./HIRTreeVisitor";
import { todoInvariant } from "./todo";
/**
@@ -44,9 +42,9 @@ import { todoInvariant } from "./todo";
* performed as an HIR optimization pass, that is left todo for the time being.
*/
export default function codegen(fn: HIRFunction): t.Function {
const entry = fn.body.blocks.get(fn.body.entry)!;
const cx = new Context(fn.body);
const body = codegenBlock(cx, entry);
const visitor = new CodegenVisitor();
const body = visitTree(fn, visitor);
invariant(t.isBlockStatement(body), "Expected a block statement");
const params = fn.params.map((param) => convertIdentifier(param.identifier));
return t.functionDeclaration(
fn.id !== null ? convertIdentifier(fn.id) : null,
@@ -57,544 +55,188 @@ export default function codegen(fn: HIRFunction): t.Function {
);
}
class Context {
ir: HIR;
type Temporaries = Map<IdentifierId, t.Expression>;
class CodegenVisitor
implements
Visitor<
Array<t.Statement>,
t.Expression | t.Statement | t.JSXFragment,
t.Statement,
t.SwitchCase
>
{
temp: Map<IdentifierId, t.Expression> = new Map();
#nextScheduleId: number = 0;
/**
* Used to track which blocks *have been* generated already in order to
* abort if a block is generated a second time. This is an error catching
* mechanism for debugging purposes, and is not used by the codegen algorithm
* to drive decisions about how to emit blocks.
*/
emitted: Set<BlockId> = new Set();
/**
* A set of blocks that are already scheduled to be emitted by eg a parent.
* This allows child nodes to avoid re-emitting the same block and emit eg
* a break instead.
*/
#scheduled: Set<BlockId> = new Set();
/**
* Represents which control flow operations are currently in scope, with the innermost
* scope last. Roughly speaking, the last ControlFlowTarget on the stack indicates where
* control will implicitly transfer, such that gotos to that block can be elided. Gotos
* targeting items higher up the stack may need labeled break or continue; see
* getBreakTarget() and getContinueTarget() for more details.
*/
#controlFlowStack: Array<ControlFlowTarget> = [];
constructor(ir: HIR) {
this.ir = ir;
enterBlock(): t.Statement[] {
return [];
}
/**
* Record that the given block will be emitted (eg by the codegen of a parent node)
* so that child nodes can avoid re-emitting it.
*/
schedule(block: BlockId, type: "if" | "switch" | "case"): number {
const id = this.#nextScheduleId++;
invariant(
!this.#scheduled.has(block),
`Break block is already scheduled: bb${block}`
);
this.#scheduled.add(block);
this.#controlFlowStack.push({ block, id, type });
return id;
visitValue(
value: InstructionValue
): t.Expression | t.Statement | t.JSXFragment {
return codegenInstructionValue(this.temp, value);
}
scheduleLoop(
fallthroughBlock: BlockId,
continueBlock: BlockId,
loopBlock: BlockId | null
): number {
const id = this.#nextScheduleId++;
const ownsBlock = !this.#scheduled.has(fallthroughBlock);
this.#scheduled.add(fallthroughBlock);
invariant(
!this.#scheduled.has(continueBlock),
`Continue block is already scheduled: bb${continueBlock}`
);
this.#scheduled.add(continueBlock);
let ownsLoop = false;
if (loopBlock !== null) {
ownsLoop = !this.#scheduled.has(loopBlock);
this.#scheduled.add(loopBlock);
visitInstruction(
instr: Instruction,
value: t.Expression | t.Statement | t.JSXFragment
): t.Statement {
if (t.isStatement(value)) {
return value;
}
this.#controlFlowStack.push({
block: fallthroughBlock,
ownsBlock,
id,
type: "loop",
continueBlock,
loopBlock,
ownsLoop,
});
return id;
}
/**
* Removes a block that was scheduled; must be called after that block is emitted.
*/
unschedule(scheduleId: number): void {
const last = this.#controlFlowStack.pop();
invariant(
last !== undefined && last.id === scheduleId,
"Can only unschedule the last target"
);
if (last.type !== "loop" || last.ownsBlock !== null) {
this.#scheduled.delete(last.block);
if (instr.lvalue === null) {
return t.expressionStatement(value);
}
if (last.type === "loop") {
this.#scheduled.delete(last.continueBlock);
if (last.ownsLoop && last.loopBlock !== null) {
this.#scheduled.delete(last.loopBlock);
}
}
}
/**
* Helper to unschedule multiple scheduled blocks. The ids should be in
* the order in which they were scheduled, ie most recently scheduled last.
*/
unscheduleAll(scheduleIds: Array<number>): void {
for (let i = scheduleIds.length - 1; i >= 0; i--) {
this.unschedule(scheduleIds[i]!);
}
}
/**
* Check if the given @param block is scheduled or not.
*/
isScheduled(block: BlockId): boolean {
return this.#scheduled.has(block);
}
/**
* Given the current control flow stack, determines how a `break` to the given @param block
* must be emitted. Returns as follows:
* - 'implicit' if control would implicitly transfer to that block
* - 'labeled' if a labeled break is required to transfer control to that block
* - 'unlabeled' if an unlabeled break would transfer to that block
* - null if there is no information for this block
*
* The returned 'block' value should be used as the label if necessary.
*/
getBreakTarget(
block: BlockId
): { block: BlockId; type: ControlFlowKind } | null {
let hasPrecedingLoop = false;
for (let i = this.#controlFlowStack.length - 1; i >= 0; i--) {
const target = this.#controlFlowStack[i]!;
if (target.block === block) {
let type: ControlFlowKind;
if (target.type === "loop") {
// breaking out of a loop requires an explicit break,
// but only requires a label if breaking past the innermost loop.
type = hasPrecedingLoop ? "labeled" : "unlabeled";
} else if (i === this.#controlFlowStack.length - 1) {
// breaking to the last break point, which is where control will transfer
// implicitly
type = "implicit";
} else {
// breaking somewhere else requires an explicit break
type = "labeled";
if (
instr.lvalue.place.memberPath === null &&
instr.lvalue.place.identifier.name === null
) {
// temporary
this.temp.set(instr.lvalue.place.identifier.id, value);
return t.emptyStatement();
} else {
switch (instr.lvalue.kind) {
case InstructionKind.Const: {
return t.variableDeclaration("const", [
t.variableDeclarator(codegenLVal(instr.lvalue), value),
]);
}
return {
block: target.block,
type,
};
}
hasPrecedingLoop ||= target.type === "loop";
}
return null;
}
/**
* Given the current control flow stack, determines how a `continue` to the given @param block
* must be emitted. Returns as follows:
* - 'implicit' if control would implicitly continue to that block
* - 'labeled' if a labeled continue is required to continue to that block
* - 'unlabeled' if an unlabeled continue would transfer to that block
* - null if there is no information for this block
*
* The returned 'block' value should be used as the label if necessary.
*/
getContinueTarget(
block: BlockId
): { block: BlockId; type: ControlFlowKind } | null {
let hasPrecedingLoop = false;
for (let i = this.#controlFlowStack.length - 1; i >= 0; i--) {
const target = this.#controlFlowStack[i]!;
if (target.type == "loop" && target.continueBlock === block) {
let type: ControlFlowKind;
if (hasPrecedingLoop) {
// continuing to a loop that is not the innermost loop always requires
// a label
type = "labeled";
} else if (i === this.#controlFlowStack.length - 1) {
// continuing to the last break point, which is where control will
// transfer to naturally
type = "implicit";
} else {
// the continue is inside some conditional logic, requires an explicit
// continue
type = "unlabeled";
case InstructionKind.Let: {
return t.variableDeclaration("let", [
t.variableDeclarator(codegenLVal(instr.lvalue), value),
]);
}
return {
block: target.block,
type,
};
}
hasPrecedingLoop ||= target.type === "loop";
}
return null;
}
debugBreakTargets(): Array<ControlFlowTarget> {
return this.#controlFlowStack.map((target) => ({ ...target }));
}
}
type ControlFlowKind = "implicit" | "labeled" | "unlabeled";
type ControlFlowTarget =
| { type: "if"; block: BlockId; id: number }
| { type: "switch"; block: BlockId; id: number }
| { type: "case"; block: BlockId; id: number }
| {
type: "loop";
block: BlockId;
ownsBlock: boolean;
continueBlock: BlockId;
loopBlock: BlockId | null;
ownsLoop: boolean;
id: number;
};
function codegenBlock(cx: Context, block: BasicBlock): t.BlockStatement {
const body: Array<t.Statement> = [];
writeBlock(cx, block, body);
return t.blockStatement(body);
}
function writeBlock(cx: Context, block: BasicBlock, body: Array<t.Statement>) {
invariant(
!cx.emitted.has(block.id),
`Cannot emit the same block twice: bb${block.id}`
);
cx.emitted.add(block.id);
for (const instr of block.instructions) {
writeInstr(cx, instr, body);
}
const terminal = block.terminal;
const scheduleIds = [];
switch (terminal.kind) {
case "return": {
const value =
terminal.value != null ? codegenPlace(cx, terminal.value) : null;
body.push(t.returnStatement(value));
break;
}
case "throw": {
const value = codegenPlace(cx, terminal.value);
body.push(t.throwStatement(value));
break;
}
case "if": {
const test = codegenPlace(cx, terminal.test);
const fallthroughId =
terminal.fallthrough !== null && !cx.isScheduled(terminal.fallthrough)
? terminal.fallthrough
: null;
const alternateId =
terminal.alternate !== terminal.fallthrough ? terminal.alternate : null;
if (fallthroughId !== null) {
const scheduleId = cx.schedule(fallthroughId, "if");
scheduleIds.push(scheduleId);
}
let consequent: t.Statement | null = null;
if (cx.isScheduled(terminal.consequent)) {
consequent = codegenBreak(cx, terminal.consequent);
} else {
consequent = codegenBlock(cx, cx.ir.blocks.get(terminal.consequent)!);
}
let alternate: t.Statement | null = null;
if (alternateId !== null) {
if (cx.isScheduled(alternateId)) {
alternate = codegenBreak(cx, alternateId);
} else {
alternate = codegenBlock(cx, cx.ir.blocks.get(alternateId)!);
}
}
cx.unscheduleAll(scheduleIds);
if (fallthroughId !== null) {
if (consequent === null && alternate === null) {
body.push(t.expressionStatement(test));
} else {
body.push(
t.labeledStatement(
t.identifier(`bb${fallthroughId}`),
t.ifStatement(test, consequent ?? t.blockStatement([]), alternate)
)
case InstructionKind.Reassign: {
return t.expressionStatement(
t.assignmentExpression("=", codegenLVal(instr.lvalue), value)
);
}
writeBlock(cx, cx.ir.blocks.get(fallthroughId)!, body);
} else {
if (consequent === null && alternate === null) {
body.push(t.expressionStatement(test));
} else {
body.push(
t.ifStatement(test, consequent ?? t.blockStatement([]), alternate)
);
}
}
break;
}
case "switch": {
const test = codegenPlace(cx, terminal.test);
const fallthroughId =
terminal.fallthrough !== null && !cx.isScheduled(terminal.fallthrough)
? terminal.fallthrough
: null;
if (fallthroughId !== null) {
const scheduleId = cx.schedule(fallthroughId, "switch");
scheduleIds.push(scheduleId);
}
const cases: Array<t.SwitchCase> = [];
[...terminal.cases].reverse().forEach((case_, index) => {
const test = case_.test !== null ? codegenPlace(cx, case_.test) : null;
let consequent;
if (cx.isScheduled(case_.block)) {
// cases which are empty or contain only a `break` may point to blocks
// that are already scheduled. emit as follows:
// - if the block is for another case branch, don't emit a break and fall-through
// - else, emit an explicit break.
const break_ = codegenBreak(cx, case_.block);
if (
index === 0 &&
break_ === null &&
case_.block === terminal.fallthrough &&
case_.test === null
) {
// If the last case statement (first in reverse order) is a default that
// jumps to the fallthrough, then we would emit a useless `default: {}`,
// so instead skip this case.
return;
}
const block = [];
if (break_ !== null) {
block.push(break_);
}
consequent = t.blockStatement(block);
} else {
consequent = codegenBlock(cx, cx.ir.blocks.get(case_.block)!);
const scheduleId = cx.schedule(case_.block, "case");
scheduleIds.push(scheduleId);
}
cases.push(t.switchCase(test, [consequent]));
});
cases.reverse();
cx.unscheduleAll(scheduleIds);
if (fallthroughId !== null) {
body.push(
t.labeledStatement(
t.identifier(`bb${fallthroughId}`),
t.switchStatement(test, cases)
)
);
writeBlock(cx, cx.ir.blocks.get(fallthroughId)!, body);
} else {
body.push(t.switchStatement(test, cases));
}
break;
}
case "while": {
const testBlock = cx.ir.blocks.get(terminal.test)!;
const testTerminal = testBlock.terminal;
invariant(
testTerminal.kind === "if",
"Expected while loop test block to end in an if"
);
const bodyLength = body.length;
for (const instr of testBlock.instructions) {
writeInstr(cx, instr, body);
}
invariant(
body.length === bodyLength,
"Expected test to produce only temporaries"
);
const testValue =
cx.temp.get(testTerminal.test.identifier.id) ??
codegenPlace(cx, testTerminal.test);
invariant(
testValue != null,
"Expected test to produce a temporary value"
);
const fallthroughId =
terminal.fallthrough !== null && !cx.isScheduled(terminal.fallthrough)
? terminal.fallthrough
: null;
const loopId =
!cx.isScheduled(terminal.loop) && terminal.loop !== terminal.fallthrough
? terminal.loop
: null;
const scheduleId = cx.scheduleLoop(
terminal.fallthrough,
terminal.test,
terminal.loop
);
scheduleIds.push(scheduleId);
let loopBody: t.Statement;
if (loopId) {
loopBody = codegenBlock(cx, cx.ir.blocks.get(loopId)!);
} else {
const break_ = codegenBreak(cx, terminal.loop);
invariant(
break_ !== null,
"If loop body is already scheduled it must be a break"
);
loopBody = t.blockStatement([break_]);
}
cx.unscheduleAll(scheduleIds);
if (fallthroughId !== null) {
body.push(
t.labeledStatement(
t.identifier(`bb${fallthroughId}`),
t.whileStatement(testValue, loopBody)
)
);
writeBlock(cx, cx.ir.blocks.get(fallthroughId)!, body);
} else {
body.push(t.whileStatement(testValue, loopBody));
}
break;
}
case "goto": {
switch (terminal.variant) {
case GotoVariant.Break: {
const break_ = codegenBreak(cx, terminal.block);
if (break_ !== null) {
body.push(break_);
}
break;
}
case GotoVariant.Continue: {
const continue_ = codegenContinue(cx, terminal.block);
if (continue_ !== null) {
body.push(continue_);
}
break;
}
default: {
assertExhaustive(
terminal.variant,
`Unexpected goto variant '${terminal.variant}'`
instr.lvalue.kind,
`Unexpected instruction kind '${instr.lvalue.kind}'`
);
}
}
break;
}
default: {
assertExhaustive(terminal, "Unexpected terminal");
}
}
}
function codegenBreak(cx: Context, block: BlockId): t.Statement | null {
const target = cx.getBreakTarget(block);
if (target === null) {
// TODO: we should always have a target
visitImplicitTerminal(): t.Statement | null {
return null;
}
switch (target.type) {
case "implicit": {
return null;
visitTerminal(
terminal: BlockTerminal<
t.Statement[],
t.Expression,
t.Statement,
t.SwitchCase
>
): t.Statement {
switch (terminal.kind) {
case "break": {
if (terminal.label) {
return t.breakStatement(t.identifier(terminal.label));
} else {
return t.breakStatement();
}
}
case "continue": {
if (terminal.label) {
return t.continueStatement(t.identifier(terminal.label));
} else {
return t.continueStatement();
}
}
case "if": {
return t.ifStatement(
terminal.test,
terminal.consequent,
terminal.alternate
);
}
case "switch": {
return t.switchStatement(terminal.test, terminal.cases);
}
case "while": {
return t.whileStatement(terminal.test, terminal.loop);
}
case "return": {
if (terminal.value !== null) {
return t.returnStatement(terminal.value);
} else {
return t.returnStatement();
}
}
case "throw": {
return t.throwStatement(terminal.value);
}
default: {
assertExhaustive(
terminal,
`Unexpected terminal kind '${(terminal as any).kind}'`
);
}
}
case "unlabeled": {
return t.breakStatement();
}
visitCase(test: t.Expression | null, block: t.Statement): t.SwitchCase {
return t.switchCase(test, [block]);
}
appendBlock(
block: t.Statement[],
item: t.Statement,
label?: string | undefined
): void {
if (item.type === "EmptyStatement") {
return;
}
case "labeled": {
return t.breakStatement(t.identifier(`bb${target.block}`));
if (label !== undefined) {
block.push(t.labeledStatement(t.identifier(label), item));
} else {
block.push(item);
}
}
leaveBlock(block: t.Statement[]): t.Statement {
return t.blockStatement(block);
}
}
function codegenContinue(cx: Context, block: BlockId): t.Statement | null {
const target = cx.getContinueTarget(block);
invariant(
target !== null,
`Expected continue target to be scheduled for bb${block}`
);
switch (target.type) {
case "labeled": {
return t.continueStatement(t.identifier(`bb${target.block}`));
}
case "unlabeled": {
return t.continueStatement();
}
case "implicit": {
return null;
}
default: {
assertExhaustive(
target.type,
`Unexpected continue target kind '${(target as any).type}'`
);
}
}
}
function writeInstr(cx: Context, instr: Instruction, body: Array<t.Statement>) {
function codegenInstructionValue(
temp: Temporaries,
instrValue: InstructionValue
): t.Expression | t.JSXFragment | t.Statement {
let value: t.Expression;
const instrValue = instr.value;
switch (instrValue.kind) {
case "ArrayExpression": {
const elements = instrValue.elements.map((element) =>
codegenPlace(cx, element)
codegenPlace(temp, element)
);
value = t.arrayExpression(elements);
break;
}
case "BinaryExpression": {
const left = codegenPlace(cx, instrValue.left);
const right = codegenPlace(cx, instrValue.right);
const left = codegenPlace(temp, instrValue.left);
const right = codegenPlace(temp, instrValue.right);
value = t.binaryExpression(instrValue.operator, left, right);
break;
}
case "UnaryExpression": {
value = t.unaryExpression(
instrValue.operator as "throw", // todo
codegenPlace(cx, instrValue.value)
codegenPlace(temp, instrValue.value)
);
break;
}
case "Primitive": {
value = codegenValue(cx, instrValue.value);
value = codegenValue(temp, instrValue.value);
break;
}
case "CallExpression": {
const callee = codegenPlace(cx, instrValue.callee);
const args = instrValue.args.map((arg) => codegenPlace(cx, arg));
const callee = codegenPlace(temp, instrValue.callee);
const args = instrValue.args.map((arg) => codegenPlace(temp, arg));
value = t.callExpression(callee, args);
break;
}
case "NewExpression": {
const callee = codegenPlace(cx, instrValue.callee);
const args = instrValue.args.map((arg) => codegenPlace(cx, arg));
const callee = codegenPlace(temp, instrValue.callee);
const args = instrValue.args.map((arg) => codegenPlace(temp, arg));
value = t.newExpression(callee, args);
break;
}
@@ -603,7 +245,10 @@ function writeInstr(cx: Context, instr: Instruction, body: Array<t.Statement>) {
if (instrValue.properties !== null) {
for (const [property, value] of instrValue.properties) {
properties.push(
t.objectProperty(t.stringLiteral(property), codegenPlace(cx, value))
t.objectProperty(
t.stringLiteral(property),
codegenPlace(temp, value)
)
);
}
}
@@ -620,11 +265,11 @@ function writeInstr(cx: Context, instr: Instruction, body: Array<t.Statement>) {
attributes.push(
t.jsxAttribute(
t.jsxIdentifier(prop),
t.jsxExpressionContainer(codegenPlace(cx, value))
t.jsxExpressionContainer(codegenPlace(temp, value))
)
);
}
let tagValue = codegenPlace(cx, instrValue.tag);
let tagValue = codegenPlace(temp, instrValue.tag);
let tag: string;
if (tagValue.type === "Identifier") {
tag = tagValue.name;
@@ -637,7 +282,7 @@ function writeInstr(cx: Context, instr: Instruction, body: Array<t.Statement>) {
}
const children =
instrValue.children !== null
? instrValue.children.map((child) => codegenJsxElement(cx, child))
? instrValue.children.map((child) => codegenJsxElement(temp, child))
: [];
value = t.jsxElement(
t.jsxOpeningElement(
@@ -657,75 +302,34 @@ function writeInstr(cx: Context, instr: Instruction, body: Array<t.Statement>) {
value = t.jsxFragment(
t.jsxOpeningFragment(),
t.jsxClosingFragment(),
instrValue.children.map((child) => codegenJsxElement(cx, child))
instrValue.children.map((child) => codegenJsxElement(temp, child))
);
break;
}
case "OtherStatement": {
const node = instrValue.node;
if (t.isStatement(node)) {
body.push(node);
return;
if (!t.isExpression(node)) {
return node as any; // TODO handle statements, jsx fragments
}
value = node as any; // TODO(josephsavona) complete handling of JSX fragment/spreadchild elements
value = node;
break;
}
case "Identifier": {
value = codegenPlace(cx, instrValue);
value = codegenPlace(temp, instrValue);
break;
}
default: {
assertExhaustive(instrValue, "Unexpected instruction kind");
assertExhaustive(
instrValue,
`Unexpected instruction value kind '${(instrValue as any).kind}'`
);
}
}
if (instr.lvalue !== null) {
if (
instr.lvalue.place.identifier.name === null &&
instr.lvalue.place.memberPath === null
) {
// Temporary value: don't immediately emit, instead save the value to refer to later
cx.temp.set(instr.lvalue.place.identifier.id, value);
} else {
switch (instr.lvalue.kind) {
case InstructionKind.Const: {
body.push(
t.variableDeclaration("const", [
t.variableDeclarator(codegenLVal(instr.lvalue), value),
])
);
break;
}
case InstructionKind.Let: {
body.push(
t.variableDeclaration("let", [
t.variableDeclarator(codegenLVal(instr.lvalue), value),
])
);
break;
}
case InstructionKind.Reassign: {
body.push(
t.expressionStatement(
t.assignmentExpression("=", codegenLVal(instr.lvalue), value)
)
);
break;
}
default: {
assertExhaustive(
instr.lvalue.kind,
`Unexpected instruction kind '${instr.lvalue.kind}'`
);
}
}
}
} else {
body.push(t.expressionStatement(value));
}
return value;
}
function codegenJsxElement(
cx: Context,
temp: Temporaries,
place: Place
):
| t.JSXText
@@ -733,7 +337,7 @@ function codegenJsxElement(
| t.JSXSpreadChild
| t.JSXElement
| t.JSXFragment {
const value = codegenPlace(cx, place);
const value = codegenPlace(temp, place);
switch (value.type) {
case "StringLiteral": {
return t.jsxText(value.value);
@@ -757,7 +361,7 @@ function codegenLVal(lval: LValue): t.LVal {
}
function codegenValue(
cx: Context,
temp: Temporaries,
value: boolean | number | string | null | undefined
): t.Expression {
if (typeof value === "number") {
@@ -775,10 +379,10 @@ function codegenValue(
}
}
function codegenPlace(cx: Context, place: Place): t.Expression {
function codegenPlace(temp: Temporaries, place: Place): t.Expression {
todoInvariant(place.kind === "Identifier", "support scope values");
if (place.memberPath === null) {
let tmp = cx.temp.get(place.identifier.id);
let tmp = temp.get(place.identifier.id);
if (tmp != null) {
return tmp;
}
+684
View File
@@ -0,0 +1,684 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import invariant from "invariant";
import { assertExhaustive } from "../Common/utils";
import {
BasicBlock,
BlockId,
GotoVariant,
HIR,
HIRFunction,
Instruction,
InstructionValue,
Place,
} from "./HIR";
/**
* Function to visit HIR as a tree of high-level constructs rather than as a sequence
* of lower-level basic blocks. Intended for use in codegen and reactive scope
* construction which need to see the original "shape" of the code.
*
* See the {@link Visitor} interface for more about implementing a visitor.
*/
export function visitTree<TBlock, TValue, TItem, TCase>(
fn: HIRFunction,
visitor: Visitor<TBlock, TValue, TItem, TCase>
): TItem {
const cx = new Context(fn.body);
const driver = new Driver(cx, visitor);
return driver.traverseBlock(cx.block(fn.body.entry));
}
class Driver<TBlock, TValue, TItem, TCase> {
cx: Context;
visitor: Visitor<TBlock, TValue, TItem, TCase>;
constructor(cx: Context, visitor: Visitor<TBlock, TValue, TItem, TCase>) {
this.cx = cx;
this.visitor = visitor;
}
traverseBlock(block: BasicBlock): TItem {
const blockValue = this.visitor.enterBlock();
this.visitBlock(block, blockValue);
return this.visitor.leaveBlock(blockValue);
}
visitBlock(block: BasicBlock, blockValue: TBlock): void {
invariant(
!this.cx.emitted.has(block.id),
`Cannot emit the same block twice: bb${block.id}`
);
this.cx.emitted.add(block.id);
for (const instr of block.instructions) {
this.visitInstr(instr, blockValue);
}
const terminal = block.terminal;
const scheduleIds = [];
switch (terminal.kind) {
case "return": {
const value =
terminal.value != null ? this.visitPlace(terminal.value) : null;
this.visitor.appendBlock(
blockValue,
this.visitor.visitTerminal({
kind: "return",
value,
})
);
break;
}
case "throw": {
const value = this.visitPlace(terminal.value);
this.visitor.appendBlock(
blockValue,
this.visitor.visitTerminal({
kind: "throw",
value,
})
);
break;
}
case "if": {
const test = this.visitPlace(terminal.test);
const fallthroughId =
terminal.fallthrough !== null &&
!this.cx.isScheduled(terminal.fallthrough)
? terminal.fallthrough
: null;
const alternateId =
terminal.alternate !== terminal.fallthrough
? terminal.alternate
: null;
if (fallthroughId !== null) {
const scheduleId = this.cx.schedule(fallthroughId, "if");
scheduleIds.push(scheduleId);
}
let consequent: TItem | null = null;
if (this.cx.isScheduled(terminal.consequent)) {
consequent = this.visitBreak(terminal.consequent);
} else {
consequent = this.traverseBlock(
this.cx.ir.blocks.get(terminal.consequent)!
);
}
let alternate: TItem | null = null;
if (alternateId !== null) {
if (this.cx.isScheduled(alternateId)) {
alternate = this.visitBreak(alternateId);
} else {
alternate = this.traverseBlock(this.cx.ir.blocks.get(alternateId)!);
}
}
this.cx.unscheduleAll(scheduleIds);
if (fallthroughId !== null) {
this.visitor.appendBlock(
blockValue,
this.visitor.visitTerminal({
kind: "if",
test,
consequent: consequent ?? this.emptyBlock(),
alternate: alternate,
}),
`bb${fallthroughId}` //
);
this.visitBlock(this.cx.ir.blocks.get(fallthroughId)!, blockValue);
} else {
this.visitor.appendBlock(
blockValue,
this.visitor.visitTerminal({
kind: "if",
test,
consequent: consequent ?? this.emptyBlock(),
alternate: alternate,
})
);
}
break;
}
case "switch": {
const test = this.visitPlace(terminal.test);
const fallthroughId =
terminal.fallthrough !== null &&
!this.cx.isScheduled(terminal.fallthrough)
? terminal.fallthrough
: null;
if (fallthroughId !== null) {
const scheduleId = this.cx.schedule(fallthroughId, "switch");
scheduleIds.push(scheduleId);
}
const cases: Array<TCase> = [];
[...terminal.cases].reverse().forEach((case_, index) => {
const test = case_.test !== null ? this.visitPlace(case_.test) : null;
let consequent;
if (this.cx.isScheduled(case_.block)) {
// cases which are empty or contain only a `break` may point to blocks
// that are already scheduled. emit as follows:
// - if the block is for another case branch, don't emit a break and fall-through
// - else, emit an explicit break.
const break_ = this.visitBreak(case_.block);
if (
index === 0 &&
break_ === null &&
case_.block === terminal.fallthrough &&
case_.test === null
) {
// If the last case statement (first in reverse order) is a default that
// jumps to the fallthrough, then we would emit a useless `default: {}`,
// so instead skip this case.
return;
}
const block = this.visitor.enterBlock();
if (break_ !== null) {
this.visitor.appendBlock(block, break_);
}
consequent = this.visitor.leaveBlock(block);
} else {
consequent = this.traverseBlock(
this.cx.ir.blocks.get(case_.block)!
);
const scheduleId = this.cx.schedule(case_.block, "case");
scheduleIds.push(scheduleId);
}
cases.push(this.visitor.visitCase(test, consequent));
});
cases.reverse();
this.cx.unscheduleAll(scheduleIds);
if (fallthroughId !== null) {
this.visitor.appendBlock(
blockValue,
this.visitor.visitTerminal({ kind: "switch", test, cases }),
`bb${fallthroughId}`
);
this.visitBlock(this.cx.ir.blocks.get(fallthroughId)!, blockValue);
} else {
this.visitor.appendBlock(
blockValue,
this.visitor.visitTerminal({ kind: "switch", test, cases })
);
}
break;
}
case "while": {
const testBlock = this.cx.ir.blocks.get(terminal.test)!;
const testTerminal = testBlock.terminal;
invariant(
testTerminal.kind === "if",
"Expected while loop test block to end in an if"
);
// const bodyLength = blockValue.length;
for (const instr of testBlock.instructions) {
this.visitInstr(instr, blockValue);
}
// invariant(
// body.length === bodyLength,
// "Expected test to produce only temporaries"
// );
const testValue = this.visitPlace(testTerminal.test);
invariant(
testValue != null,
"Expected test to produce a temporary value"
);
const fallthroughId =
terminal.fallthrough !== null &&
!this.cx.isScheduled(terminal.fallthrough)
? terminal.fallthrough
: null;
const loopId =
!this.cx.isScheduled(terminal.loop) &&
terminal.loop !== terminal.fallthrough
? terminal.loop
: null;
const scheduleId = this.cx.scheduleLoop(
terminal.fallthrough,
terminal.test,
terminal.loop
);
scheduleIds.push(scheduleId);
let loopBody: TItem;
if (loopId) {
loopBody = this.traverseBlock(this.cx.ir.blocks.get(loopId)!);
} else {
const break_ = this.visitBreak(terminal.loop);
invariant(
break_ !== null,
"If loop body is already scheduled it must be a break"
);
const body = this.visitor.enterBlock();
this.visitor.appendBlock(body, break_);
loopBody = this.visitor.leaveBlock(body);
}
this.cx.unscheduleAll(scheduleIds);
if (fallthroughId !== null) {
this.visitor.appendBlock(
blockValue,
this.visitor.visitTerminal({
kind: "while",
test: testValue,
loop: loopBody,
}),
`bb${fallthroughId}`
);
this.visitBlock(this.cx.ir.blocks.get(fallthroughId)!, blockValue);
} else {
this.visitor.appendBlock(
blockValue,
this.visitor.visitTerminal({
kind: "while",
test: testValue,
loop: loopBody,
})
);
}
break;
}
case "goto": {
switch (terminal.variant) {
case GotoVariant.Break: {
const break_ = this.visitBreak(terminal.block);
if (break_ !== null) {
this.visitor.appendBlock(blockValue, break_);
}
break;
}
case GotoVariant.Continue: {
const continue_ = this.visitContinue(terminal.block);
if (continue_ !== null) {
this.visitor.appendBlock(blockValue, continue_);
}
break;
}
default: {
assertExhaustive(
terminal.variant,
`Unexpected goto variant '${terminal.variant}'`
);
}
}
break;
}
default: {
assertExhaustive(terminal, "Unexpected terminal");
}
}
}
emptyBlock(): TItem {
const block = this.visitor.enterBlock();
return this.visitor.leaveBlock(block);
}
visitBreak(block: BlockId): TItem | null {
const target = this.cx.getBreakTarget(block);
if (target === null) {
// TODO: we should always have a target
return null;
}
switch (target.type) {
case "implicit": {
return this.visitor.visitImplicitTerminal();
}
case "unlabeled": {
return this.visitor.visitTerminal({ kind: "break", label: null });
}
case "labeled": {
return this.visitor.visitTerminal({
kind: "break",
label: `bb${target.block}`,
});
}
}
}
visitContinue(block: BlockId): TItem | null {
const target = this.cx.getContinueTarget(block);
invariant(
target !== null,
`Expected continue target to be scheduled for bb${block}`
);
switch (target.type) {
case "labeled": {
return this.visitor.visitTerminal({
kind: "continue",
label: `bb${target.block}`,
});
}
case "unlabeled": {
return this.visitor.visitTerminal({ kind: "continue", label: null });
}
case "implicit": {
return this.visitor.visitImplicitTerminal();
}
default: {
assertExhaustive(
target.type,
`Unexpected continue target kind '${(target as any).type}'`
);
}
}
}
visitInstr(instr: Instruction, blockValue: TBlock): void {
const value = this.visitor.visitValue(instr.value);
const item = this.visitor.visitInstruction(instr, value);
this.visitor.appendBlock(blockValue, item);
}
visitPlace(place: Place): TValue {
return this.visitor.visitValue(place);
}
}
class Context {
ir: HIR;
#nextScheduleId: number = 0;
/**
* Used to track which blocks *have been* generated already in order to
* abort if a block is generated a second time. This is an error catching
* mechanism for debugging purposes, and is not used by the codegen algorithm
* to drive decisions about how to emit blocks.
*/
emitted: Set<BlockId> = new Set();
/**
* A set of blocks that are already scheduled to be emitted by eg a parent.
* This allows child nodes to avoid re-emitting the same block and emit eg
* a break instead.
*/
#scheduled: Set<BlockId> = new Set();
/**
* Represents which control flow operations are currently in scope, with the innermost
* scope last. Roughly speaking, the last ControlFlowTarget on the stack indicates where
* control will implicitly transfer, such that gotos to that block can be elided. Gotos
* targeting items higher up the stack may need labeled break or continue; see
* getBreakTarget() and getContinueTarget() for more details.
*/
#controlFlowStack: Array<ControlFlowTarget> = [];
constructor(ir: HIR) {
this.ir = ir;
}
block(id: BlockId): BasicBlock {
return this.ir.blocks.get(id)!;
}
/**
* Record that the given block will be emitted (eg by the codegen of a parent node)
* so that child nodes can avoid re-emitting it.
*/
schedule(block: BlockId, type: "if" | "switch" | "case"): number {
const id = this.#nextScheduleId++;
invariant(
!this.#scheduled.has(block),
`Break block is already scheduled: bb${block}`
);
this.#scheduled.add(block);
this.#controlFlowStack.push({ block, id, type });
return id;
}
scheduleLoop(
fallthroughBlock: BlockId,
continueBlock: BlockId,
loopBlock: BlockId | null
): number {
const id = this.#nextScheduleId++;
const ownsBlock = !this.#scheduled.has(fallthroughBlock);
this.#scheduled.add(fallthroughBlock);
invariant(
!this.#scheduled.has(continueBlock),
`Continue block is already scheduled: bb${continueBlock}`
);
this.#scheduled.add(continueBlock);
let ownsLoop = false;
if (loopBlock !== null) {
ownsLoop = !this.#scheduled.has(loopBlock);
this.#scheduled.add(loopBlock);
}
this.#controlFlowStack.push({
block: fallthroughBlock,
ownsBlock,
id,
type: "loop",
continueBlock,
loopBlock,
ownsLoop,
});
return id;
}
/**
* Removes a block that was scheduled; must be called after that block is emitted.
*/
unschedule(scheduleId: number): void {
const last = this.#controlFlowStack.pop();
invariant(
last !== undefined && last.id === scheduleId,
"Can only unschedule the last target"
);
if (last.type !== "loop" || last.ownsBlock !== null) {
this.#scheduled.delete(last.block);
}
if (last.type === "loop") {
this.#scheduled.delete(last.continueBlock);
if (last.ownsLoop && last.loopBlock !== null) {
this.#scheduled.delete(last.loopBlock);
}
}
}
/**
* Helper to unschedule multiple scheduled blocks. The ids should be in
* the order in which they were scheduled, ie most recently scheduled last.
*/
unscheduleAll(scheduleIds: Array<number>): void {
for (let i = scheduleIds.length - 1; i >= 0; i--) {
this.unschedule(scheduleIds[i]!);
}
}
/**
* Check if the given @param block is scheduled or not.
*/
isScheduled(block: BlockId): boolean {
return this.#scheduled.has(block);
}
/**
* Given the current control flow stack, determines how a `break` to the given @param block
* must be emitted. Returns as follows:
* - 'implicit' if control would implicitly transfer to that block
* - 'labeled' if a labeled break is required to transfer control to that block
* - 'unlabeled' if an unlabeled break would transfer to that block
* - null if there is no information for this block
*
* The returned 'block' value should be used as the label if necessary.
*/
getBreakTarget(
block: BlockId
): { block: BlockId; type: ControlFlowKind } | null {
let hasPrecedingLoop = false;
for (let i = this.#controlFlowStack.length - 1; i >= 0; i--) {
const target = this.#controlFlowStack[i]!;
if (target.block === block) {
let type: ControlFlowKind;
if (target.type === "loop") {
// breaking out of a loop requires an explicit break,
// but only requires a label if breaking past the innermost loop.
type = hasPrecedingLoop ? "labeled" : "unlabeled";
} else if (i === this.#controlFlowStack.length - 1) {
// breaking to the last break point, which is where control will transfer
// implicitly
type = "implicit";
} else {
// breaking somewhere else requires an explicit break
type = "labeled";
}
return {
block: target.block,
type,
};
}
hasPrecedingLoop ||= target.type === "loop";
}
return null;
}
/**
* Given the current control flow stack, determines how a `continue` to the given @param block
* must be emitted. Returns as follows:
* - 'implicit' if control would implicitly continue to that block
* - 'labeled' if a labeled continue is required to continue to that block
* - 'unlabeled' if an unlabeled continue would transfer to that block
* - null if there is no information for this block
*
* The returned 'block' value should be used as the label if necessary.
*/
getContinueTarget(
block: BlockId
): { block: BlockId; type: ControlFlowKind } | null {
let hasPrecedingLoop = false;
for (let i = this.#controlFlowStack.length - 1; i >= 0; i--) {
const target = this.#controlFlowStack[i]!;
if (target.type == "loop" && target.continueBlock === block) {
let type: ControlFlowKind;
if (hasPrecedingLoop) {
// continuing to a loop that is not the innermost loop always requires
// a label
type = "labeled";
} else if (i === this.#controlFlowStack.length - 1) {
// continuing to the last break point, which is where control will
// transfer to naturally
type = "implicit";
} else {
// the continue is inside some conditional logic, requires an explicit
// continue
type = "unlabeled";
}
return {
block: target.block,
type,
};
}
hasPrecedingLoop ||= target.type === "loop";
}
return null;
}
debugBreakTargets(): Array<ControlFlowTarget> {
return this.#controlFlowStack.map((target) => ({ ...target }));
}
}
type ControlFlowKind = "implicit" | "labeled" | "unlabeled";
type ControlFlowTarget =
| { type: "if"; block: BlockId; id: number }
| { type: "switch"; block: BlockId; id: number }
| { type: "case"; block: BlockId; id: number }
| {
type: "loop";
block: BlockId;
ownsBlock: boolean;
continueBlock: BlockId;
loopBlock: BlockId | null;
ownsLoop: boolean;
id: number;
};
/**
* An object that can receive structured callbacks to visit HIR as a tree,
* and convert it to an alternate format.
*
* TBlock = representation of a list of statements
* TValue = represenation of an InstructionValue
* TItem = representation of an Instruction
* TCase = representation of a switch case
*/
export interface Visitor<TBlock, TValue, TItem, TCase> {
/**
* Must create an "empty" instance of the visitor's represenation for
* the contents of a block.
*/
enterBlock(): TBlock;
/**
* Convert an InstructionValue into the visitor's own representation
* of a value.
*/
visitValue(value: InstructionValue): TValue;
/**
* Convert an Instruction into the visitor's own representation of
* a block item.
*/
visitInstruction(instruction: Instruction, value: TValue): TItem;
/**
* Converts a break/continue that is implicit — that does not strictly
* have to be emitted — to the visitor's representation. The visitor
* can choose to return null if this does not need to be represented.
*/
visitImplicitTerminal(): TItem | null;
/**
* Converts a terminal into the visitor's own representation of a block
* item. Note that the terminal differs from HIR Terminals, because
* values and block ids will have already been converted into the visitor's
* own representations.
*/
visitTerminal(terminal: BlockTerminal<TBlock, TValue, TItem, TCase>): TItem;
/**
* Visits a switch case statement, which is collected into a switch terminal
* variant.
*/
visitCase(test: TValue | null, block: TItem): TCase;
/**
* Appends an item onto the given block, with an optional label. The label
* indicates that a break/continue will proceed to code *after* the given item.
*/
appendBlock(block: TBlock, item: TItem, label?: string): void;
/**
* Converts the visitor's block representation into the representation of a
* block item, simultaneously "closing" the given block. The block will no
* longer be modified by the visitor driver.
*/
leaveBlock(block: TBlock): TItem;
}
export type BlockTerminal<TBlock, TValue, TItem, TCase> =
| { kind: "return"; value: TValue | null }
| { kind: "throw"; value: TValue }
| {
kind: "if";
test: TValue;
consequent: TItem;
alternate: TItem | null;
}
| { kind: "switch"; test: TValue; cases: Array<TCase> }
| { kind: "while"; test: TValue; loop: TItem }
| { kind: "break"; label: string | null }
| { kind: "continue"; label: string | null };
+127
View File
@@ -0,0 +1,127 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import { assertExhaustive } from "../Common/utils";
import { HIRFunction, Instruction, InstructionValue } from "./HIR";
import { BlockTerminal, Visitor, visitTree } from "./HIRTreeVisitor";
import { printLValue, printMixedHIR } from "./PrintHIR";
/**
* Returns a text description of the HIR that has the overall tree shape
* of the original AST, but with the contents of each block printed
* similarly to printHIR's instruction formatting.
*/
export function printHIRTree(fn: HIRFunction): string {
return visitTree(fn, new PrintVisitor());
}
class PrintVisitor implements Visitor<Array<string>, string, string, string> {
depth: number = 0; // for indentation
enterBlock(): string[] {
this.depth++;
return [];
}
visitValue(value: InstructionValue): string {
return printMixedHIR(value);
}
visitInstruction(instr: Instruction, value: string): string {
if (instr.lvalue !== null) {
return `[${instr.id}] ${printLValue(instr.lvalue)} = ${value}`;
} else {
return `[${instr.id}] ${value}`;
}
}
visitImplicitTerminal(): string | null {
return null;
}
visitTerminal(
terminal: BlockTerminal<string[], string, string, string>
): string {
let value: string;
switch (terminal.kind) {
case "break": {
if (terminal.label !== null) {
value = `Break ${terminal.label}`;
} else {
value = "Break";
}
break;
}
case "continue": {
if (terminal.label !== null) {
value = `Continue ${terminal.label}`;
} else {
value = "Continue";
}
break;
}
case "if": {
if (terminal.alternate !== null) {
value = `If (${
terminal.test
}) ${terminal.consequent.trimStart()} else ${terminal.alternate}`;
} else {
value = `If (${terminal.test}) ${terminal.consequent.trimStart()}`;
}
break;
}
case "switch": {
const prefix = " ".repeat(this.depth);
value = `Switch (${terminal.test}) {\n${terminal.cases
.flatMap((case_) => case_.split("\n").map((line) => ` ${line}`))
.join("\n")}\n${prefix}}`;
break;
}
case "while": {
value = `While (${terminal.test}) ${terminal.loop.trimStart()}`;
break;
}
case "return": {
if (terminal.value !== null) {
value = `Return ${terminal.value}`;
} else {
value = "Return";
}
break;
}
case "throw": {
value = `Throw ${terminal.value}`;
break;
}
default: {
assertExhaustive(
terminal,
`Unexpected terminal kind '${(terminal as any).kind}'`
);
}
}
return value;
}
visitCase(test: string | null, block: string): string {
const prefix = " ".repeat(this.depth);
if (test === null) {
return `${prefix}default: ${block.trimStart()}`;
} else {
return `${prefix}case ${test}: ${block.trimStart()}`;
}
}
appendBlock(block: string[], item: string, label?: string | undefined): void {
const prefix = " ".repeat(this.depth);
if (item !== "") {
block.push(`${prefix}${item.trimStart()}`);
}
if (label !== undefined) {
block.push(`${prefix}${label}:`);
}
}
leaveBlock(block: string[]): string {
this.depth--;
const prefix = " ".repeat(this.depth);
return `${prefix}{\n${block.join("\n")}\n${prefix}}`;
}
}
@@ -234,10 +234,18 @@ function Component$0(props$1) {
bb4: if (cond$8(a$2)) break;
}
a$2;
b$3;
c$4;
d$5;
bb7: if (a$2) {
}
bb9: if (b$3) {
}
bb11: if (c$4) {
}
bb13: if (d$5) {
}
mutate$7(d$5, null);
return;
}
@@ -158,11 +158,21 @@ function Component$0(props$1) {
const x$6 = {};
x$6.b = b$3;
const y$7 = mutate$8(x$6, d$5);
a$2;
b$3;
c$4;
d$5;
y$7;
bb1: if (a$2) {
}
bb3: if (b$3) {
}
bb5: if (c$4) {
}
bb7: if (d$5) {
}
bb9: if (y$7) {
}
mutate$8(x$6, null);
return;
}
@@ -214,10 +214,18 @@ function Component$0(props$1) {
bb4: if (cond$7(a$2)) break;
}
a$2;
b$3;
c$4;
d$5;
bb7: if (a$2) {
}
bb9: if (b$3) {
}
bb11: if (c$4) {
}
bb13: if (d$5) {
}
mutate$6(d$5, null);
return;
}
@@ -72,6 +72,7 @@ describe("React Forget (HIR version)", () => {
);
const textHIR = printHIR(ir.body);
// const textHIR = visitTree(ir, new PrintVisitor());
const visualization = visualizeHIRMermaid(ir);
const text = prettier.format(
generate(ast).code.replace("\n\n", "\n"),