From a67fefdebd12be50f61339f57cdf2a39a05237fc Mon Sep 17 00:00:00 2001 From: Joe Savona Date: Thu, 3 Aug 2023 12:41:50 -0700 Subject: [PATCH] [rust] Data structures for semantic analysis This is two things: * A toy semantic analysis that handles a tiny subset of JS, including labeled statements, labeled break/continue, and variable declaration/reference/reassignment. This only exists as a way to prove out the API for the more important bit: * More importantly, this defines a data model for the semantic analysis results and an API for building up the semantic analysis. Subsequent diffs will replace the first bit (toy analysis impl), while keeping the second part. --- compiler/forget/Cargo.lock | 5 + compiler/forget/Cargo.toml | 1 + .../forget/crates/forget_estree/src/visit.rs | 92 ++- .../crates/forget_estree_swc/src/lib.rs | 28 +- .../forget/crates/forget_fixtures/Cargo.toml | 2 +- .../forget_semantic_analysis/Cargo.toml | 9 +- .../forget_semantic_analysis/src/analyze.rs | 500 ++++++++++++ .../forget_semantic_analysis/src/lib.rs | 53 +- .../tests/analysis_test.rs | 19 + .../tests/fixtures/labels.js | 13 + .../snapshots/analysis_test__fixtures.snap | 715 ++++++++++++++++++ 11 files changed, 1350 insertions(+), 87 deletions(-) create mode 100644 compiler/forget/crates/forget_semantic_analysis/src/analyze.rs create mode 100644 compiler/forget/crates/forget_semantic_analysis/tests/analysis_test.rs create mode 100644 compiler/forget/crates/forget_semantic_analysis/tests/fixtures/labels.js create mode 100644 compiler/forget/crates/forget_semantic_analysis/tests/snapshots/analysis_test__fixtures.snap diff --git a/compiler/forget/Cargo.lock b/compiler/forget/Cargo.lock index 1de628bd53..572e7b2e9f 100644 --- a/compiler/forget/Cargo.lock +++ b/compiler/forget/Cargo.lock @@ -566,8 +566,13 @@ dependencies = [ name = "forget_semantic_analysis" version = "0.1.0" dependencies = [ + "forget_diagnostics", "forget_estree", + "forget_hermes_parser", "forget_utils", + "indexmap 2.0.0", + "insta", + "serde_json", ] [[package]] diff --git a/compiler/forget/Cargo.toml b/compiler/forget/Cargo.toml index 8356dc632f..b19f2bab16 100644 --- a/compiler/forget/Cargo.toml +++ b/compiler/forget/Cargo.toml @@ -22,6 +22,7 @@ forget_fixtures = { path = "crates/forget_fixtures" } forget_hermes_parser = { path = "crates/forget_hermes_parser" } forget_hir = { path = "crates/forget_hir" } forget_optimization = { path = "crates/forget_optimization" } +forget_semantic_analysis = { path = "crates/forget_semantic_analysis" } forget_ssa = { path = "crates/forget_ssa" } forget_swc_demo = { path = "crates/forget_swc_demo" } forget_utils = { path = "crates/forget_utils" } diff --git a/compiler/forget/crates/forget_estree/src/visit.rs b/compiler/forget/crates/forget_estree/src/visit.rs index d7ec063a21..e99d9fe910 100644 --- a/compiler/forget/crates/forget_estree/src/visit.rs +++ b/compiler/forget/crates/forget_estree/src/visit.rs @@ -2,10 +2,10 @@ use crate::{ AssignmentPropertyOrRestElement, AssignmentTarget, Class, ClassItem, ClassPrivateProperty, ClassProperty, Declaration, DeclarationOrExpression, ExportAllDeclaration, ExportDefaultDeclaration, ExportNamedDeclaration, Expression, ExpressionOrPrivateIdentifier, - ExpressionOrSpread, ExpressionOrSuper, ForInInit, ForInit, Function, FunctionBody, Identifier, - ImportDeclaration, ImportDeclarationSpecifier, ImportOrExportDeclaration, Literal, - MethodDefinition, ModuleItem, Pattern, PrivateIdentifier, PrivateName, Program, Statement, - StaticBlock, Super, SwitchCase, VariableDeclarator, _Literal, + ExpressionOrSpread, ExpressionOrSuper, ForInInit, ForInit, Function, FunctionBody, + FunctionDeclaration, Identifier, ImportDeclaration, ImportDeclarationSpecifier, + ImportOrExportDeclaration, Literal, MethodDefinition, ModuleItem, Pattern, PrivateIdentifier, + PrivateName, Program, Statement, StaticBlock, Super, SwitchCase, VariableDeclarator, _Literal, }; /// Trait for visiting an estree @@ -17,7 +17,18 @@ pub trait Visitor<'ast> { f(self); } + fn visit_rvalue(&mut self, f: F) + where + F: FnOnce(&mut Self) -> (), + { + f(self); + } + fn visit_program(&mut self, program: &'ast Program) { + self.default_visit_program(program) + } + + fn default_visit_program(&mut self, program: &'ast Program) { for item in &program.body { self.visit_module_item(item); } @@ -73,7 +84,7 @@ pub trait Visitor<'ast> { fn visit_import_declaration(&mut self, declaration: &'ast ImportDeclaration) { self.visit_lvalue(|visitor| { for specifier in &declaration.specifiers { - visitor.visit_import_declaration_specifier(specifier, &declaration.source) + visitor.visit_import_declaration_specifier(specifier); } }); self.visit_import_source(&declaration.source); @@ -101,11 +112,7 @@ pub trait Visitor<'ast> { } } - fn visit_import_declaration_specifier( - &mut self, - specifier: &'ast ImportDeclarationSpecifier, - _source: &'ast _Literal, - ) { + fn visit_import_declaration_specifier(&mut self, specifier: &'ast ImportDeclarationSpecifier) { match specifier { ImportDeclarationSpecifier::ImportSpecifier(specifier) => { self.visit_identifier(&specifier.local); @@ -129,7 +136,7 @@ pub trait Visitor<'ast> { self.visit_class(&declaration.class); } Declaration::FunctionDeclaration(declaration) => { - self.visit_function(&declaration.function); + self.visit_function_declaration(declaration); } Declaration::VariableDeclaration(declaration) => { for declarator in &declaration.declarations { @@ -142,6 +149,10 @@ pub trait Visitor<'ast> { } } + fn visit_function_declaration(&mut self, declaration: &'ast FunctionDeclaration) { + self.visit_function(&declaration.function); + } + fn visit_statement(&mut self, stmt: &'ast Statement) { self.default_visit_statement(stmt); } @@ -198,7 +209,7 @@ pub trait Visitor<'ast> { self.visit_statement(&stmt.body); } Statement::FunctionDeclaration(stmt) => { - self.visit_function(&stmt.function); + self.visit_function_declaration(stmt); } Statement::IfStatement(stmt) => { self.visit_expression(&stmt.test); @@ -270,32 +281,28 @@ pub trait Visitor<'ast> { } for item in &class.body.body { match item { - ClassItem::MethodDefinition(item) => self.visit_method_definition(class, item), + ClassItem::MethodDefinition(item) => self.visit_method_definition(item), ClassItem::ClassProperty(item) => { - self.visit_class_property(class, item); + self.visit_class_property(item); } ClassItem::ClassPrivateProperty(item) => { - self.visit_class_private_property(class, item); + self.visit_class_private_property(item); } ClassItem::StaticBlock(item) => { - self.visit_static_block(class, item); + self.visit_static_block(item); } } } } - fn visit_class_property(&mut self, _class: &'ast Class, property: &'ast ClassProperty) { + fn visit_class_property(&mut self, property: &'ast ClassProperty) { self.visit_expression(&property.key); if let Some(value) = &property.value { self.visit_expression(value) } } - fn visit_class_private_property( - &mut self, - _class: &'ast Class, - property: &'ast ClassPrivateProperty, - ) { + fn visit_class_private_property(&mut self, property: &'ast ClassPrivateProperty) { match &property.key { ExpressionOrPrivateIdentifier::Expression(key) => self.visit_expression(key), ExpressionOrPrivateIdentifier::PrivateIdentifier(key) => { @@ -308,21 +315,17 @@ pub trait Visitor<'ast> { } } - fn visit_static_block(&mut self, _class: &'ast Class, property: &'ast StaticBlock) { + fn visit_static_block(&mut self, property: &'ast StaticBlock) { for stmt in &property.body { self.visit_statement(stmt) } } - fn visit_method_definition(&mut self, class: &'ast Class, method: &'ast MethodDefinition) { - self.default_visit_method_definition(class, method); + fn visit_method_definition(&mut self, method: &'ast MethodDefinition) { + self.default_visit_method_definition(method); } - fn default_visit_method_definition( - &mut self, - _class: &'ast Class, - method: &'ast MethodDefinition, - ) { + fn default_visit_method_definition(&mut self, method: &'ast MethodDefinition) { self.visit_expression(&method.key); self.visit_function(&method.value.function); } @@ -386,8 +389,10 @@ pub trait Visitor<'ast> { } Pattern::RestElement(pattern) => self.visit_pattern(&pattern.argument), Pattern::AssignmentPattern(pattern) => { - self.visit_expression(&pattern.right); self.visit_pattern(&pattern.left); + self.visit_rvalue(|visitor| { + visitor.visit_expression(&pattern.right); + }); } } } @@ -460,8 +465,31 @@ pub trait Visitor<'ast> { } } } + Expression::CallExpression(expr) => { + match &expr.callee { + ExpressionOrSuper::Expression(callee) => self.visit_expression(callee), + ExpressionOrSuper::Super(callee) => self.visit_super(callee), + } + for arg in &expr.arguments { + match arg { + ExpressionOrSpread::Expression(arg) => self.visit_expression(arg), + ExpressionOrSpread::SpreadElement(arg) => { + self.visit_expression(&arg.argument) + } + } + } + } + Expression::UpdateExpression(expr) => { + self.visit_expression(&expr.argument); + } + Expression::BooleanLiteral(_) + | Expression::NullLiteral(_) + | Expression::StringLiteral(_) + | Expression::NumericLiteral(_) => { + // no-op + } _ => { - todo!("more expression types") + todo!("{:#?}", expr) } } } diff --git a/compiler/forget/crates/forget_estree_swc/src/lib.rs b/compiler/forget/crates/forget_estree_swc/src/lib.rs index 43d5b23307..ab0fa810c3 100644 --- a/compiler/forget/crates/forget_estree_swc/src/lib.rs +++ b/compiler/forget/crates/forget_estree_swc/src/lib.rs @@ -11,8 +11,8 @@ use swc_core::ecma::ast::{ AssignOp, BinaryOp, BlockStmt, BlockStmtOrExpr, Callee, Decl, EsVersion, Expr, ExprOrSpread, Function, Ident, JSXAttr, JSXAttrName, JSXAttrOrSpread, JSXAttrValue, JSXElement, JSXElementChild, JSXElementName, JSXExpr, JSXMemberExpr, JSXObject, Lit, MemberExpr, - MemberProp, ModuleItem, OptChainBase, Pat, PatOrExpr, Program, Stmt, UnaryOp, VarDecl, - VarDeclKind, VarDeclOrExpr, + MemberProp, ModuleItem, OptChainBase, Pat, PatOrExpr, Program, Stmt, UnaryOp, UpdateOp, + VarDecl, VarDeclKind, VarDeclOrExpr, }; use swc_core::ecma::parser::{Syntax, TsConfig}; use swc_core::ecma::transforms::base::resolver; @@ -271,6 +271,14 @@ fn convert_statement(cx: &Context, stmt: &Stmt) -> forget_estree::Statement { range: convert_span(&item.span), })) } + Stmt::Labeled(item) => { + forget_estree::Statement::LabeledStatement(Box::new(forget_estree::LabeledStatement { + label: convert_identifier(cx, &item.label), + body: convert_statement(cx, &item.body), + loc: None, + range: convert_span(&item.span), + })) + } _ => todo!("translate statement {:#?}", stmt), } } @@ -461,6 +469,15 @@ fn convert_expression(cx: &Context, expr: &Expr) -> forget_estree::Expression { forget_estree::Expression::JSXElement(Box::new(convert_jsx_element(cx, expr))) } Expr::Paren(expr) => convert_expression(cx, &expr.expr), + Expr::Update(expr) => { + forget_estree::Expression::UpdateExpression(Box::new(forget_estree::UpdateExpression { + operator: convert_update_operator(expr.op), + argument: convert_expression(cx, &expr.arg), + prefix: expr.prefix, + loc: None, + range: convert_span(&expr.span), + })) + } _ => todo!("translate expression {:#?}", expr), } } @@ -885,6 +902,13 @@ fn convert_binary_operator(op: BinaryOp) -> Operator { } } +fn convert_update_operator(op: UpdateOp) -> forget_estree::UpdateOperator { + match op { + UpdateOp::MinusMinus => forget_estree::UpdateOperator::Decrement, + UpdateOp::PlusPlus => forget_estree::UpdateOperator::Increment, + } +} + fn convert_pattern(cx: &Context, pat: &Pat) -> forget_estree::Pattern { match pat { Pat::Ident(pat) => { diff --git a/compiler/forget/crates/forget_fixtures/Cargo.toml b/compiler/forget/crates/forget_fixtures/Cargo.toml index 4bab804f10..9d0435ee9b 100644 --- a/compiler/forget/crates/forget_fixtures/Cargo.toml +++ b/compiler/forget/crates/forget_fixtures/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[dependencies] +[dev-dependencies] insta = { workspace = true } forget_estree = { workspace = true } forget_estree_swc = { workspace = true } diff --git a/compiler/forget/crates/forget_semantic_analysis/Cargo.toml b/compiler/forget/crates/forget_semantic_analysis/Cargo.toml index 726b9a18e3..61fc7e92b1 100644 --- a/compiler/forget/crates/forget_semantic_analysis/Cargo.toml +++ b/compiler/forget/crates/forget_semantic_analysis/Cargo.toml @@ -12,5 +12,12 @@ repository.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +forget_diagnostics = { workspace = true } forget_estree = { workspace = true } -forget_utils = { workspace = true } \ No newline at end of file +forget_utils = { workspace = true } +indexmap = { workspace = true } + +[dev-dependencies] +forget_hermes_parser = { workspace = true } +insta = { workspace = true } +serde_json = { workspace = true } \ No newline at end of file diff --git a/compiler/forget/crates/forget_semantic_analysis/src/analyze.rs b/compiler/forget/crates/forget_semantic_analysis/src/analyze.rs new file mode 100644 index 0000000000..80305d1e5e --- /dev/null +++ b/compiler/forget/crates/forget_semantic_analysis/src/analyze.rs @@ -0,0 +1,500 @@ +use forget_diagnostics::Diagnostic; +use forget_estree::{ + BreakStatement, ContinueStatement, ESTreeNode, Identifier, LabeledStatement, Program, + Statement, Visitor, +}; +use forget_utils::PointerAddress; +use indexmap::IndexMap; + +pub fn analyze(ast: &Program) -> SemanticAnalysis { + let mut analyzer = Analyzer::new(); + analyzer.visit_program(ast); + analyzer.results +} + +pub struct SemanticAnalysis { + root: ScopeId, + + // Storage of the semantic information + scopes: Vec, + labels: Vec