diff --git a/.rubocop.yml b/.rubocop.yml index 6c9be677..daf5a824 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -16,6 +16,15 @@ Layout/LineLength: Lint/AmbiguousBlockAssociation: Enabled: false +Lint/AmbiguousOperatorPrecedence: + Enabled: false + +Lint/AmbiguousRange: + Enabled: false + +Lint/BooleanSymbol: + Enabled: false + Lint/DuplicateBranch: Enabled: false @@ -28,9 +37,18 @@ Lint/InterpolationCheck: Lint/MissingSuper: Enabled: false +Lint/NonLocalExitFromIterator: + Enabled: false + Lint/RedundantRequireStatement: Enabled: false +Lint/SuppressedException: + Enabled: false + +Lint/UnderscorePrefixedVariableName: + Enabled: false + Lint/UnusedMethodArgument: AllowUnusedKeywordArguments: true @@ -46,15 +64,42 @@ Naming/MethodParameterName: Naming/RescuedExceptionsVariableName: PreferredName: error +Naming/VariableNumber: + Enabled: false + +Security/Eval: + Enabled: false + +Style/AccessorGrouping: + Enabled: false + Style/CaseEquality: Enabled: false +Style/CaseLikeIf: + Enabled: false + +Style/ClassVars: + Enabled: false + +Style/DocumentDynamicEvalDefinition: + Enabled: false + +Style/Documentation: + Enabled: false + +Style/EndBlock: + Enabled: false + Style/ExplicitBlockArgument: Enabled: false Style/FormatString: Enabled: false +Style/FormatStringToken: + Enabled: false + Style/GuardClause: Enabled: false @@ -79,6 +124,9 @@ Style/MutableConstant: Style/NegatedIfElseCondition: Enabled: false +Style/Next: + Enabled: false + Style/NumericPredicate: Enabled: false @@ -88,6 +136,9 @@ Style/ParallelAssignment: Style/PerlBackrefs: Enabled: false +Style/SafeNavigation: + Enabled: false + Style/SpecialGlobalVars: Enabled: false diff --git a/CHANGELOG.md b/CHANGELOG.md index e320cd82..557fdf5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [5.1.0] - 2022-12-28 + +### Added + +- An experiment in working with instruction 
sequences has been added to Syntax Tree. This is subject to change, so it is not well documented or tested at the moment. It does not impact other functionality. +- You can now format at a different base layer of indentation. This is an optional third argument to `SyntaxTree::format`. + +### Changed + +- Support forwarding anonymous keyword arguments with `**`. +- The `BodyStmt` node now has more correct location information. +- Ignore the `textDocument/documentColor` request coming into the language server to support clients that require that request be received. +- Do not attempt to convert `if..else` into ternaries if the predicate has a `Binary` node. +- Properly handle nested pattern matching when a rightward assignment is inside a `when` clause. + ## [5.0.1] - 2022-11-10 ### Changed @@ -456,7 +471,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...HEAD +[5.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...v5.1.0 [5.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.0...v5.0.1 [5.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.3.0...v5.0.0 [4.3.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.2.0...v4.3.0 diff --git a/Gemfile.lock b/Gemfile.lock index ffbdc5d1..47d0c66b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,25 +1,25 @@ PATH remote: . 
specs: - syntax_tree (5.0.1) - prettier_print (>= 1.1.0) + syntax_tree (5.1.0) + prettier_print (>= 1.2.0) GEM remote: https://rubygems.org/ specs: ast (2.4.2) docile (1.4.0) - json (2.6.2) + json (2.6.3) minitest (5.16.3) parallel (1.22.1) - parser (3.1.2.1) + parser (3.1.3.0) ast (~> 2.4.1) - prettier_print (1.1.0) + prettier_print (1.2.0) rainbow (3.1.1) rake (13.0.6) - regexp_parser (2.6.0) + regexp_parser (2.6.1) rexml (3.2.5) - rubocop (1.38.0) + rubocop (1.41.1) json (~> 2.3) parallel (~> 1.10) parser (>= 3.1.2.1) @@ -29,10 +29,10 @@ GEM rubocop-ast (>= 1.23.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 1.4.0, < 3.0) - rubocop-ast (1.23.0) + rubocop-ast (1.24.0) parser (>= 3.1.1.0) ruby-progressbar (1.11.0) - simplecov (0.21.2) + simplecov (0.22.0) docile (~> 1.1) simplecov-html (~> 0.11) simplecov_json_formatter (~> 0.1) diff --git a/README.md b/README.md index 0f1b626a..7a943ca8 100644 --- a/README.md +++ b/README.md @@ -324,11 +324,11 @@ stree write "**/{[!schema]*,*}.rb" ## Library -Syntax Tree can be used as a library to access the syntax tree underlying Ruby source code. +Syntax Tree can be used as a library to access the syntax tree underlying Ruby source code. The API is described below. For the full library documentation, see the [RDoc documentation](https://ruby-syntax-tree.github.io/syntax_tree/). ### SyntaxTree.read(filepath) -This function takes a filepath and returns a string associated with the content of that file. It is similar in functionality to `File.read`, except htat it takes into account Ruby-level file encoding (through magic comments at the top of the file). +This function takes a filepath and returns a string associated with the content of that file. It is similar in functionality to `File.read`, except that it takes into account Ruby-level file encoding (through magic comments at the top of the file). 
### SyntaxTree.parse(source) @@ -570,7 +570,7 @@ SyntaxTree::Formatter.format(source, program.accept(visitor)) ### WithEnvironment The `WithEnvironment` module can be included in visitors to automatically keep track of local variables and arguments -defined inside each environment. A `current_environment` accessor is made availble to the request, allowing it to find +defined inside each environment. A `current_environment` accessor is made available to the request, allowing it to find all usages and definitions of a local. ```ruby @@ -611,7 +611,7 @@ The language server also responds to the relatively new inlay hints request. Thi 1 + 2 * 3 ``` -Implicity, the `2 * 3` is going to be executed first because the `*` operator has higher precedence than the `+` operator. To ease mental overhead, our language server includes small parentheses to make this explicit, as in: +Implicitly, the `2 * 3` is going to be executed first because the `*` operator has higher precedence than the `+` operator. To ease mental overhead, our language server includes small parentheses to make this explicit, as in: ```ruby 1 + ₍2 * 3₎ @@ -686,7 +686,7 @@ Below are listed all of the "official" language plugins hosted under the same Gi ## Integration -Syntax Tree's goal is to seemlessly integrate into your workflow. To this end, it provides a couple of additional tools beyond the CLI and the Ruby library. +Syntax Tree's goal is to seamlessly integrate into your workflow. To this end, it provides a couple of additional tools beyond the CLI and the Ruby library. 
### Rake diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 418468a9..ab7ad7f9 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "etc" +require "fiddle" require "json" require "pp" require "prettier_print" @@ -9,6 +10,7 @@ require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" +require_relative "syntax_tree/dsl" require_relative "syntax_tree/version" require_relative "syntax_tree/basic_visitor" @@ -25,6 +27,17 @@ require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/yarv" +require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/decompiler" +require_relative "syntax_tree/yarv/disassembler" +require_relative "syntax_tree/yarv/instruction_sequence" +require_relative "syntax_tree/yarv/instructions" +require_relative "syntax_tree/yarv/legacy" +require_relative "syntax_tree/yarv/local_table" +require_relative "syntax_tree/yarv/assembler" + # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. It can be used to @@ -44,6 +57,10 @@ module SyntaxTree # It shouldn't really be changed except in very niche circumstances. DEFAULT_RUBY_VERSION = Formatter::SemanticVersion.new(RUBY_VERSION).freeze + # The default indentation level for formatting. We allow changing this so + # that Syntax Tree can format arbitrary parts of a document. + DEFAULT_INDENTATION = 0 + # This is a hook provided so that plugins can register themselves as the # handler for a particular file type. 
def self.register_handler(extension, handler) @@ -61,12 +78,13 @@ def self.parse(source) def self.format( source, maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, options: Formatter::Options.new ) formatter = Formatter.new(source, [], maxwidth, options: options) parse(source).format(formatter) - formatter.flush + formatter.flush(base_indentation) formatter.output.join end diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb new file mode 100644 index 00000000..860a1fe5 --- /dev/null +++ b/lib/syntax_tree/dsl.rb @@ -0,0 +1,1004 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module provides shortcuts for creating AST nodes. + module DSL + # Create a new BEGINBlock node. + def BEGINBlock(lbrace, statements) + BEGINBlock.new( + lbrace: lbrace, + statements: statements, + location: Location.default + ) + end + + # Create a new CHAR node. + def CHAR(value) + CHAR.new(value: value, location: Location.default) + end + + # Create a new ENDBlock node. + def ENDBlock(lbrace, statements) + ENDBlock.new( + lbrace: lbrace, + statements: statements, + location: Location.default + ) + end + + # Create a new EndContent node. + def EndContent(value) + EndContent.new(value: value, location: Location.default) + end + + # Create a new AliasNode node. + def AliasNode(left, right) + AliasNode.new(left: left, right: right, location: Location.default) + end + + # Create a new ARef node. + def ARef(collection, index) + ARef.new(collection: collection, index: index, location: Location.default) + end + + # Create a new ARefField node. + def ARefField(collection, index) + ARefField.new( + collection: collection, + index: index, + location: Location.default + ) + end + + # Create a new ArgParen node. + def ArgParen(arguments) + ArgParen.new(arguments: arguments, location: Location.default) + end + + # Create a new Args node. + def Args(parts) + Args.new(parts: parts, location: Location.default) + end + + # Create a new ArgBlock node. 
+ def ArgBlock(value) + ArgBlock.new(value: value, location: Location.default) + end + + # Create a new ArgStar node. + def ArgStar(value) + ArgStar.new(value: value, location: Location.default) + end + + # Create a new ArgsForward node. + def ArgsForward + ArgsForward.new(location: Location.default) + end + + # Create a new ArrayLiteral node. + def ArrayLiteral(lbracket, contents) + ArrayLiteral.new( + lbracket: lbracket, + contents: contents, + location: Location.default + ) + end + + # Create a new AryPtn node. + def AryPtn(constant, requireds, rest, posts) + AryPtn.new( + constant: constant, + requireds: requireds, + rest: rest, + posts: posts, + location: Location.default + ) + end + + # Create a new Assign node. + def Assign(target, value) + Assign.new(target: target, value: value, location: Location.default) + end + + # Create a new Assoc node. + def Assoc(key, value) + Assoc.new(key: key, value: value, location: Location.default) + end + + # Create a new AssocSplat node. + def AssocSplat(value) + AssocSplat.new(value: value, location: Location.default) + end + + # Create a new Backref node. + def Backref(value) + Backref.new(value: value, location: Location.default) + end + + # Create a new Backtick node. + def Backtick(value) + Backtick.new(value: value, location: Location.default) + end + + # Create a new BareAssocHash node. + def BareAssocHash(assocs) + BareAssocHash.new(assocs: assocs, location: Location.default) + end + + # Create a new Begin node. + def Begin(bodystmt) + Begin.new(bodystmt: bodystmt, location: Location.default) + end + + # Create a new PinnedBegin node. + def PinnedBegin(statement) + PinnedBegin.new(statement: statement, location: Location.default) + end + + # Create a new Binary node. + def Binary(left, operator, right) + Binary.new( + left: left, + operator: operator, + right: right, + location: Location.default + ) + end + + # Create a new BlockVar node. 
+ def BlockVar(params, locals) + BlockVar.new(params: params, locals: locals, location: Location.default) + end + + # Create a new BlockArg node. + def BlockArg(name) + BlockArg.new(name: name, location: Location.default) + end + + # Create a new BodyStmt node. + def BodyStmt( + statements, + rescue_clause, + else_keyword, + else_clause, + ensure_clause + ) + BodyStmt.new( + statements: statements, + rescue_clause: rescue_clause, + else_keyword: else_keyword, + else_clause: else_clause, + ensure_clause: ensure_clause, + location: Location.default + ) + end + + # Create a new Break node. + def Break(arguments) + Break.new(arguments: arguments, location: Location.default) + end + + # Create a new CallNode node. + def CallNode(receiver, operator, message, arguments) + CallNode.new( + receiver: receiver, + operator: operator, + message: message, + arguments: arguments, + location: Location.default + ) + end + + # Create a new Case node. + def Case(keyword, value, consequent) + Case.new( + keyword: keyword, + value: value, + consequent: consequent, + location: Location.default + ) + end + + # Create a new RAssign node. + def RAssign(value, operator, pattern) + RAssign.new( + value: value, + operator: operator, + pattern: pattern, + location: Location.default + ) + end + + # Create a new ClassDeclaration node. + def ClassDeclaration(constant, superclass, bodystmt) + ClassDeclaration.new( + constant: constant, + superclass: superclass, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new Comma node. + def Comma(value) + Comma.new(value: value, location: Location.default) + end + + # Create a new Command node. + def Command(message, arguments, block) + Command.new( + message: message, + arguments: arguments, + block: block, + location: Location.default + ) + end + + # Create a new CommandCall node. 
+ def CommandCall(receiver, operator, message, arguments, block) + CommandCall.new( + receiver: receiver, + operator: operator, + message: message, + arguments: arguments, + block: block, + location: Location.default + ) + end + + # Create a new Comment node. + def Comment(value, inline) + Comment.new(value: value, inline: inline, location: Location.default) + end + + # Create a new Const node. + def Const(value) + Const.new(value: value, location: Location.default) + end + + # Create a new ConstPathField node. + def ConstPathField(parent, constant) + ConstPathField.new( + parent: parent, + constant: constant, + location: Location.default + ) + end + + # Create a new ConstPathRef node. + def ConstPathRef(parent, constant) + ConstPathRef.new( + parent: parent, + constant: constant, + location: Location.default + ) + end + + # Create a new ConstRef node. + def ConstRef(constant) + ConstRef.new(constant: constant, location: Location.default) + end + + # Create a new CVar node. + def CVar(value) + CVar.new(value: value, location: Location.default) + end + + # Create a new DefNode node. + def DefNode(target, operator, name, params, bodystmt) + DefNode.new( + target: target, + operator: operator, + name: name, + params: params, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new Defined node. + def Defined(value) + Defined.new(value: value, location: Location.default) + end + + # Create a new BlockNode node. + def BlockNode(opening, block_var, bodystmt) + BlockNode.new( + opening: opening, + block_var: block_var, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new RangeNode node. + def RangeNode(left, operator, right) + RangeNode.new( + left: left, + operator: operator, + right: right, + location: Location.default + ) + end + + # Create a new DynaSymbol node. + def DynaSymbol(parts, quote) + DynaSymbol.new(parts: parts, quote: quote, location: Location.default) + end + + # Create a new Else node. 
+ def Else(keyword, statements) + Else.new( + keyword: keyword, + statements: statements, + location: Location.default + ) + end + + # Create a new Elsif node. + def Elsif(predicate, statements, consequent) + Elsif.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new EmbDoc node. + def EmbDoc(value) + EmbDoc.new(value: value, location: Location.default) + end + + # Create a new EmbExprBeg node. + def EmbExprBeg(value) + EmbExprBeg.new(value: value, location: Location.default) + end + + # Create a new EmbExprEnd node. + def EmbExprEnd(value) + EmbExprEnd.new(value: value, location: Location.default) + end + + # Create a new EmbVar node. + def EmbVar(value) + EmbVar.new(value: value, location: Location.default) + end + + # Create a new Ensure node. + def Ensure(keyword, statements) + Ensure.new( + keyword: keyword, + statements: statements, + location: Location.default + ) + end + + # Create a new ExcessedComma node. + def ExcessedComma(value) + ExcessedComma.new(value: value, location: Location.default) + end + + # Create a new Field node. + def Field(parent, operator, name) + Field.new( + parent: parent, + operator: operator, + name: name, + location: Location.default + ) + end + + # Create a new FloatLiteral node. + def FloatLiteral(value) + FloatLiteral.new(value: value, location: Location.default) + end + + # Create a new FndPtn node. + def FndPtn(constant, left, values, right) + FndPtn.new( + constant: constant, + left: left, + values: values, + right: right, + location: Location.default + ) + end + + # Create a new For node. + def For(index, collection, statements) + For.new( + index: index, + collection: collection, + statements: statements, + location: Location.default + ) + end + + # Create a new GVar node. + def GVar(value) + GVar.new(value: value, location: Location.default) + end + + # Create a new HashLiteral node. 
+ def HashLiteral(lbrace, assocs) + HashLiteral.new( + lbrace: lbrace, + assocs: assocs, + location: Location.default + ) + end + + # Create a new Heredoc node. + def Heredoc(beginning, ending, dedent, parts) + Heredoc.new( + beginning: beginning, + ending: ending, + dedent: dedent, + parts: parts, + location: Location.default + ) + end + + # Create a new HeredocBeg node. + def HeredocBeg(value) + HeredocBeg.new(value: value, location: Location.default) + end + + # Create a new HeredocEnd node. + def HeredocEnd(value) + HeredocEnd.new(value: value, location: Location.default) + end + + # Create a new HshPtn node. + def HshPtn(constant, keywords, keyword_rest) + HshPtn.new( + constant: constant, + keywords: keywords, + keyword_rest: keyword_rest, + location: Location.default + ) + end + + # Create a new Ident node. + def Ident(value) + Ident.new(value: value, location: Location.default) + end + + # Create a new IfNode node. + def IfNode(predicate, statements, consequent) + IfNode.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new IfOp node. + def IfOp(predicate, truthy, falsy) + IfOp.new( + predicate: predicate, + truthy: truthy, + falsy: falsy, + location: Location.default + ) + end + + # Create a new Imaginary node. + def Imaginary(value) + Imaginary.new(value: value, location: Location.default) + end + + # Create a new In node. + def In(pattern, statements, consequent) + In.new( + pattern: pattern, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new Int node. + def Int(value) + Int.new(value: value, location: Location.default) + end + + # Create a new IVar node. + def IVar(value) + IVar.new(value: value, location: Location.default) + end + + # Create a new Kw node. + def Kw(value) + Kw.new(value: value, location: Location.default) + end + + # Create a new KwRestParam node. 
+ def KwRestParam(name) + KwRestParam.new(name: name, location: Location.default) + end + + # Create a new Label node. + def Label(value) + Label.new(value: value, location: Location.default) + end + + # Create a new LabelEnd node. + def LabelEnd(value) + LabelEnd.new(value: value, location: Location.default) + end + + # Create a new Lambda node. + def Lambda(params, statements) + Lambda.new( + params: params, + statements: statements, + location: Location.default + ) + end + + # Create a new LambdaVar node. + def LambdaVar(params, locals) + LambdaVar.new(params: params, locals: locals, location: Location.default) + end + + # Create a new LBrace node. + def LBrace(value) + LBrace.new(value: value, location: Location.default) + end + + # Create a new LBracket node. + def LBracket(value) + LBracket.new(value: value, location: Location.default) + end + + # Create a new LParen node. + def LParen(value) + LParen.new(value: value, location: Location.default) + end + + # Create a new MAssign node. + def MAssign(target, value) + MAssign.new(target: target, value: value, location: Location.default) + end + + # Create a new MethodAddBlock node. + def MethodAddBlock(call, block) + MethodAddBlock.new(call: call, block: block, location: Location.default) + end + + # Create a new MLHS node. + def MLHS(parts, comma) + MLHS.new(parts: parts, comma: comma, location: Location.default) + end + + # Create a new MLHSParen node. + def MLHSParen(contents, comma) + MLHSParen.new( + contents: contents, + comma: comma, + location: Location.default + ) + end + + # Create a new ModuleDeclaration node. + def ModuleDeclaration(constant, bodystmt) + ModuleDeclaration.new( + constant: constant, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new MRHS node. + def MRHS(parts) + MRHS.new(parts: parts, location: Location.default) + end + + # Create a new Next node. 
+ def Next(arguments) + Next.new(arguments: arguments, location: Location.default) + end + + # Create a new Op node. + def Op(value) + Op.new(value: value, location: Location.default) + end + + # Create a new OpAssign node. + def OpAssign(target, operator, value) + OpAssign.new( + target: target, + operator: operator, + value: value, + location: Location.default + ) + end + + # Create a new Params node. + def Params(requireds, optionals, rest, posts, keywords, keyword_rest, block) + Params.new( + requireds: requireds, + optionals: optionals, + rest: rest, + posts: posts, + keywords: keywords, + keyword_rest: keyword_rest, + block: block, + location: Location.default + ) + end + + # Create a new Paren node. + def Paren(lparen, contents) + Paren.new(lparen: lparen, contents: contents, location: Location.default) + end + + # Create a new Period node. + def Period(value) + Period.new(value: value, location: Location.default) + end + + # Create a new Program node. + def Program(statements) + Program.new(statements: statements, location: Location.default) + end + + # Create a new QSymbols node. + def QSymbols(beginning, elements) + QSymbols.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new QSymbolsBeg node. + def QSymbolsBeg(value) + QSymbolsBeg.new(value: value, location: Location.default) + end + + # Create a new QWords node. + def QWords(beginning, elements) + QWords.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new QWordsBeg node. + def QWordsBeg(value) + QWordsBeg.new(value: value, location: Location.default) + end + + # Create a new RationalLiteral node. + def RationalLiteral(value) + RationalLiteral.new(value: value, location: Location.default) + end + + # Create a new RBrace node. + def RBrace(value) + RBrace.new(value: value, location: Location.default) + end + + # Create a new RBracket node. 
+ def RBracket(value) + RBracket.new(value: value, location: Location.default) + end + + # Create a new Redo node. + def Redo + Redo.new(location: Location.default) + end + + # Create a new RegexpContent node. + def RegexpContent(beginning, parts) + RegexpContent.new( + beginning: beginning, + parts: parts, + location: Location.default + ) + end + + # Create a new RegexpBeg node. + def RegexpBeg(value) + RegexpBeg.new(value: value, location: Location.default) + end + + # Create a new RegexpEnd node. + def RegexpEnd(value) + RegexpEnd.new(value: value, location: Location.default) + end + + # Create a new RegexpLiteral node. + def RegexpLiteral(beginning, ending, parts) + RegexpLiteral.new( + beginning: beginning, + ending: ending, + parts: parts, + location: Location.default + ) + end + + # Create a new RescueEx node. + def RescueEx(exceptions, variable) + RescueEx.new( + exceptions: exceptions, + variable: variable, + location: Location.default + ) + end + + # Create a new Rescue node. + def Rescue(keyword, exception, statements, consequent) + Rescue.new( + keyword: keyword, + exception: exception, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new RescueMod node. + def RescueMod(statement, value) + RescueMod.new( + statement: statement, + value: value, + location: Location.default + ) + end + + # Create a new RestParam node. + def RestParam(name) + RestParam.new(name: name, location: Location.default) + end + + # Create a new Retry node. + def Retry + Retry.new(location: Location.default) + end + + # Create a new ReturnNode node. + def ReturnNode(arguments) + ReturnNode.new(arguments: arguments, location: Location.default) + end + + # Create a new RParen node. + def RParen(value) + RParen.new(value: value, location: Location.default) + end + + # Create a new SClass node. 
+ def SClass(target, bodystmt) + SClass.new(target: target, bodystmt: bodystmt, location: Location.default) + end + + # Create a new Statements node. + def Statements(body) + Statements.new(nil, body: body, location: Location.default) + end + + # Create a new StringContent node. + def StringContent(parts) + StringContent.new(parts: parts, location: Location.default) + end + + # Create a new StringConcat node. + def StringConcat(left, right) + StringConcat.new(left: left, right: right, location: Location.default) + end + + # Create a new StringDVar node. + def StringDVar(variable) + StringDVar.new(variable: variable, location: Location.default) + end + + # Create a new StringEmbExpr node. + def StringEmbExpr(statements) + StringEmbExpr.new(statements: statements, location: Location.default) + end + + # Create a new StringLiteral node. + def StringLiteral(parts, quote) + StringLiteral.new(parts: parts, quote: quote, location: Location.default) + end + + # Create a new Super node. + def Super(arguments) + Super.new(arguments: arguments, location: Location.default) + end + + # Create a new SymBeg node. + def SymBeg(value) + SymBeg.new(value: value, location: Location.default) + end + + # Create a new SymbolContent node. + def SymbolContent(value) + SymbolContent.new(value: value, location: Location.default) + end + + # Create a new SymbolLiteral node. + def SymbolLiteral(value) + SymbolLiteral.new(value: value, location: Location.default) + end + + # Create a new Symbols node. + def Symbols(beginning, elements) + Symbols.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new SymbolsBeg node. + def SymbolsBeg(value) + SymbolsBeg.new(value: value, location: Location.default) + end + + # Create a new TLambda node. + def TLambda(value) + TLambda.new(value: value, location: Location.default) + end + + # Create a new TLamBeg node. 
+ def TLamBeg(value) + TLamBeg.new(value: value, location: Location.default) + end + + # Create a new TopConstField node. + def TopConstField(constant) + TopConstField.new(constant: constant, location: Location.default) + end + + # Create a new TopConstRef node. + def TopConstRef(constant) + TopConstRef.new(constant: constant, location: Location.default) + end + + # Create a new TStringBeg node. + def TStringBeg(value) + TStringBeg.new(value: value, location: Location.default) + end + + # Create a new TStringContent node. + def TStringContent(value) + TStringContent.new(value: value, location: Location.default) + end + + # Create a new TStringEnd node. + def TStringEnd(value) + TStringEnd.new(value: value, location: Location.default) + end + + # Create a new Not node. + def Not(statement, parentheses) + Not.new( + statement: statement, + parentheses: parentheses, + location: Location.default + ) + end + + # Create a new Unary node. + def Unary(operator, statement) + Unary.new( + operator: operator, + statement: statement, + location: Location.default + ) + end + + # Create a new Undef node. + def Undef(symbols) + Undef.new(symbols: symbols, location: Location.default) + end + + # Create a new UnlessNode node. + def UnlessNode(predicate, statements, consequent) + UnlessNode.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new UntilNode node. + def UntilNode(predicate, statements) + UntilNode.new( + predicate: predicate, + statements: statements, + location: Location.default + ) + end + + # Create a new VarField node. + def VarField(value) + VarField.new(value: value, location: Location.default) + end + + # Create a new VarRef node. + def VarRef(value) + VarRef.new(value: value, location: Location.default) + end + + # Create a new PinnedVarRef node. + def PinnedVarRef(value) + PinnedVarRef.new(value: value, location: Location.default) + end + + # Create a new VCall node. 
+ def VCall(value) + VCall.new(value: value, location: Location.default) + end + + # Create a new VoidStmt node. + def VoidStmt + VoidStmt.new(location: Location.default) + end + + # Create a new When node. + def When(arguments, statements, consequent) + When.new( + arguments: arguments, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new WhileNode node. + def WhileNode(predicate, statements) + WhileNode.new( + predicate: predicate, + statements: statements, + location: Location.default + ) + end + + # Create a new Word node. + def Word(parts) + Word.new(parts: parts, location: Location.default) + end + + # Create a new Words node. + def Words(beginning, elements) + Words.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new WordsBeg node. + def WordsBeg(value) + WordsBeg.new(value: value, location: Location.default) + end + + # Create a new XString node. + def XString(parts) + XString.new(parts: parts, location: Location.default) + end + + # Create a new XStringLiteral node. + def XStringLiteral(parts) + XStringLiteral.new(parts: parts, location: Location.default) + end + + # Create a new YieldNode node. + def YieldNode(arguments) + YieldNode.new(arguments: arguments, location: Location.default) + end + + # Create a new ZSuper node. 
+ def ZSuper + ZSuper.new(location: Location.default) + end + end +end diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index d5d251c6..fddc06fe 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -84,10 +84,10 @@ def initialize(source, *args, options: Options.new) @target_ruby_version = options.target_ruby_version end - def self.format(source, node) + def self.format(source, node, base_indentation = 0) q = new(source, []) q.format(node) - q.flush + q.flush(base_indentation) q.output.join end diff --git a/lib/syntax_tree/language_server.rb b/lib/syntax_tree/language_server.rb index c2265c32..a7b23664 100644 --- a/lib/syntax_tree/language_server.rb +++ b/lib/syntax_tree/language_server.rb @@ -111,6 +111,8 @@ def run write(id: request[:id], result: PP.pp(SyntaxTree.parse(store[uri]), +"")) when Request[method: %r{\$/.+}] # ignored + when Request[method: "textDocument/documentColor", params: { textDocument: { uri: :any } }] + # ignored else raise ArgumentError, "Unhandled: #{request}" end diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index f32789a3..e5b09044 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -1604,7 +1604,7 @@ def format_contents(q) # { **pairs } # class AssocSplat < Node - # [untyped] the expression that is being splatted + # [nil | untyped] the expression that is being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1643,7 +1643,7 @@ def deconstruct_keys(_keys) def format(q) q.text("**") - q.format(value) + q.format(value) if value end def ===(other) @@ -6160,7 +6160,7 @@ def call(q, node) # want to force it to not be a ternary, like if the predicate is an # assignment because it's hard to read. case node.predicate - when Assign, Command, CommandCall, MAssign, OpAssign + when Assign, Binary, Command, CommandCall, MAssign, OpAssign return false when Not return false unless node.predicate.parentheses? 
@@ -6183,10 +6183,10 @@ def call(q, node) # and default instead to breaking them into multiple lines. def ternaryable?(statement) case statement - when AliasNode, Assign, Break, Command, CommandCall, Heredoc, IfNode, - IfOp, Lambda, MAssign, Next, OpAssign, RescueMod, ReturnNode, - Super, Undef, UnlessNode, UntilNode, VoidStmt, WhileNode, - YieldNode, ZSuper + when AliasNode, Assign, Break, Command, CommandCall, Defined, Heredoc, + IfNode, IfOp, Lambda, MAssign, Next, OpAssign, RescueMod, + ReturnNode, Super, Undef, UnlessNode, UntilNode, VoidStmt, + WhileNode, YieldNode, ZSuper # This is a list of nodes that should not be allowed to be a part of a # ternary clause. false diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 23a3196c..fcefed30 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -744,7 +744,7 @@ def on_assoc_splat(value) AssocSplat.new( value: value, - location: operator.location.to(value.location) + location: operator.location.to((value || operator).location) ) end @@ -820,13 +820,13 @@ def on_begin(bodystmt) end bodystmt.bind( - keyword.location.end_char, + find_next_statement_start(keyword.location.end_char), keyword.location.end_column, end_location.end_char, end_location.end_column ) - location = keyword.location.to(bodystmt.location) + location = keyword.location.to(end_location) Begin.new(bodystmt: bodystmt, location: location) end end @@ -905,14 +905,15 @@ def on_blockarg(name) # (nil | Ensure) ensure_clause # ) -> BodyStmt def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + parts = [statements, rescue_clause, else_clause, ensure_clause].compact + BodyStmt.new( statements: statements, rescue_clause: rescue_clause, else_keyword: else_clause && consume_keyword(:else), else_clause: else_clause, ensure_clause: ensure_clause, - location: - Location.fixed(line: lineno, char: char_pos, column: current_column) + location: parts.first.location.to(parts.last.location) ) end @@ -994,22 
+995,11 @@ def on_call(receiver, operator, message) # :call-seq: # on_case: (untyped value, untyped consequent) -> Case | RAssign def on_case(value, consequent) - if (keyword = find_keyword(:case)) - tokens.delete(keyword) - - Case.new( - keyword: keyword, - value: value, - consequent: consequent, - location: keyword.location.to(consequent.location) - ) - else - operator = - if (keyword = find_keyword(:in)) - tokens.delete(keyword) - else - consume_operator(:"=>") - end + if value && (operator = find_keyword(:in) || find_operator(:"=>")) && + (value.location.end_char...consequent.location.start_char).cover?( + operator.location.start_char + ) + tokens.delete(operator) node = RAssign.new( @@ -1021,6 +1011,15 @@ def on_case(value, consequent) PinVisitor.visit(node, tokens) node + else + keyword = consume_keyword(:case) + + Case.new( + keyword: keyword, + value: value, + consequent: consequent, + location: keyword.location.to(consequent.location) + ) end end diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index 340bbbdf..d9bbdfa4 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "5.0.1" + VERSION = "5.1.0" end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb new file mode 100644 index 00000000..97592d4d --- /dev/null +++ b/lib/syntax_tree/yarv.rb @@ -0,0 +1,287 @@ +# frozen_string_literal: true + +require "forwardable" + +module SyntaxTree + # This module provides an object representation of the YARV bytecode. 
+ module YARV + class VM + class Jump + attr_reader :name + + def initialize(name) + @name = name + end + end + + class Leave + attr_reader :value + + def initialize(value) + @value = value + end + end + + class Frame + attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars + + def initialize(iseq, parent, stack_index, _self, nesting) + @iseq = iseq + @parent = parent + @stack_index = stack_index + @_self = _self + @nesting = nesting + @svars = {} + end + end + + class TopFrame < Frame + def initialize(iseq) + super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) + end + end + + class BlockFrame < Frame + def initialize(iseq, parent, stack_index) + super(iseq, parent, stack_index, parent._self, parent.nesting) + end + end + + class MethodFrame < Frame + attr_reader :name, :block + + def initialize(iseq, parent, stack_index, _self, name, block) + super(iseq, parent, stack_index, _self, parent.nesting) + @name = name + @block = block + end + end + + class ClassFrame < Frame + def initialize(iseq, parent, stack_index, _self) + super(iseq, parent, stack_index, _self, parent.nesting + [_self]) + end + end + + class FrozenCore + define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } + + define_method("core#hash_merge_ptr") do |hash, *values| + hash.merge(values.each_slice(2).to_h) + end + + define_method("core#set_method_alias") do |clazz, new_name, old_name| + clazz.alias_method(new_name, old_name) + end + + define_method("core#set_variable_alias") do |new_name, old_name| + # Using eval here since there isn't a reflection API to be able to + # alias global variables. 
+ eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) + end + + define_method("core#set_postexe") { |&block| END { block.call } } + + define_method("core#undef_method") do |clazz, name| + clazz.undef_method(name) + end + end + + FROZEN_CORE = FrozenCore.new.freeze + + extend Forwardable + + attr_reader :stack + def_delegators :stack, :push, :pop + + attr_reader :frame + def_delegators :frame, :_self + + def initialize + @stack = [] + @frame = nil + end + + ########################################################################## + # Helper methods for frames + ########################################################################## + + def run_frame(frame) + # First, set the current frame to the given value. + @frame = frame + + # Next, set up the local table for the frame. This is actually incorrect + # as it could use the values already on the stack, but for now we're + # just doing this for simplicity. + frame.iseq.local_table.size.times { push(nil) } + + # Yield so that some frame-specific setup can be done. + yield if block_given? + + # This hash is going to hold a mapping of label names to their + # respective indices in our instruction list. + labels = {} + + # This array is going to hold our instructions. + insns = [] + + # Here we're going to preprocess the instruction list from the + # instruction sequence to set up the labels hash and the insns array. + frame.iseq.insns.each do |insn| + case insn + when Integer, Symbol + # skip + when InstructionSequence::Label + labels[insn.name] = insns.length + else + insns << insn + end + end + + # Finally we can execute the instructions one at a time. If they return + # jumps or leaves we will handle those appropriately. 
+ pc = 0 + while pc < insns.length + insn = insns[pc] + pc += 1 + + case (result = insn.call(self)) + when Jump + pc = labels[result.name] + when Leave + return result.value + end + end + ensure + @stack = stack[0...frame.stack_index] + @frame = frame.parent + end + + def run_top_frame(iseq) + run_frame(TopFrame.new(iseq)) + end + + def run_block_frame(iseq, *args, &block) + run_frame(BlockFrame.new(iseq, frame, stack.length)) do + locals = [*args, block] + iseq.local_table.size.times do |index| + local_set(index, 0, locals.shift) + end + end + end + + def run_class_frame(iseq, clazz) + run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) + end + + def run_method_frame(name, iseq, _self, *args, **kwargs, &block) + run_frame( + MethodFrame.new(iseq, frame, stack.length, _self, name, block) + ) do + locals = [*args, block] + + if iseq.argument_options[:keyword] + # First, set up the keyword bits array. + keyword_bits = + iseq.argument_options[:keyword].map do |config| + kwargs.key?(config.is_a?(Array) ? config[0] : config) + end + + iseq.local_table.locals.each_with_index do |local, index| + # If this is the keyword bits local, then set it appropriately. + if local.name == 2 + locals.insert(index, keyword_bits) + next + end + + # First, find the configuration for this local in the keywords + # list if it exists. + name = local.name + config = + iseq.argument_options[:keyword].find do |keyword| + keyword.is_a?(Array) ? keyword[0] == name : keyword == name + end + + # If the configuration doesn't exist, then the local is not a + # keyword local. + next unless config + + if !config.is_a?(Array) + # required keyword + locals.insert(index, kwargs.fetch(name)) + elsif !config[1].nil? 
+ # optional keyword with embedded default value + locals.insert(index, kwargs.fetch(name, config[1])) + else + # optional keyword with expression default value + locals.insert(index, nil) + end + end + end + + iseq.local_table.size.times do |index| + local_set(index, 0, locals.shift) + end + end + end + + ########################################################################## + # Helper methods for instructions + ########################################################################## + + def const_base + frame.nesting.last + end + + def frame_at(level) + current = frame + level.times { current = current.parent } + current + end + + def frame_svar + current = frame + current = current.parent while current.is_a?(BlockFrame) + current + end + + def frame_yield + current = frame + current = current.parent until current.is_a?(MethodFrame) + current + end + + def frozen_core + FROZEN_CORE + end + + def jump(label) + Jump.new(label.name) + end + + def leave + Leave.new(pop) + end + + def local_get(index, level) + stack[frame_at(level).stack_index + index] + end + + def local_set(index, level, value) + stack[frame_at(level).stack_index + index] = value + end + end + + # Compile the given source into a YARV instruction sequence. + def self.compile(source, options = Compiler::Options.new) + SyntaxTree.parse(source).accept(Compiler.new(options)) + end + + # Compile and interpret the given source. 
+ def self.interpret(source, options = Compiler::Options.new) + iseq = RubyVM::InstructionSequence.compile(source, **options) + iseq = InstructionSequence.from(iseq.to_a) + VM.new.run_top_frame(iseq) + end + end +end diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb new file mode 100644 index 00000000..efb179c1 --- /dev/null +++ b/lib/syntax_tree/yarv/assembler.rb @@ -0,0 +1,459 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + class Assembler + class ObjectVisitor < Compiler::RubyVisitor + def visit_dyna_symbol(node) + if node.parts.empty? + :"" + else + raise CompilationError + end + end + + def visit_string_literal(node) + case node.parts.length + when 0 + "" + when 1 + raise CompilationError unless node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + raise CompilationError + end + end + end + + CALLDATA_FLAGS = { + "ARGS_SPLAT" => CallData::CALL_ARGS_SPLAT, + "ARGS_BLOCKARG" => CallData::CALL_ARGS_BLOCKARG, + "FCALL" => CallData::CALL_FCALL, + "VCALL" => CallData::CALL_VCALL, + "ARGS_SIMPLE" => CallData::CALL_ARGS_SIMPLE, + "BLOCKISEQ" => CallData::CALL_BLOCKISEQ, + "KWARG" => CallData::CALL_KWARG, + "KW_SPLAT" => CallData::CALL_KW_SPLAT, + "TAILCALL" => CallData::CALL_TAILCALL, + "SUPER" => CallData::CALL_SUPER, + "ZSUPER" => CallData::CALL_ZSUPER, + "OPT_SEND" => CallData::CALL_OPT_SEND, + "KW_SPLAT_MUT" => CallData::CALL_KW_SPLAT_MUT + }.freeze + + DEFINED_TYPES = [ + nil, + "nil", + "instance-variable", + "local-variable", + "global-variable", + "class variable", + "constant", + "method", + "yield", + "super", + "self", + "true", + "false", + "assignment", + "expression", + "ref", + "func", + "constant-from" + ].freeze + + attr_reader :filepath + + def initialize(filepath) + @filepath = filepath + end + + def assemble + iseq = InstructionSequence.new(:top, "
", nil, Location.default) + assemble_iseq(iseq, File.readlines(filepath, chomp: true)) + + iseq.compile! + iseq + end + + def self.assemble(filepath) + new(filepath).assemble + end + + private + + def assemble_iseq(iseq, lines) + labels = Hash.new { |hash, name| hash[name] = iseq.label } + line_index = 0 + + while line_index < lines.length + line = lines[line_index] + line_index += 1 + + case line.strip + when "", /^;/ + # skip over blank lines and comments + next + when /^(\w+):$/ + # create labels + iseq.push(labels[$1]) + next + when /^__END__/ + # skip over the rest of the file when we hit __END__ + return + end + + insn, operands = line.split(" ", 2) + + case insn + when "adjuststack" + iseq.adjuststack(parse_number(operands)) + when "anytostring" + iseq.anytostring + when "branchif" + iseq.branchif(labels[operands]) + when "branchnil" + iseq.branchnil(labels[operands]) + when "branchunless" + iseq.branchunless(labels[operands]) + when "checkkeyword" + kwbits_index, keyword_index = operands.split(/,\s*/) + iseq.checkkeyword( + parse_number(kwbits_index), + parse_number(keyword_index) + ) + when "checkmatch" + iseq.checkmatch(parse_number(operands)) + when "checktype" + iseq.checktype(parse_number(operands)) + when "concatarray" + iseq.concatarray + when "concatstrings" + iseq.concatstrings(parse_number(operands)) + when "defineclass" + body = parse_nested(lines[line_index..]) + line_index += body.length + + name_value, flags_value = operands.split(/,\s*/) + name = parse_symbol(name_value) + flags = parse_number(flags_value) + + class_iseq = iseq.class_child_iseq(name.to_s, Location.default) + assemble_iseq(class_iseq, body) + iseq.defineclass(name, class_iseq, flags) + when "defined" + type, object, message = operands.split(/,\s*/) + iseq.defined( + DEFINED_TYPES.index(type), + parse_symbol(object), + parse_string(message) + ) + when "definemethod" + body = parse_nested(lines[line_index..]) + line_index += body.length + + name = parse_symbol(operands) + 
method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + assemble_iseq(method_iseq, body) + + iseq.definemethod(name, method_iseq) + when "definesmethod" + body = parse_nested(lines[line_index..]) + line_index += body.length + + name = parse_symbol(operands) + method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + + assemble_iseq(method_iseq, body) + iseq.definesmethod(name, method_iseq) + when "dup" + iseq.dup + when "dupn" + iseq.dupn(parse_number(operands)) + when "duparray" + iseq.duparray(parse_type(operands, Array)) + when "duphash" + iseq.duphash(parse_type(operands, Hash)) + when "expandarray" + number, flags = operands.split(/,\s*/) + iseq.expandarray(parse_number(number), parse_number(flags)) + when "getblockparam" + lookup = find_local(iseq, operands) + iseq.getblockparam(lookup.index, lookup.level) + when "getblockparamproxy" + lookup = find_local(iseq, operands) + iseq.getblockparamproxy(lookup.index, lookup.level) + when "getclassvariable" + iseq.getclassvariable(parse_symbol(operands)) + when "getconstant" + iseq.getconstant(parse_symbol(operands)) + when "getglobal" + iseq.getglobal(parse_symbol(operands)) + when "getinstancevariable" + iseq.getinstancevariable(parse_symbol(operands)) + when "getlocal" + lookup = find_local(iseq, operands) + iseq.getlocal(lookup.index, lookup.level) + when "getspecial" + key, type = operands.split(/,\s*/) + iseq.getspecial(parse_number(key), parse_number(type)) + when "intern" + iseq.intern + when "invokeblock" + iseq.invokeblock( + operands ? 
parse_calldata(operands) : YARV.calldata(nil, 0)
+            )
+          when "invokesuper"
+            calldata =
+              if operands
+                parse_calldata(operands)
+              else
+                YARV.calldata(
+                  nil,
+                  0,
+                  CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE |
+                    CallData::CALL_SUPER
+                )
+              end
+
+            block_iseq =
+              if lines[line_index]&.start_with?(" ") # nil once input ends
+                body = parse_nested(lines[line_index..])
+                line_index += body.length
+
+                block_iseq = iseq.block_child_iseq(Location.default)
+                assemble_iseq(block_iseq, body)
+                block_iseq
+              end
+
+            iseq.invokesuper(calldata, block_iseq) # block_iseq may be nil
+          when "jump"
+            iseq.jump(labels[operands])
+          when "leave"
+            iseq.leave
+          when "newarray"
+            iseq.newarray(parse_number(operands))
+          when "newarraykwsplat"
+            iseq.newarraykwsplat(parse_number(operands))
+          when "newhash"
+            iseq.newhash(parse_number(operands))
+          when "newrange"
+            iseq.newrange(parse_options(operands, [0, 1]))
+          when "nop"
+            iseq.nop
+          when "objtostring"
+            iseq.objtostring(YARV.calldata(:to_s))
+          when "once"
+            block_iseq =
+              if lines[line_index]&.start_with?(" ") # nil once input ends
+                body = parse_nested(lines[line_index..])
+                line_index += body.length
+
+                block_iseq = iseq.block_child_iseq(Location.default)
+                assemble_iseq(block_iseq, body)
+                block_iseq
+              end
+
+            iseq.once(block_iseq, iseq.inline_storage)
+          when "opt_and"
+            iseq.send(YARV.calldata(:&, 1))
+          when "opt_aref"
+            iseq.send(YARV.calldata(:[], 1))
+          when "opt_aref_with"
+            iseq.opt_aref_with(parse_string(operands), YARV.calldata(:[], 1))
+          when "opt_aset"
+            iseq.send(YARV.calldata(:[]=, 2))
+          when "opt_aset_with"
+            iseq.opt_aset_with(parse_string(operands), YARV.calldata(:[]=, 2))
+          when "opt_case_dispatch"
+            cdhash_value, else_label_value = operands.split(/\s*\},\s*/)
+            cdhash_value.sub!(/\A\{/, "")
+
+            pairs =
+              cdhash_value
+                .split(/\s*,\s*/)
+                .map! { |pair| pair.split(/\s*=>\s*/) }
+
+            cdhash = pairs.to_h { |value, nm| [parse(value), labels[nm]] }
+            else_label = labels[else_label_value]
+
+            iseq.opt_case_dispatch(cdhash, else_label)
+          when "opt_div"
+            iseq.send(YARV.calldata(:/, 1))
+          when "opt_empty_p"
+            iseq.send(YARV.calldata(:empty?))
+          when "opt_eq"
+            iseq.send(YARV.calldata(:==, 1))
+          when "opt_ge"
+            iseq.send(YARV.calldata(:>=, 1))
+          when "opt_gt"
+            iseq.send(YARV.calldata(:>, 1))
+          when "opt_getconstant_path"
+            iseq.opt_getconstant_path(parse_type(operands, Array))
+          when "opt_le"
+            iseq.send(YARV.calldata(:<=, 1))
+          when "opt_length"
+            iseq.send(YARV.calldata(:length))
+          when "opt_lt"
+            iseq.send(YARV.calldata(:<, 1))
+          when "opt_ltlt"
+            iseq.send(YARV.calldata(:<<, 1))
+          when "opt_minus"
+            iseq.send(YARV.calldata(:-, 1))
+          when "opt_mod"
+            iseq.send(YARV.calldata(:%, 1))
+          when "opt_mult"
+            iseq.send(YARV.calldata(:*, 1))
+          when "opt_neq"
+            iseq.send(YARV.calldata(:!=, 1))
+          when "opt_newarray_max"
+            iseq.newarray(parse_number(operands))
+            iseq.send(YARV.calldata(:max))
+          when "opt_newarray_min"
+            iseq.newarray(parse_number(operands))
+            iseq.send(YARV.calldata(:min))
+          when "opt_nil_p"
+            iseq.send(YARV.calldata(:nil?))
+          when "opt_not"
+            iseq.send(YARV.calldata(:!))
+          when "opt_or"
+            iseq.send(YARV.calldata(:|, 1))
+          when "opt_plus"
+            iseq.send(YARV.calldata(:+, 1))
+          when "opt_regexpmatch2"
+            iseq.send(YARV.calldata(:=~, 1))
+          when "opt_reverse"
+            iseq.send(YARV.calldata(:reverse))
+          when "opt_send_without_block"
+            iseq.send(parse_calldata(operands))
+          when "opt_size"
+            iseq.send(YARV.calldata(:size))
+          when "opt_str_freeze"
+            iseq.putstring(parse_string(operands))
+            iseq.send(YARV.calldata(:freeze))
+          when "opt_str_uminus"
+            iseq.putstring(parse_string(operands))
+            iseq.send(YARV.calldata(:-@))
+          when "opt_succ"
+            iseq.send(YARV.calldata(:succ))
+          when "pop"
+            iseq.pop
+          when "putnil"
+            iseq.putnil
+          when "putobject"
+            iseq.putobject(parse(operands))
+          when "putself"
+            iseq.putself
+          when "putspecialobject"
+            iseq.putspecialobject(parse_options(operands, [1, 2, 3]))
+          when "putstring"
+            iseq.putstring(parse_string(operands))
+          when "send"
+            block_iseq =
+              if lines[line_index]&.start_with?(" ") # nil once input ends
+                body = parse_nested(lines[line_index..])
+                line_index += body.length
+
+                block_iseq = iseq.block_child_iseq(Location.default)
+                assemble_iseq(block_iseq, body)
+                block_iseq
+              end
+
+            iseq.send(parse_calldata(operands), block_iseq)
+          when "setblockparam"
+            lookup = find_local(iseq, operands)
+            iseq.setblockparam(lookup.index, lookup.level)
+          when "setconstant"
+            iseq.setconstant(parse_symbol(operands))
+          when "setglobal"
+            iseq.setglobal(parse_symbol(operands))
+          when "setlocal"
+            lookup = find_local(iseq, operands)
+            iseq.setlocal(lookup.index, lookup.level)
+          when "setn"
+            iseq.setn(parse_number(operands))
+          when "setclassvariable"
+            iseq.setclassvariable(parse_symbol(operands))
+          when "setinstancevariable"
+            iseq.setinstancevariable(parse_symbol(operands))
+          when "setspecial"
+            iseq.setspecial(parse_number(operands))
+          when "splatarray"
+            iseq.splatarray(parse_options(operands, [true, false]))
+          when "swap"
+            iseq.swap
+          when "throw"
+            iseq.throw(parse_number(operands))
+          when "topn"
+            iseq.topn(parse_number(operands))
+          when "toregexp"
+            options, length = operands.split(/,\s*/) # same separator as above
+            iseq.toregexp(parse_number(options), parse_number(length))
+          when "ARG_REQ"
+            iseq.argument_size += 1
+            iseq.local_table.plain(operands.to_sym)
+          when "ARG_BLOCK"
+            iseq.argument_options[:block_start] = iseq.argument_size
+            iseq.local_table.block(operands.to_sym)
+            iseq.argument_size += 1
+          else
+            raise "Could not understand: #{line}"
+          end
+        end
+      end
+
+      def find_local(iseq, operands)
+        name_string, level_string = operands.split(/,\s*/)
+        name = name_string.to_sym
+        level = level_string&.to_i || 0 # level defaults to the current scope
+
+        iseq.local_table.plain(name)
+        iseq.local_table.find(name, level)
+      end
+
+      def parse(value)
+        program = SyntaxTree.parse(value)
+        raise if program.statements.body.length != 1
+
+
program.statements.body.first.accept(ObjectVisitor.new) + end + + def parse_options(value, options) + parse(value).tap { raise unless options.include?(_1) } + end + + def parse_type(value, type) + parse(value).tap { raise unless _1.is_a?(type) } + end + + def parse_number(value) + parse_type(value, Integer) + end + + def parse_string(value) + parse_type(value, String) + end + + def parse_symbol(value) + parse_type(value, Symbol) + end + + def parse_nested(lines) + body = lines.take_while { |line| line.match?(/^($|;| )/) } + body.map! { |line| line.delete_prefix!(" ") || +"" } + end + + def parse_calldata(value) + message, argc_value, flags_value = value.split + flags = + if flags_value + flags_value.split("|").map(&CALLDATA_FLAGS).inject(:|) + else + CallData::CALL_ARGS_SIMPLE + end + + YARV.calldata(message.to_sym, argc_value&.to_i || 0, flags) + end + end + end +end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb new file mode 100644 index 00000000..f642fb2f --- /dev/null +++ b/lib/syntax_tree/yarv/bf.rb @@ -0,0 +1,179 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Parses the given source code into a syntax tree, compiles that syntax tree + # into YARV bytecode. + class Bf + attr_reader :source + + def initialize(source) + @source = source + end + + def compile + # Set up the top-level instruction sequence that will be returned. + iseq = InstructionSequence.new(:top, "", nil, location) + + # Set up the $tape global variable that will hold our state. + iseq.duphash({ 0 => 0 }) + iseq.setglobal(:$tape) + iseq.getglobal(:$tape) + iseq.putobject(0) + iseq.send(YARV.calldata(:default=, 1)) + + # Set up the $cursor global variable that will hold the current position + # in the tape. + iseq.putobject(0) + iseq.setglobal(:$cursor) + + stack = [] + source + .each_char + .chunk do |char| + # For each character, we're going to assign a type to it. 
This
+            # allows a couple of optimizations to be made by combining multiple
+            # instructions into single instructions, e.g., +++ becomes a single
+            # change_by(3) instruction.
+            case char
+            when "+", "-"
+              :change
+            when ">", "<"
+              :shift
+            when "."
+              :output
+            when ","
+              :input
+            when "[", "]"
+              :loop
+            else
+              :ignored
+            end
+          end
+          .each do |type, chunk|
+            # For each chunk, we're going to emit the appropriate instruction.
+            case type
+            when :change
+              change_by(iseq, chunk.count("+") - chunk.count("-"))
+            when :shift
+              shift_by(iseq, chunk.count(">") - chunk.count("<"))
+            when :output
+              chunk.length.times { output_char(iseq) }
+            when :input
+              chunk.length.times { input_char(iseq) }
+            when :loop
+              chunk.each do |char|
+                case char
+                when "["
+                  stack << loop_start(iseq)
+                when "]"
+                  loop_end(iseq, *stack.pop)
+                end
+              end
+            end
+          end
+
+        iseq.leave
+        iseq.compile!
+        iseq
+      end
+
+      private
+
+      # The location of the top instruction sequence, derived from the source.
+      # end_column is the last line's length; -1 stands in for "no newline".
+      def location
+        Location.new(
+          start_line: 1,
+          start_char: 0,
+          start_column: 0,
+          end_line: source.count("\n") + 1,
+          end_char: source.size,
+          end_column: source.size - (source.rindex("\n") || -1) - 1
+        )
+      end
+
+      # $tape[$cursor] += value
+      def change_by(iseq, value)
+        iseq.getglobal(:$tape)
+        iseq.getglobal(:$cursor)
+
+        iseq.getglobal(:$tape)
+        iseq.getglobal(:$cursor)
+        iseq.send(YARV.calldata(:[], 1))
+
+        if value < 0
+          iseq.putobject(-value)
+          iseq.send(YARV.calldata(:-, 1))
+        else
+          iseq.putobject(value)
+          iseq.send(YARV.calldata(:+, 1))
+        end
+
+        iseq.send(YARV.calldata(:[]=, 2))
+      end
+
+      # $cursor += value
+      def shift_by(iseq, value)
+        iseq.getglobal(:$cursor)
+
+        if value < 0
+          iseq.putobject(-value)
+          iseq.send(YARV.calldata(:-, 1))
+        else
+          iseq.putobject(value)
+          iseq.send(YARV.calldata(:+, 1))
+        end
+
+        iseq.setglobal(:$cursor)
+      end
+
+      # $stdout.putc($tape[$cursor].chr)
+      def output_char(iseq)
+        iseq.getglobal(:$stdout)
+
+        iseq.getglobal(:$tape)
+        iseq.getglobal(:$cursor)
+        iseq.send(YARV.calldata(:[], 1))
+        iseq.send(YARV.calldata(:chr))
+
+        iseq.send(YARV.calldata(:putc, 1))
+      end
+
+      # $tape[$cursor] = $stdin.getc.ord
+      def input_char(iseq)
+        iseq.getglobal(:$tape)
+        iseq.getglobal(:$cursor)
+
+        iseq.getglobal(:$stdin)
+        iseq.send(YARV.calldata(:getc))
+        iseq.send(YARV.calldata(:ord))
+
+        iseq.send(YARV.calldata(:[]=, 2))
+      end
+
+      # Emit the "[" check: skip past the matching "]" if $tape[$cursor] == 0.
+      def loop_start(iseq)
+        start_label = iseq.label
+        end_label = iseq.label
+
+        iseq.push(start_label)
+        iseq.getglobal(:$tape)
+        iseq.getglobal(:$cursor)
+        iseq.send(YARV.calldata(:[], 1))
+
+        iseq.putobject(0)
+        iseq.send(YARV.calldata(:==, 1))
+        iseq.branchif(end_label) # cell is zero: leave the loop
+
+        [start_label, end_label]
+      end
+
+      # Jump back to the start of the loop.
+      def loop_end(iseq, start_label, end_label)
+        iseq.jump(start_label) # the check at start_label decides whether to loop
+        iseq.push(end_label)
+      end
+    end
+  end
+end
diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb
new file mode 100644
index 00000000..4af5d6f0
--- /dev/null
+++ b/lib/syntax_tree/yarv/compiler.rb
@@ -0,0 +1,2287 @@
+# frozen_string_literal: true
+
+module SyntaxTree
+  module YARV
+    # This class is an experiment in transforming Syntax Tree nodes into their
+    # corresponding YARV instruction sequences. It attempts to mirror the
+    # behavior of RubyVM::InstructionSequence.compile.
+    #
+    # You use this as with any other visitor. First you parse code into a tree,
+    # then you visit it with this compiler. Visiting the root node of the tree
+    # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object.
+    # With that object you can call #to_a on it, which will return a serialized
+    # form of the instruction sequence as an array. This array _should_ mirror
+    # the array given by RubyVM::InstructionSequence#to_a.
+ # + # As an example, here is how you would compile a single expression: + # + # program = SyntaxTree.parse("1 + 2") + # program.accept(SyntaxTree::YARV::Compiler.new).to_a + # + # [ + # "YARVInstructionSequence/SimpleDataFormat", + # 3, + # 1, + # 1, + # {:arg_size=>0, :local_size=>0, :stack_max=>2}, + # "", + # "", + # "", + # 1, + # :top, + # [], + # {}, + # [], + # [ + # [:putobject_INT2FIX_1_], + # [:putobject, 2], + # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], + # [:leave] + # ] + # ] + # + # Note that this is the same output as calling: + # + # RubyVM::InstructionSequence.compile("1 + 2").to_a + # + class Compiler < BasicVisitor + # This represents a set of options that can be passed to the compiler to + # control how it compiles the code. It mirrors the options that can be + # passed to RubyVM::InstructionSequence.compile, except it only includes + # options that actually change the behavior. + class Options + def initialize( + frozen_string_literal: false, + inline_const_cache: true, + operands_unification: true, + peephole_optimization: true, + specialized_instruction: true, + tailcall_optimization: false + ) + @frozen_string_literal = frozen_string_literal + @inline_const_cache = inline_const_cache + @operands_unification = operands_unification + @peephole_optimization = peephole_optimization + @specialized_instruction = specialized_instruction + @tailcall_optimization = tailcall_optimization + end + + def to_hash + { + frozen_string_literal: @frozen_string_literal, + inline_const_cache: @inline_const_cache, + operands_unification: @operands_unification, + peephole_optimization: @peephole_optimization, + specialized_instruction: @specialized_instruction, + tailcall_optimization: @tailcall_optimization + } + end + + def frozen_string_literal! + @frozen_string_literal = true + end + + def frozen_string_literal? + @frozen_string_literal + end + + def inline_const_cache? + @inline_const_cache + end + + def operands_unification? 
+ @operands_unification + end + + def peephole_optimization? + @peephole_optimization + end + + def specialized_instruction? + @specialized_instruction + end + + def tailcall_optimization? + @tailcall_optimization + end + end + + # This visitor is responsible for converting Syntax Tree nodes into their + # corresponding Ruby structures. This is used to convert the operands of + # some instructions like putobject that push a Ruby object directly onto + # the stack. It is only used when the entire structure can be represented + # at compile-time, as opposed to constructed at run-time. + class RubyVisitor < BasicVisitor + # This error is raised whenever a node cannot be converted into a Ruby + # object at compile-time. + class CompilationError < StandardError + end + + # This will attempt to compile the given node. If it's possible, then + # it will return the compiled object. Otherwise it will return nil. + def self.compile(node) + node.accept(new) + rescue CompilationError + end + + def visit_array(node) + node.contents ? visit_all(node.contents.parts) : [] + end + + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. 
+            raise CompilationError unless assoc.is_a?(Assoc)
+            [visit(assoc.key), visit(assoc.value)]
+          end
+        end
+
+        def visit_float(node)
+          node.value.to_f
+        end
+
+        alias visit_hash visit_bare_assoc_hash
+
+        def visit_imaginary(node)
+          node.value.to_c
+        end
+
+        # Convert the source text of an integer literal into its Integer value.
+        # Kernel#Integer honors the same radix prefixes as integer literals:
+        #
+        #   0b101, 0B101   binary
+        #   0o17, 017      octal, including the bare leading-zero form
+        #   0d10           decimal
+        #   0x1f, 0X1F     hexadecimal
+        #   1_000          underscore separators
+        #
+        # String#to_i is not used because it ignores prefixes unless given a
+        # base: "017".to_i is 17 (not 15) and "0X1F".to_i is 0 (not 31).
+        def visit_int(node)
+          Integer(node.value)
+        end
+
+        def visit_label(node)
+          node.value.chomp(":").to_sym
+        end
+
+        def visit_mrhs(node)
+          visit_all(node.parts)
+        end
+
+        def visit_qsymbols(node)
+          node.elements.map { |element| visit(element).to_sym }
+        end
+
+        def visit_qwords(node)
+          visit_all(node.elements)
+        end
+
+        def visit_range(node)
+          left, right = [visit(node.left), visit(node.right)]
+          node.operator.value === ".." ? left..right : left...right
+        end
+
+        def visit_rational(node)
+          node.value.to_r
+        end
+
+        def visit_regexp_literal(node)
+          if node.parts.length == 1 && node.parts.first.is_a?(TStringContent)
+            Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node))
+          else
+            # Any interpolation of expressions or variables will result in the
+            # regular expression being constructed at run-time.
+            raise CompilationError
+          end
+        end
+
+        # This isn't actually a visit method, though maybe it should be. It is
+        # responsible for converting the set of string options on a regular
+        # expression into its equivalent integer.
+ def visit_regexp_literal_flags(node) + node + .options + .chars + .inject(0) do |accum, option| + accum | + case option + when "i" + Regexp::IGNORECASE + when "x" + Regexp::EXTENDED + when "m" + Regexp::MULTILINE + else + raise "Unknown regexp option: #{option}" + end + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. + raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) + end + + def visit_unsupported(_node) + raise CompilationError + end + + # Please forgive the metaprogramming here. This is used to create visit + # methods for every node that we did not explicitly handle. By default + # each of these methods will raise a CompilationError. + handled = instance_methods(false) + (Visitor.instance_methods(false) - handled).each do |method| + alias_method method, :visit_unsupported + end + end + + # These options mirror the compilation options that we currently support + # that can be also passed to RubyVM::InstructionSequence.compile. + attr_reader :options + + # The current instruction sequence that is being compiled. + attr_reader :iseq + + # A boolean to track if we're currently compiling the last statement + # within a set of statements. This information is necessary to determine + # if we need to return the value of the last statement. 
attr_reader :last_statement

# Stores the compilation options. No instruction sequence is active until a
# program is visited, and we start outside of any "last statement".
def initialize(options)
  @options = options
  @iseq = nil
  @last_statement = false
end

# Compiles the statements of a BEGIN block in place.
def visit_BEGIN(node)
  visit(node.statements)
end

# Compiles a character literal, dropping its first character. Emits
# putobject under frozen string literals and putstring otherwise.
def visit_CHAR(node)
  if options.frozen_string_literal?
    iseq.putobject(node.value[1..])
  else
    iseq.putstring(node.value[1..])
  end
end

# Compiles an END block as a `once` instruction whose child sequence
# registers the block body through core#set_postexe.
def visit_END(node)
  once_iseq =
    with_child_iseq(iseq.block_child_iseq(node.location)) do
      postexe_iseq =
        with_child_iseq(iseq.block_child_iseq(node.location)) do
          iseq.event(:RUBY_EVENT_B_CALL)

          *statements, last_statement = node.statements.body
          visit_all(statements)
          with_last_statement { visit(last_statement) }

          iseq.event(:RUBY_EVENT_B_RETURN)
          iseq.leave
        end

      iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE)
      iseq.send(
        YARV.calldata(:"core#set_postexe", 0, CallData::CALL_FCALL),
        postexe_iseq
      )
      iseq.leave
    end

  iseq.once(once_iseq, iseq.inline_storage)
  iseq.pop
end

# Compiles `alias` as a core#set_method_alias call on the VM core object.
def visit_alias(node)
  iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE)
  iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE)
  visit(node.left)
  visit(node.right)
  iseq.send(YARV.calldata(:"core#set_method_alias", 3))
end

# Compiles `collection[index]`. A single plain string index uses
# opt_aref_with when specialized instructions are enabled and string
# literals are not frozen.
def visit_aref(node)
  calldata = YARV.calldata(:[], 1)
  visit(node.collection)

  if !options.frozen_string_literal? &&
       options.specialized_instruction? && (node.index.parts.length == 1)
    arg = node.index.parts.first

    if arg.is_a?(StringLiteral) && (arg.parts.length == 1)
      string_part = arg.parts.first

      if string_part.is_a?(TStringContent)
        iseq.opt_aref_with(string_part.value, calldata)
        return
      end
    end
  end

  visit(node.index)
  iseq.send(calldata)
end

# Compiles the expression passed as a block argument (`&value`).
def visit_arg_block(node)
  visit(node.value)
end

# Compiles the arguments wrapped by a pair of parentheses.
def visit_arg_paren(node)
  visit(node.arguments)
end

# Compiles a splatted argument followed by splatarray(false).
def visit_arg_star(node)
  visit(node.value)
  iseq.splatarray(false)
end

# Compiles every argument in order.
def visit_args(node)
  visit_all(node.parts)
end

# Compiles an array literal: duparray when the whole literal can be
# evaluated at compile time, a keyword-splat form for `[**hash]`, and
# otherwise newarray/concatarray runs around any splatted elements.
def visit_array(node)
  if (compiled = RubyVisitor.compile(node))
    iseq.duparray(compiled)
  elsif node.contents && node.contents.parts.length == 1 &&
        node.contents.parts.first.is_a?(BareAssocHash) &&
        node.contents.parts.first.assocs.length == 1 &&
        node.contents.parts.first.assocs.first.is_a?(AssocSplat)
    iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE)
    iseq.newhash(0)
    visit(node.contents.parts.first)
    iseq.send(YARV.calldata(:"core#hash_merge_kwd", 2))
    iseq.newarraykwsplat(1)
  else
    # Number of plain elements pushed since the last array was built.
    length = 0

    node.contents.parts.each do |part|
      if part.is_a?(ArgStar)
        if length > 0
          iseq.newarray(length)
          length = 0
        end

        visit(part.value)
        iseq.concatarray
      else
        visit(part)
        length += 1
      end
    end

    iseq.newarray(length) if length > 0
    iseq.concatarray if length > 0 && length != node.contents.parts.length
  end
end

# Array patterns are not compiled here; no instructions are emitted.
def visit_aryptn(node)
end

# Compiles an assignment, dispatching on the kind of target.
def visit_assign(node)
  case node.target
  when ARefField
    calldata = YARV.calldata(:[]=, 2)

    # A single plain string index can use the specialized opt_aset_with.
    if !options.frozen_string_literal? &&
         options.specialized_instruction? &&
         (node.target.index.parts.length == 1)
      arg = node.target.index.parts.first

      if arg.is_a?(StringLiteral) && (arg.parts.length == 1)
        string_part = arg.parts.first

        if string_part.is_a?(TStringContent)
          visit(node.target.collection)
          visit(node.value)
          iseq.swap
          iseq.topn(1)
          iseq.opt_aset_with(string_part.value, calldata)
          iseq.pop
          return
        end
      end
    end

    iseq.putnil
    visit(node.target.collection)
    visit(node.target.index)
    visit(node.value)
    iseq.setn(3)
    iseq.send(calldata)
    iseq.pop
  when ConstPathField
    names = constant_names(node.target)
    name = names.pop

    if RUBY_VERSION >= "3.2"
      iseq.opt_getconstant_path(names)
      visit(node.value)
      iseq.swap
      iseq.topn(1)
      iseq.swap
      iseq.setconstant(name)
    else
      visit(node.value)
      iseq.dup if last_statement?
      iseq.opt_getconstant_path(names)
      iseq.setconstant(name)
    end
  when Field
    iseq.putnil
    visit(node.target)
    visit(node.value)
    iseq.setn(2)
    iseq.send(YARV.calldata(:"#{node.target.name.value}=", 1))
    iseq.pop
  when TopConstField
    name = node.target.constant.value.to_sym

    if RUBY_VERSION >= "3.2"
      iseq.putobject(Object)
      visit(node.value)
      iseq.swap
      iseq.topn(1)
      iseq.swap
      iseq.setconstant(name)
    else
      visit(node.value)
      iseq.dup if last_statement?
      iseq.putobject(Object)
      iseq.setconstant(name)
    end
  when VarField
    visit(node.value)
    iseq.dup if last_statement?

    case node.target.value
    when Const
      iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE)
      iseq.setconstant(node.target.value.value.to_sym)
    when CVar
      iseq.setclassvariable(node.target.value.value.to_sym)
    when GVar
      iseq.setglobal(node.target.value.value.to_sym)
    when Ident
      # Visiting the field returns the local variable lookup.
      lookup = visit(node.target)

      if lookup.local.is_a?(LocalTable::BlockLocal)
        iseq.setblockparam(lookup.index, lookup.level)
      else
        iseq.setlocal(lookup.index, lookup.level)
      end
    when IVar
      iseq.setinstancevariable(node.target.value.value.to_sym)
    end
  end
end

# Compiles the key and then the value of a `key => value` pair.
def visit_assoc(node)
  visit(node.key)
  visit(node.value)
end

# Compiles the expression being double-splatted.
def visit_assoc_splat(node)
  visit(node.value)
end

# Compiles a numbered back-reference via getspecial; the number is taken
# from the text after the first character and shifted left by one.
def visit_backref(node)
  iseq.getspecial(GetSpecial::SVAR_BACKREF, node.value[1..].to_i << 1)
end

# Compiles a brace-less hash: duphash when it can be evaluated at compile
# time, otherwise each association in order.
def visit_bare_assoc_hash(node)
  if (compiled = RubyVisitor.compile(node))
    iseq.duphash(compiled)
  else
    visit_all(node.assocs)
  end
end

# begin blocks are not compiled here; no instructions are emitted.
def visit_begin(node)
end

# Compiles a binary expression. `&&` and `||` short-circuit with branch
# instructions; every other operator becomes a one-argument send.
def visit_binary(node)
  case node.operator
  when :"&&"
    done_label = iseq.label

    visit(node.left)
    iseq.dup
    iseq.branchunless(done_label)

    iseq.pop
    visit(node.right)
    iseq.push(done_label)
  when :"||"
    visit(node.left)
    iseq.dup

    skip_right_label = iseq.label
    iseq.branchif(skip_right_label)
    iseq.pop

    visit(node.right)
    iseq.push(skip_right_label)
  else
    visit(node.left)
    visit(node.right)
    iseq.send(YARV.calldata(node.operator, 1))
  end
end

# Compiles a block into a child instruction sequence bracketed by the
# B_CALL and B_RETURN events.
def visit_block(node)
  with_child_iseq(iseq.block_child_iseq(node.location)) do
    iseq.event(:RUBY_EVENT_B_CALL)
    visit(node.block_var)
    visit(node.bodystmt)
    iseq.event(:RUBY_EVENT_B_RETURN)
    iseq.leave
  end
end

# Compiles block parameters and registers block-local variables. A block
# with exactly one required parameter and nothing else is flagged
# ambiguous_param0.
def visit_block_var(node)
  params = node.params

  if params.requireds.length == 1 && params.optionals.empty? &&
       !params.rest && params.posts.empty? && params.keywords.empty? &&
       !params.keyword_rest && !params.block
    iseq.argument_options[:ambiguous_param0] = true
  end

  visit(node.params)

  node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) }
end

# Registers a `&block` parameter as a block local and records its slot.
def visit_blockarg(node)
  iseq.argument_options[:block_start] = iseq.argument_size
  iseq.local_table.block(node.name.value.to_sym)
  iseq.argument_size += 1
end

# Compiles the statements of a body.
def visit_bodystmt(node)
  visit(node.statements)
end

# break is not compiled here; no instructions are emitted.
def visit_break(node)
end

# Compiles a method call: receiver, arguments, optional block and the final
# send. CallNode inputs are first rewrapped as a CommandCall with no block
# so that both shapes share this one code path.
def visit_call(node)
  if node.is_a?(CallNode)
    return(
      visit_call(
        CommandCall.new(
          receiver: node.receiver,
          operator: node.operator,
          message: node.message,
          arguments: node.arguments,
          block: nil,
          location: node.location
        )
      )
    )
  end

  # Track whether or not this is a method call on a block proxy receiver.
  # If it is, we can potentially do tailcall optimizations on it.
  block_receiver = false

  if node.receiver
    if node.receiver.is_a?(VarRef)
      lookup = iseq.local_variable(node.receiver.value.value.to_sym)

      if lookup.local.is_a?(LocalTable::BlockLocal)
        iseq.getblockparamproxy(lookup.index, lookup.level)
        block_receiver = true
      else
        visit(node.receiver)
      end
    else
      visit(node.receiver)
    end
  else
    iseq.putself
  end

  # Safe navigation skips the whole call when the receiver is nil.
  after_call_label = nil
  if node.operator&.value == "&."
    iseq.dup
    after_call_label = iseq.label
    iseq.branchnil(after_call_label)
  end

  arg_parts = argument_parts(node.arguments)
  argc = arg_parts.length
  flag = 0

  arg_parts.each do |arg_part|
    case arg_part
    when ArgBlock
      # A block argument is not counted as a positional argument.
      argc -= 1
      flag |= CallData::CALL_ARGS_BLOCKARG
      visit(arg_part)
    when ArgStar
      flag |= CallData::CALL_ARGS_SPLAT
      visit(arg_part)
    when ArgsForward
      flag |= CallData::CALL_TAILCALL if options.tailcall_optimization?

      flag |= CallData::CALL_ARGS_SPLAT
      lookup = iseq.local_table.find(:*)
      iseq.getlocal(lookup.index, lookup.level)
      iseq.splatarray(arg_parts.length != 1)

      flag |= CallData::CALL_ARGS_BLOCKARG
      lookup = iseq.local_table.find(:&)
      iseq.getblockparamproxy(lookup.index, lookup.level)
    when BareAssocHash
      flag |= CallData::CALL_KW_SPLAT
      visit(arg_part)
    else
      visit(arg_part)
    end
  end

  block_iseq = visit(node.block) if node.block

  # If there's no block and we don't already have any special flags set,
  # then we can safely call this simple arguments. Note that has to be the
  # first flag we set after looking at the arguments to get the flags
  # correct.
  flag |= CallData::CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0

  # If there's no receiver, then this is an "fcall".
  flag |= CallData::CALL_FCALL if node.receiver.nil?

  # If we're calling a method on the passed block object and we have
  # tailcall optimizations turned on, then we can set the tailcall flag.
  if block_receiver && options.tailcall_optimization?
    flag |= CallData::CALL_TAILCALL
  end

  iseq.send(
    YARV.calldata(node.message.value.to_sym, argc, flag),
    block_iseq
  )

  # Labels are attached with push everywhere else in this compiler; event
  # is reserved for the :RUBY_EVENT_* markers.
  iseq.push(after_call_label) if after_call_label
end

# Compiles a case/when expression: each clause's arguments are tested with
# `===`, followed by the else body (or nil) and then the clause bodies
# behind their labels.
def visit_case(node)
  visit(node.value) if node.value

  clauses = []
  else_clause = nil
  current = node.consequent

  # Walk the chain of clauses, stopping once the else clause is reached.
  while current
    clauses << current

    if (current = current.consequent).is_a?(Else)
      else_clause = current
      break
    end
  end

  branches =
    clauses.map do |clause|
      visit(clause.arguments)
      iseq.topn(1)
      iseq.send(
        YARV.calldata(
          :===,
          1,
          CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE
        )
      )

      label = iseq.label
      iseq.branchif(label)
      [clause, label]
    end

  iseq.pop
  else_clause ? visit(else_clause) : iseq.putnil
  iseq.leave

  branches.each_with_index do |(clause, label), index|
    iseq.leave if index != 0
    iseq.push(label)
    iseq.pop
    visit(clause)
  end
end

# Compiles a class definition: the body goes into a child sequence, then the
# constant base and superclass (or nil) are pushed for defineclass.
def visit_class(node)
  name = node.constant.constant.value.to_sym
  class_iseq =
    with_child_iseq(iseq.class_child_iseq(name, node.location)) do
      iseq.event(:RUBY_EVENT_CLASS)
      visit(node.bodystmt)
      iseq.event(:RUBY_EVENT_END)
      iseq.leave
    end

  flags = DefineClass::TYPE_CLASS

  case node.constant
  when ConstPathRef
    flags |= DefineClass::FLAG_SCOPED
    visit(node.constant.parent)
  when ConstRef
    iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE)
  when TopConstRef
    flags |= DefineClass::FLAG_SCOPED
    iseq.putobject(Object)
  end

  if node.superclass
    flags |= DefineClass::FLAG_HAS_SUPERCLASS
    visit(node.superclass)
  else
    iseq.putnil
  end

  iseq.defineclass(name, class_iseq, flags)
end

# Compiles a receiver-less command by rewrapping it as a CommandCall.
def visit_command(node)
  visit_call(
    CommandCall.new(
      receiver: nil,
      operator: nil,
      message: node.message,
      arguments: node.arguments,
      block: node.block,
      location: node.location
    )
  )
end

# Compiles a command call by delegating to visit_call with a copy of it.
def visit_command_call(node)
  visit_call(
    CommandCall.new(
      receiver: node.receiver,
      operator: node.operator,
      message: node.message,
      arguments: node.arguments,
      block: node.block,
      location: node.location
    )
  )
end

# Compiles the parent of a constant path that is being assigned to.
def visit_const_path_field(node)
  visit(node.parent)
end

# Compiles a constant path lookup such as Foo::Bar.
def visit_const_path_ref(node)
  names = constant_names(node)
  iseq.opt_getconstant_path(names)
end

# Compiles a method definition. The body goes into a child sequence, which
# is attached with definesmethod when there is an explicit target and
# definemethod otherwise; the method name is then pushed as the result.
def visit_def(node)
  name = node.name.value.to_sym
  method_iseq = iseq.method_child_iseq(name.to_s, node.location)

  with_child_iseq(method_iseq) do
    visit(node.params) if node.params
    iseq.event(:RUBY_EVENT_CALL)
    visit(node.bodystmt)
    iseq.event(:RUBY_EVENT_RETURN)
    iseq.leave
  end

  if node.target
    visit(node.target)
    iseq.definesmethod(name, method_iseq)
  else
    iseq.definemethod(name, method_iseq)
  end

  iseq.putobject(name)
end
# Compiles `defined?(...)`, emitting either a constant description string
# or a `defined` instruction depending on what is being checked.
def visit_defined(node)
  case node.value
  when Assign
    # If we're assigning to a local variable, then we need to make sure
    # that we put it into the local table.
    if node.value.target.is_a?(VarField) &&
         node.value.target.value.is_a?(Ident)
      iseq.local_table.plain(node.value.target.value.value.to_sym)
    end

    iseq.putobject("assignment")
  when VarRef
    value = node.value.value
    name = value.value.to_sym

    case value
    when Const
      iseq.putnil
      iseq.defined(Defined::TYPE_CONST, name, "constant")
    when CVar
      iseq.putnil
      iseq.defined(Defined::TYPE_CVAR, name, "class variable")
    when GVar
      iseq.putnil
      iseq.defined(Defined::TYPE_GVAR, name, "global-variable")
    when Ident
      iseq.putobject("local-variable")
    when IVar
      iseq.putnil
      iseq.defined(Defined::TYPE_IVAR, name, "instance-variable")
    when Kw
      case name
      when :false
        iseq.putobject("false")
      when :nil
        iseq.putobject("nil")
      when :self
        iseq.putobject("self")
      when :true
        iseq.putobject("true")
      end
    end
  when VCall
    iseq.putself

    name = node.value.value.value.to_sym
    iseq.defined(Defined::TYPE_FUNC, name, "method")
  when YieldNode
    iseq.putnil
    iseq.defined(Defined::TYPE_YIELD, false, "yield")
  when ZSuper
    iseq.putnil
    iseq.defined(Defined::TYPE_ZSUPER, false, "super")
  else
    iseq.putobject("expression")
  end
end

# Compiles a dynamic symbol made of a single plain string part. Symbols with
# any other shape emit nothing here.
def visit_dyna_symbol(node)
  if node.parts.length == 1 && node.parts.first.is_a?(TStringContent)
    iseq.putobject(node.parts.first.value.to_sym)
  end
end

# Compiles an else clause, discarding its value unless it is the last
# statement.
def visit_else(node)
  visit(node.statements)
  iseq.pop unless last_statement?
end

# Compiles an elsif clause by treating it as a nested if.
def visit_elsif(node)
  visit_if(
    IfNode.new(
      predicate: node.predicate,
      statements: node.statements,
      consequent: node.consequent,
      location: node.location
    )
  )
end

# ensure clauses are not compiled here; no instructions are emitted.
def visit_ensure(node)
end

# Compiles the receiver of an attribute that is being assigned to.
def visit_field(node)
  visit(node.parent)
end

# Pushes the compile-time value of a float literal.
def visit_float(node)
  iseq.putobject(node.accept(RubyVisitor.new))
end

# Find patterns are not compiled here; no instructions are emitted.
def visit_fndptn(node)
end

# Compiles a for loop as an `each` call whose block stores its first
# argument into the loop variable, which lives in the enclosing scope.
def visit_for(node)
  visit(node.collection)

  name = node.index.value.value.to_sym
  iseq.local_table.plain(name)

  block_iseq =
    with_child_iseq(iseq.block_child_iseq(node.statements.location)) do
      iseq.argument_options[:lead_num] ||= 0
      iseq.argument_options[:lead_num] += 1
      iseq.argument_options[:ambiguous_param0] = true

      iseq.argument_size += 1
      iseq.local_table.plain(2)

      iseq.getlocal(0, 0)

      local_variable = iseq.local_variable(name)
      iseq.setlocal(local_variable.index, local_variable.level)

      iseq.event(:RUBY_EVENT_B_CALL)
      iseq.nop

      visit(node.statements)
      iseq.event(:RUBY_EVENT_B_RETURN)
      iseq.leave
    end

  iseq.send(YARV.calldata(:each, 0, 0), block_iseq)
end

# Compiles a hash literal: duphash when it can be evaluated at compile time,
# otherwise every association followed by newhash.
def visit_hash(node)
  if (compiled = RubyVisitor.compile(node))
    iseq.duphash(compiled)
  else
    visit_all(node.assocs)
    iseq.newhash(node.assocs.length * 2)
  end
end

# Hash patterns are not compiled here; no instructions are emitted.
def visit_hshptn(node)
end

# Compiles a heredoc. One whose opening token ends with a backtick is
# compiled as a shell command, a single plain part as a plain string, and
# anything else as a string concatenation.
def visit_heredoc(node)
  if node.beginning.value.end_with?("`")
    visit_xstring_literal(node)
  elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent)
    visit(node.parts.first)
  else
    length = visit_string_parts(node)
    iseq.concatstrings(length)
  end
end

# Compiles an if expression. A range predicate is compiled as a flip-flop;
# every other predicate becomes a branch around the statements.
def visit_if(node)
  if node.predicate.is_a?(RangeNode)
    true_label = iseq.label
    false_label = iseq.label
    end_label = iseq.label

    iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0)
    iseq.branchif(true_label)

    visit(node.predicate.left)
    iseq.branchunless(end_label)

    iseq.putobject(true)
    iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START)

    iseq.push(true_label)
    visit(node.predicate.right)
    iseq.branchunless(false_label)

    iseq.putobject(false)
    iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START)

    iseq.push(false_label)
    visit(node.statements)
    iseq.leave
    iseq.push(end_label)
    iseq.putnil
  else
    consequent_label = iseq.label

    visit(node.predicate)
    iseq.branchunless(consequent_label)
    visit(node.statements)

    if last_statement?
      iseq.leave
      iseq.push(consequent_label)
      node.consequent ? visit(node.consequent) : iseq.putnil
    else
      iseq.pop

      if node.consequent
        done_label = iseq.label
        iseq.jump(done_label)
        iseq.push(consequent_label)
        visit(node.consequent)
        iseq.push(done_label)
      else
        iseq.push(consequent_label)
      end
    end
  end
end

# Compiles a ternary by rewriting it as an if/else.
def visit_if_op(node)
  visit_if(
    IfNode.new(
      predicate: node.predicate,
      statements: node.truthy,
      consequent:
        Else.new(
          keyword: Kw.new(value: "else", location: Location.default),
          statements: node.falsy,
          location: Location.default
        ),
      location: Location.default
    )
  )
end

# Pushes the compile-time value of an imaginary literal.
def visit_imaginary(node)
  iseq.putobject(node.accept(RubyVisitor.new))
end

# Pushes the compile-time value of an integer literal.
def visit_int(node)
  iseq.putobject(node.accept(RubyVisitor.new))
end

# Registers a `**rest` parameter and records its slot.
def visit_kwrest_param(node)
  iseq.argument_options[:kwrest] = iseq.argument_size
  iseq.argument_size += 1
  iseq.local_table.plain(node.name.value.to_sym)
end

# Pushes the symbol named by a label.
def visit_label(node)
  iseq.putobject(node.accept(RubyVisitor.new))
end

# Compiles a lambda literal as a block sequence handed to a `lambda` call
# on the VM core object.
def visit_lambda(node)
  lambda_iseq =
    with_child_iseq(iseq.block_child_iseq(node.location)) do
      iseq.event(:RUBY_EVENT_B_CALL)
      visit(node.params)
      visit(node.statements)
      iseq.event(:RUBY_EVENT_B_RETURN)
      iseq.leave
    end

  iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE)
  iseq.send(YARV.calldata(:lambda, 0, CallData::CALL_FCALL), lambda_iseq)
end

# Lambda parameters are compiled exactly like block parameters.
def visit_lambda_var(node)
  visit_block_var(node)
end

# Compiles a multiple assignment: the value, a duplicate of it, and then
# the targets.
def visit_massign(node)
  visit(node.value)
  iseq.dup
  visit(node.target)
end

# Compiles a call that carries a block by merging the two into one
# CommandCall.
def visit_method_add_block(node)
  visit_call(
    CommandCall.new(
      receiver: node.call.receiver,
      operator: node.call.operator,
      message: node.call.message,
      arguments: node.call.arguments,
      block: node.block,
      location: node.location
    )
  )
end

# Compiles the left-hand side of a multiple assignment. Only VarField
# targets are handled; other kinds of part are skipped.
def visit_mlhs(node)
  lookups = []
  node.parts.each do |part|
    case part
    when VarField
      lookups << visit(part)
    end
  end

  iseq.expandarray(lookups.length, 0)
  lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) }
end

# Compiles a module definition: the body goes into a child sequence, then
# the constant base and a nil superclass are pushed for defineclass.
def visit_module(node)
  name = node.constant.constant.value.to_sym
  module_iseq =
    with_child_iseq(iseq.module_child_iseq(name, node.location)) do
      iseq.event(:RUBY_EVENT_CLASS)
      visit(node.bodystmt)
      iseq.event(:RUBY_EVENT_END)
      iseq.leave
    end

  flags = DefineClass::TYPE_MODULE

  case node.constant
  when ConstPathRef
    flags |= DefineClass::FLAG_SCOPED
    visit(node.constant.parent)
  when ConstRef
    iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE)
  when TopConstRef
    flags |= DefineClass::FLAG_SCOPED
    iseq.putobject(Object)
  end

  iseq.putnil
  iseq.defineclass(name, module_iseq, flags)
end

# Compiles a multiple right-hand side: duparray when it can be evaluated at
# compile time, otherwise every part followed by newarray.
def visit_mrhs(node)
  if (compiled = RubyVisitor.compile(node))
    iseq.duparray(compiled)
  else
    visit_all(node.parts)
    iseq.newarray(node.parts.length)
  end
end

# next is not compiled here; no instructions are emitted.
def visit_next(node)
end

# Compiles `not expr` as a call to `!` on the operand.
def visit_not(node)
  visit(node.statement)
  iseq.send(YARV.calldata(:!))
end

# Compiles an operator assignment. `&&=` and `||=` short-circuit; every
# other operator reads the target, sends the operator and writes back.
def visit_opassign(node)
  flag = CallData::CALL_ARGS_SIMPLE
  if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField)
    flag |= CallData::CALL_FCALL
  end

  case (operator = node.operator.value.chomp("=").to_sym)
  when :"&&"
    done_label = iseq.label

    with_opassign(node) do
      iseq.dup
      iseq.branchunless(done_label)
      iseq.pop
      visit(node.value)
    end

    case node.target
    when ARefField
      iseq.leave
      iseq.push(done_label)
      iseq.setn(3)
      iseq.adjuststack(3)
    when ConstPathField, TopConstField
      iseq.push(done_label)
      iseq.swap
      iseq.pop
    else
      iseq.push(done_label)
    end
  when :"||"
    if node.target.is_a?(ConstPathField) ||
         node.target.is_a?(TopConstField)
      opassign_defined(node)
      iseq.swap
      iseq.pop
    elsif node.target.is_a?(VarField) &&
          [Const, CVar, GVar].include?(node.target.value.class)
      opassign_defined(node)
    else
      skip_value_label = iseq.label

      with_opassign(node) do
        iseq.dup
        iseq.branchif(skip_value_label)
        iseq.pop
        visit(node.value)
      end

      if node.target.is_a?(ARefField)
        iseq.leave
        iseq.push(skip_value_label)
        iseq.setn(3)
        iseq.adjuststack(3)
      else
        iseq.push(skip_value_label)
      end
    end
  else
    with_opassign(node) do
      visit(node.value)
      iseq.send(YARV.calldata(operator, 1, flag))
    end
  end
end

# Registers every kind of parameter in the local table and records the
# matching argument options on the current instruction sequence.
def visit_params(node)
  if node.requireds.any?
    iseq.argument_options[:lead_num] = 0

    node.requireds.each do |required|
      iseq.local_table.plain(required.value.to_sym)
      iseq.argument_size += 1
      iseq.argument_options[:lead_num] += 1
    end
  end

  node.optionals.each do |(optional, value)|
    index = iseq.local_table.size
    name = optional.value.to_sym

    iseq.local_table.plain(name)
    iseq.argument_size += 1

    # The first optional parameter opens the list of entry-point labels.
    unless iseq.argument_options.key?(:opt)
      start_label = iseq.label
      iseq.push(start_label)
      iseq.argument_options[:opt] = [start_label]
    end

    visit(value)
    iseq.setlocal(index, 0)

    arg_given_label = iseq.label
    iseq.push(arg_given_label)
    iseq.argument_options[:opt] << arg_given_label
  end

  visit(node.rest) if node.rest

  if node.posts.any?
    iseq.argument_options[:post_start] = iseq.argument_size
    iseq.argument_options[:post_num] = 0

    node.posts.each do |post|
      iseq.local_table.plain(post.value.to_sym)
      iseq.argument_size += 1
      iseq.argument_options[:post_num] += 1
    end
  end

  if node.keywords.any?
    iseq.argument_options[:kwbits] = 0
    iseq.argument_options[:keyword] = []

    keyword_bits_name = node.keyword_rest ? 3 : 2
    iseq.argument_size += 1
    keyword_bits_index = iseq.local_table.locals.size + node.keywords.size

    node.keywords.each_with_index do |(keyword, value), keyword_index|
      name = keyword.value.chomp(":").to_sym
      index = iseq.local_table.size

      iseq.local_table.plain(name)
      iseq.argument_size += 1
      iseq.argument_options[:kwbits] += 1

      if value.nil?
        # Required keyword: only the name is recorded.
        iseq.argument_options[:keyword] << name
      elsif (compiled = RubyVisitor.compile(value))
        # Default that can be evaluated at compile time.
        iseq.argument_options[:keyword] << [name, compiled]
      else
        # Default that must be evaluated when the keyword was not given.
        skip_value_label = iseq.label

        iseq.argument_options[:keyword] << [name]
        iseq.checkkeyword(keyword_bits_index, keyword_index)
        iseq.branchif(skip_value_label)
        visit(value)
        iseq.setlocal(index, 0)
        iseq.push(skip_value_label)
      end
    end

    iseq.local_table.plain(keyword_bits_name)
  end

  if node.keyword_rest.is_a?(ArgsForward)
    if RUBY_VERSION >= "3.2"
      iseq.local_table.plain(:*)
      iseq.local_table.plain(:&)
      iseq.local_table.plain(:"...")

      iseq.argument_options[:rest_start] = iseq.argument_size
      iseq.argument_options[:block_start] = iseq.argument_size + 1

      iseq.argument_size += 2
    else
      iseq.local_table.plain(:*)
      iseq.local_table.plain(:&)

      iseq.argument_options[:rest_start] = iseq.argument_size
      iseq.argument_options[:block_start] = iseq.argument_size + 1

      iseq.argument_size += 2
    end
  elsif node.keyword_rest
    visit(node.keyword_rest)
  end

  visit(node.block) if node.block
end

# Compiles the contents of a parenthesized expression.
def visit_paren(node)
  visit(node.contents)
end

# Pinned expressions are not compiled here; no instructions are emitted.
def visit_pinned_begin(node)
end

# Pinned variables are not compiled here; no instructions are emitted.
def visit_pinned_var_ref(node)
end

# Compiles a whole program into a top-level instruction sequence and
# returns it. Leading comments are scanned for the frozen_string_literal
# magic comment, and BEGIN blocks are compiled before everything else.
def visit_program(node)
  node.statements.body.each do |statement|
    break unless statement.is_a?(Comment)

    if statement.value == "# frozen_string_literal: true"
      options.frozen_string_literal!
    end
  end

  preexes = []
  statements = []

  node.statements.body.each do |statement|
    case statement
    when Comment, EmbDoc, EndContent, VoidStmt
      # ignore
    when BEGINBlock
      preexes << statement
    else
      statements << statement
    end
  end

  top_iseq =
    InstructionSequence.new(
      :top,
      "",
      nil,
      node.location,
      options
    )

  with_child_iseq(top_iseq) do
    visit_all(preexes)

    if statements.empty?
      iseq.putnil
    else
      *statements, last_statement = statements
      visit_all(statements)
      with_last_statement { visit(last_statement) }
    end

    iseq.leave
  end

  top_iseq.compile!
  top_iseq
end

# Pushes a copy of the compile-time array for a %i[] literal.
def visit_qsymbols(node)
  iseq.duparray(node.accept(RubyVisitor.new))
end

# Compiles a %w[] literal: duparray under frozen string literals, otherwise
# each element followed by newarray.
def visit_qwords(node)
  if options.frozen_string_literal?
    iseq.duparray(node.accept(RubyVisitor.new))
  else
    visit_all(node.elements)
    iseq.newarray(node.elements.length)
  end
end

# Compiles a range: putobject when it can be evaluated at compile time,
# otherwise both ends followed by newrange (0 for "..", 1 otherwise).
def visit_range(node)
  if (compiled = RubyVisitor.compile(node))
    iseq.putobject(compiled)
  else
    visit(node.left)
    visit(node.right)
    iseq.newrange(node.operator.value == ".." ? 0 : 1)
  end
end

# Compiles a rightward pattern match. With a keyword operator the result is
# true or false; otherwise a failed match raises NoMatchingPatternError or
# NoMatchingPatternKeyError.
def visit_rassign(node)
  iseq.putnil

  if node.operator.is_a?(Kw)
    match_label = iseq.label

    visit(node.value)
    iseq.dup

    visit_pattern(node.pattern, match_label)

    iseq.pop
    iseq.pop
    iseq.putobject(false)
    iseq.leave

    iseq.push(match_label)
    iseq.adjuststack(2)
    iseq.putobject(true)
  else
    no_key_label = iseq.label
    end_leave_label = iseq.label
    end_label = iseq.label

    iseq.putnil
    iseq.putobject(false)
    iseq.putnil
    iseq.putnil
    visit(node.value)
    iseq.dup

    visit_pattern(node.pattern, end_label)

    # First we're going to push the core onto the stack, then we'll check
    # if the value to match is truthy. If it is, we'll jump down to raise
    # NoMatchingPatternKeyError. Otherwise we'll raise
    # NoMatchingPatternError.
    iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE)
    iseq.topn(4)
    iseq.branchif(no_key_label)

    # Here we're going to raise NoMatchingPatternError.
    iseq.putobject(NoMatchingPatternError)
    iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE)
    iseq.putobject("%p: %s")
    iseq.topn(4)
    iseq.topn(7)
    iseq.send(YARV.calldata(:"core#sprintf", 3))
    iseq.send(YARV.calldata(:"core#raise", 2))
    iseq.jump(end_leave_label)

    # Here we're going to raise NoMatchingPatternKeyError.
    iseq.push(no_key_label)
    iseq.putobject(NoMatchingPatternKeyError)
    iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE)
    iseq.putobject("%p: %s")
    iseq.topn(4)
    iseq.topn(7)
    iseq.send(YARV.calldata(:"core#sprintf", 3))
    iseq.topn(7)
    iseq.topn(9)
    iseq.send(
      YARV.calldata(:new, 1, CallData::CALL_KWARG, %i[matchee key])
    )
    iseq.send(YARV.calldata(:"core#raise", 1))

    iseq.push(end_leave_label)
    iseq.adjuststack(7)
    iseq.putnil
    iseq.leave

    iseq.push(end_label)
    iseq.adjuststack(6)
    iseq.putnil
  end
end

# Pushes the compile-time value of a rational literal.
def visit_rational(node)
  iseq.putobject(node.accept(RubyVisitor.new))
end

# redo is not compiled here; no instructions are emitted.
def visit_redo(node)
end

# Compiles a regular expression: putobject when it can be evaluated at
# compile time, otherwise its parts followed by toregexp.
def visit_regexp_literal(node)
  if (compiled = RubyVisitor.compile(node))
    iseq.putobject(compiled)
  else
    flags = RubyVisitor.new.visit_regexp_literal_flags(node)
    length = visit_string_parts(node)
    iseq.toregexp(flags, length)
  end
end

# rescue clauses are not compiled here; no instructions are emitted.
def visit_rescue(node)
end

# rescue exception lists are not compiled here; no instructions are emitted.
def visit_rescue_ex(node)
end

# rescue modifiers are not compiled here; no instructions are emitted.
def visit_rescue_mod(node)
end

# Registers a `*rest` parameter and records its slot.
def visit_rest_param(node)
  iseq.local_table.plain(node.name.value.to_sym)
  iseq.argument_options[:rest_start] = iseq.argument_size
  iseq.argument_size += 1
end

# retry is not compiled here; no instructions are emitted.
def visit_retry(node)
end

# return is not compiled here; no instructions are emitted.
def visit_return(node)
end

# Compiles `class << target` as a defineclass of a singleton class.
def visit_sclass(node)
  visit(node.target)
  iseq.putnil

  singleton_iseq =
    with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do
      iseq.event(:RUBY_EVENT_CLASS)
      visit(node.bodystmt)
      iseq.event(:RUBY_EVENT_END)
      iseq.leave
    end

  iseq.defineclass(
    :singletonclass,
    singleton_iseq,
    DefineClass::TYPE_SINGLETON_CLASS
  )
end

# Compiles a list of statements, ignoring comments, embedded documents,
# __END__ content and void statements. An empty list pushes nil.
def visit_statements(node)
  statements =
    node.body.select do |statement|
      case statement
      when Comment, EmbDoc, EndContent, VoidStmt
        false
      else
        true
      end
    end

  statements.empty? ? iseq.putnil : visit_all(statements)
end

# Compiles adjacent string literals by joining the first part of each side
# into one literal.
def visit_string_concat(node)
  value = node.left.parts.first.value + node.right.parts.first.value

  visit_string_literal(
    StringLiteral.new(
      parts: [TStringContent.new(value: value, location: node.location)],
      quote: node.left.quote,
      location: node.location
    )
  )
end

# Compiles the statements inside a `#{...}` interpolation.
def visit_string_embexpr(node)
  visit(node.statements)
end

# Compiles a string literal: a single plain part directly, anything else as
# a concatenation of its parts.
def visit_string_literal(node)
  if node.parts.length == 1 && node.parts.first.is_a?(TStringContent)
    visit(node.parts.first)
  else
    length = visit_string_parts(node)
    iseq.concatstrings(length)
  end
end

# Compiles `super(args)` as an invokesuper with the argument count.
def visit_super(node)
  iseq.putself
  visit(node.arguments)
  iseq.invokesuper(
    YARV.calldata(
      nil,
      argument_parts(node.arguments).length,
      CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE |
        CallData::CALL_SUPER
    ),
    nil
  )
end

# Pushes the symbol denoted by a symbol literal.
def visit_symbol_literal(node)
  iseq.putobject(node.accept(RubyVisitor.new))
end

# Compiles a %I[] literal: duparray when it can be evaluated at compile
# time, otherwise each element (interned when interpolated) and newarray.
def visit_symbols(node)
  if (compiled = RubyVisitor.compile(node))
    iseq.duparray(compiled)
  else
    node.elements.each do |element|
      if element.parts.length == 1 &&
           element.parts.first.is_a?(TStringContent)
        iseq.putobject(element.parts.first.value.to_sym)
      else
        length = visit_string_parts(element)
        iseq.concatstrings(length)
        iseq.intern
      end
    end

    iseq.newarray(node.elements.length)
  end
end

# Compiles a top-level constant reference such as ::Foo.
def visit_top_const_ref(node)
  iseq.opt_getconstant_path(constant_names(node))
end

# Compiles plain string content: putobject under frozen string literals and
# putstring otherwise.
def visit_tstring_content(node)
  if options.frozen_string_literal?
    iseq.putobject(node.accept(RubyVisitor.new))
  else
    iseq.putstring(node.accept(RubyVisitor.new))
  end
end

# Compiles a unary operator as a method call on the operand. "+" and "-"
# map to the "+@" and "-@" methods.
def visit_unary(node)
  method_id =
    case node.operator
    when "+", "-"
      "#{node.operator}@"
    else
      node.operator
    end

  visit_call(
    CommandCall.new(
      receiver: node.statement,
      operator: nil,
      message: Ident.new(value: method_id, location: Location.default),
      arguments: nil,
      block: nil,
      location: Location.default
    )
  )
end

# Compiles `undef` as one core#undef_method call per symbol, discarding the
# result of every call but the last.
def visit_undef(node)
  node.symbols.each_with_index do |symbol, index|
    iseq.pop if index != 0
    iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE)
    iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE)
    visit(symbol)
    iseq.send(YARV.calldata(:"core#undef_method", 2))
  end
end

# Compiles an unless expression. A falsy predicate branches to the
# statements; a truthy one falls through to the else clause (or nil).
#
# Outside of the last statement this previously never compiled the
# statements at all, and with an else clause it compiled that clause twice
# and popped one value too many. The layout now mirrors visit_if.
def visit_unless(node)
  statements_label = iseq.label

  visit(node.predicate)
  iseq.branchunless(statements_label)
  node.consequent ? visit(node.consequent) : iseq.putnil

  if last_statement?
    iseq.leave
    iseq.push(statements_label)
    visit(node.statements)
  else
    # visit_else discards its own value when it is not the last statement,
    # so only the placeholder nil pushed above needs to be removed here.
    iseq.pop unless node.consequent

    done_label = iseq.label
    iseq.jump(done_label)
    iseq.push(statements_label)
    visit(node.statements)
    iseq.pop
    iseq.push(done_label)
  end
end

# Compiles an until loop: the body repeats while the predicate is falsy.
def visit_until(node)
  predicate_label = iseq.label
  statements_label = iseq.label

  iseq.jump(predicate_label)
  iseq.putnil
  iseq.pop
  iseq.jump(predicate_label)

  iseq.push(statements_label)
  visit(node.statements)
  iseq.pop

  iseq.push(predicate_label)
  visit(node.predicate)
  iseq.branchunless(statements_label)
  iseq.putnil if last_statement?
end

# Resolves the storage for a variable that is being written. Class and
# instance variables return inline storage; locals return their lookup and
# are added to the local table first when missing. Other kinds return nil.
def visit_var_field(node)
  case node.value
  when CVar, IVar
    name = node.value.value.to_sym
    iseq.inline_storage_for(name)
  when Ident
    name = node.value.value.to_sym

    if (local_variable = iseq.local_variable(name))
      local_variable
    else
      iseq.local_table.plain(name)
      iseq.local_variable(name)
    end
  end
end

# Compiles a variable read, dispatching on the kind of variable.
def visit_var_ref(node)
  case node.value
  when Const
    iseq.opt_getconstant_path(constant_names(node))
  when CVar
    name = node.value.value.to_sym
    iseq.getclassvariable(name)
  when GVar
    iseq.getglobal(node.value.value.to_sym)
  when Ident
    lookup = iseq.local_variable(node.value.value.to_sym)

    case lookup.local
    when LocalTable::BlockLocal
      iseq.getblockparam(lookup.index, lookup.level)
    when LocalTable::PlainLocal
      iseq.getlocal(lookup.index, lookup.level)
    end
  when IVar
    name = node.value.value.to_sym
    iseq.getinstancevariable(name)
  when Kw
    case node.value.value
    when "false"
      iseq.putobject(false)
    when "nil"
      iseq.putnil
    when "self"
      iseq.putself
    when "true"
      iseq.putobject(true)
    end
  end
end

# Compiles a bare identifier that is a method call on self with no
# arguments.
def visit_vcall(node)
  iseq.putself
  iseq.send(
    YARV.calldata(
      node.value.value.to_sym,
      0,
      CallData::CALL_FCALL | CallData::CALL_VCALL |
        CallData::CALL_ARGS_SIMPLE
    )
  )
end

# Compiles the body of a when clause.
def visit_when(node)
  visit(node.statements)
end

# Compiles a while loop: the body repeats while the predicate is truthy.
def visit_while(node)
  predicate_label = iseq.label
  statements_label = iseq.label

  iseq.jump(predicate_label)
  iseq.putnil
  iseq.pop
  iseq.jump(predicate_label)

  iseq.push(statements_label)
  visit(node.statements)
  iseq.pop

  iseq.push(predicate_label)
  visit(node.predicate)
  iseq.branchif(statements_label)
  iseq.putnil if last_statement?
end

# Compiles one word of a %W[] literal: a single plain part directly,
# anything else as a concatenation of its parts.
def visit_word(node)
  if node.parts.length == 1 && node.parts.first.is_a?(TStringContent)
    visit(node.parts.first)
  else
    length = visit_string_parts(node)
    iseq.concatstrings(length)
  end
end

# Compiles a %W[] literal: duparray under frozen string literals when it
# can be evaluated at compile time, otherwise each element and newarray.
def visit_words(node)
  if options.frozen_string_literal? &&
       (compiled = RubyVisitor.compile(node))
    iseq.duparray(compiled)
  else
    visit_all(node.elements)
    iseq.newarray(node.elements.length)
  end
end

# Compiles a backtick command string as a call to the ` method on self.
def visit_xstring_literal(node)
  iseq.putself
  length = visit_string_parts(node)
  # NOTE(review): this passes node.parts.length, whereas the other callers
  # of visit_string_parts pass the returned length - confirm which one is
  # intended.
  iseq.concatstrings(node.parts.length) if length > 1
  iseq.send(
    YARV.calldata(
      :`,
      1,
      CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE
    )
  )
end

# Compiles `yield` as an invokeblock with the argument count.
def visit_yield(node)
  parts = argument_parts(node.arguments)
  visit_all(parts)
  iseq.invokeblock(YARV.calldata(nil, parts.length))
end

# Compiles a bare `super` as an invokesuper flagged as zsuper.
def visit_zsuper(_node)
  iseq.putself
  iseq.invokesuper(
    YARV.calldata(
      nil,
      0,
      CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE |
        CallData::CALL_SUPER | CallData::CALL_ZSUPER
    ),
    nil
  )
end

private

# This is a helper that is used in places where arguments may be present
# or they may be wrapped in parentheses. It's meant to descend down the
# tree and return an array of argument nodes.
def argument_parts(node)
  case node
  when nil
    []
  when Args
    node.parts
  when ArgParen
    if node.arguments.is_a?(ArgsForward)
      [node.arguments]
    else
      node.arguments.parts
    end
  when Paren
    node.contents.parts
  end
end

# Constant names when they are being assigned or referenced come in as a
# tree, but it's more convenient to work with them as an array. This
# method converts them into that array. This is nice because it's the
# operand that goes to opt_getconstant_path in Ruby 3.2.
+ def constant_names(node) + current = node + names = [] + + while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) + names.unshift(current.constant.value.to_sym) + current = current.parent + end + + case current + when VarField, VarRef + names.unshift(current.value.value.to_sym) + when TopConstRef + names.unshift(current.constant.value.to_sym) + names.unshift(:"") + end + + names + end + + # For the most part when an OpAssign (operator assignment) node with a ||= + # operator is being compiled it's a matter of reading the target, checking + # if the value should be evaluated, evaluating it if so, and then writing + # the result back to the target. + # + # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we + # first check if the value is defined using the defined instruction. I + # don't know why it is necessary, and suspect that it isn't. + def opassign_defined(node) + value_label = iseq.label + skip_value_label = iseq.label + + case node.target + when ConstPathField + visit(node.target.parent) + name = node.target.constant.value.to_sym + + iseq.dup + iseq.defined(Defined::TYPE_CONST_FROM, name, true) + when TopConstField + name = node.target.constant.value.to_sym + + iseq.putobject(Object) + iseq.dup + iseq.defined(Defined::TYPE_CONST_FROM, name, true) + when VarField + name = node.target.value.value.to_sym + iseq.putnil + + case node.target.value + when Const + iseq.defined(Defined::TYPE_CONST, name, true) + when CVar + iseq.defined(Defined::TYPE_CVAR, name, true) + when GVar + iseq.defined(Defined::TYPE_GVAR, name, true) + end + end + + iseq.branchunless(value_label) + + case node.target + when ConstPathField, TopConstField + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + when VarField + case node.target.value + when Const + iseq.opt_getconstant_path(constant_names(node.target)) + when CVar + iseq.getclassvariable(name) + when GVar + iseq.getglobal(name) + end + end + + iseq.dup + iseq.branchif(skip_value_label) + + 
iseq.pop + iseq.push(value_label) + visit(node.value) + + case node.target + when ConstPathField, TopConstField + iseq.dupn(2) + iseq.swap + iseq.setconstant(name) + when VarField + iseq.dup + + case node.target.value + when Const + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(name) + when CVar + iseq.setclassvariable(name) + when GVar + iseq.setglobal(name) + end + end + + iseq.push(skip_value_label) + end + + # Whenever a value is interpolated into a string-like structure, these + # three instructions are pushed. + def push_interpolate + iseq.dup + iseq.objtostring( + YARV.calldata( + :to_s, + 0, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + iseq.anytostring + end + + # Visit a type of pattern in a pattern match. + def visit_pattern(node, end_label) + case node + when AryPtn + length_label = iseq.label + match_failure_label = iseq.label + match_error_label = iseq.label + + # If there's a constant, then check if we match against that constant + # or not first. Branch to failure if we don't. + if node.constant + iseq.dup + visit(node.constant) + iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.branchunless(match_failure_label) + end + + # First, check if the #deconstruct cache is nil. If it is, we're going + # to call #deconstruct on the object and cache the result. + iseq.topn(2) + deconstruct_label = iseq.label + iseq.branchnil(deconstruct_label) + + # Next, ensure that the cached value was cached correctly, otherwise + # fail the match. + iseq.topn(2) + iseq.branchunless(match_failure_label) + + # Since we have a valid cached value, we can skip past the part where + # we call #deconstruct on the object. + iseq.pop + iseq.topn(1) + iseq.jump(length_label) + + # Check if the object responds to #deconstruct, fail the match + # otherwise. 
+ iseq.event(deconstruct_label) + iseq.dup + iseq.putobject(:deconstruct) + iseq.send(YARV.calldata(:respond_to?, 1)) + iseq.setn(3) + iseq.branchunless(match_failure_label) + + # Call #deconstruct and ensure that it's an array, raise an error + # otherwise. + iseq.send(YARV.calldata(:deconstruct)) + iseq.setn(2) + iseq.dup + iseq.checktype(CheckType::TYPE_ARRAY) + iseq.branchunless(match_error_label) + + # Ensure that the deconstructed array has the correct size, fail the + # match otherwise. + iseq.push(length_label) + iseq.dup + iseq.send(YARV.calldata(:length)) + iseq.putobject(node.requireds.length) + iseq.send(YARV.calldata(:==, 1)) + iseq.branchunless(match_failure_label) + + # For each required element, check if the deconstructed array contains + # the element, otherwise jump out to the top-level match failure. + iseq.dup + node.requireds.each_with_index do |required, index| + iseq.putobject(index) + iseq.send(YARV.calldata(:[], 1)) + + case required + when VarField + lookup = visit(required) + iseq.setlocal(lookup.index, lookup.level) + else + visit(required) + iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.branchunless(match_failure_label) + end + + if index < node.requireds.length - 1 + iseq.dup + else + iseq.pop + iseq.jump(end_label) + end + end + + # Set up the routine here to raise an error to indicate that the type + # of the deconstructed array was incorrect. + iseq.push(match_error_label) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject(TypeError) + iseq.putobject("deconstruct must return Array") + iseq.send(YARV.calldata(:"core#raise", 2)) + iseq.pop + + # Patch all of the match failures to jump here so that we pop a final + # value before returning to the parent node. + iseq.push(match_failure_label) + iseq.pop + when VarField + lookup = visit(node) + iseq.setlocal(lookup.index, lookup.level) + iseq.jump(end_label) + end + end + + # There are a lot of nodes in the AST that act as contains of parts of + # strings. 
This includes things like string literals, regular expressions, + # heredocs, etc. This method will visit all the parts of a string within + # those containers. + def visit_string_parts(node) + length = 0 + + unless node.parts.first.is_a?(TStringContent) + iseq.putobject("") + length += 1 + end + + node.parts.each do |part| + case part + when StringDVar + visit(part.variable) + push_interpolate + when StringEmbExpr + visit(part) + push_interpolate + when TStringContent + iseq.putobject(part.accept(RubyVisitor.new)) + end + + length += 1 + end + + length + end + + # The current instruction sequence that we're compiling is always stored + # on the compiler. When we descend into a node that has its own + # instruction sequence, this method can be called to temporarily set the + # new value of the instruction sequence, yield, and then set it back. + def with_child_iseq(child_iseq) + parent_iseq = iseq + + begin + @iseq = child_iseq + yield + child_iseq + ensure + @iseq = parent_iseq + end + end + + # When we're compiling the last statement of a set of statements within a + # scope, the instructions sometimes change from pops to leaves. These + # kinds of peephole optimizations can reduce the overall number of + # instructions. Therefore, we keep track of whether we're compiling the + # last statement of a scope and allow visit methods to query that + # information. + def with_last_statement + previous = @last_statement + @last_statement = true + + begin + yield + ensure + @last_statement = previous + end + end + + def last_statement? + @last_statement + end + + # OpAssign nodes can have a number of different kinds of nodes as their + # "target" (i.e., the left-hand side of the assignment). When compiling + # these nodes we typically need to first fetch the current value of the + # variable, then perform some kind of action, then store the result back + # into the variable. 
This method handles that by first fetching the value, + # then yielding to the block, then storing the result. + def with_opassign(node) + case node.target + when ARefField + iseq.putnil + visit(node.target.collection) + visit(node.target.index) + + iseq.dupn(2) + iseq.send(YARV.calldata(:[], 1)) + + yield + + iseq.setn(3) + iseq.send(YARV.calldata(:[]=, 2)) + iseq.pop + when ConstPathField + name = node.target.constant.value.to_sym + + visit(node.target.parent) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + + yield + + if node.operator.value == "&&=" + iseq.dupn(2) + else + iseq.swap + iseq.topn(1) + end + + iseq.swap + iseq.setconstant(name) + when TopConstField + name = node.target.constant.value.to_sym + + iseq.putobject(Object) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + + yield + + if node.operator.value == "&&=" + iseq.dupn(2) + else + iseq.swap + iseq.topn(1) + end + + iseq.swap + iseq.setconstant(name) + when VarField + case node.target.value + when Const + names = constant_names(node.target) + iseq.opt_getconstant_path(names) + + yield + + iseq.dup + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(names.last) + when CVar + name = node.target.value.value.to_sym + iseq.getclassvariable(name) + + yield + + iseq.dup + iseq.setclassvariable(name) + when GVar + name = node.target.value.value.to_sym + iseq.getglobal(name) + + yield + + iseq.dup + iseq.setglobal(name) + when Ident + local_variable = visit(node.target) + iseq.getlocal(local_variable.index, local_variable.level) + + yield + + iseq.dup + iseq.setlocal(local_variable.index, local_variable.level) + when IVar + name = node.target.value.value.to_sym + iseq.getinstancevariable(name) + + yield + + iseq.dup + iseq.setinstancevariable(name) + end + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb new file mode 100644 index 00000000..a6a567fb --- /dev/null +++ 
b/lib/syntax_tree/yarv/decompiler.rb @@ -0,0 +1,254 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. + class Decompiler + # When we're decompiling, we use a looped case statement to emulate + # jumping around in the same way the virtual machine would. This class + # provides convenience methods for generating the AST nodes that have to + # do with that label. + class BlockLabel + include DSL + attr_reader :name + + def initialize(name) + @name = name + end + + def field + VarField(Ident(name)) + end + + def ref + VarRef(Ident(name)) + end + end + + include DSL + attr_reader :iseq, :block_label + + def initialize(iseq) + @iseq = iseq + @block_label = BlockLabel.new("__block_label") + end + + def to_ruby + Program(decompile(iseq)) + end + + private + + def node_for(value) + case value + when Integer + Int(value.to_s) + when Symbol + SymbolLiteral(Ident(value.to_s)) + end + end + + def decompile(iseq) + label = :label_0 + clauses = {} + clause = [] + + iseq.insns.each do |insn| + case insn + when InstructionSequence::Label + unless clause.last.is_a?(Next) + clause << Assign(block_label.field, node_for(insn.name)) + end + + clauses[label] = clause + clause = [] + label = insn.name + when BranchUnless + body = [ + Assign(block_label.field, node_for(insn.label.name)), + Next(Args([])) + ] + + clause << IfNode(clause.pop, Statements(body), nil) + when Dup + clause << clause.last + when DupHash + assocs = + insn.object.map do |key, value| + Assoc(node_for(key), node_for(value)) + end + + clause << HashLiteral(LBrace("{"), assocs) + when GetGlobal + clause << VarRef(GVar(insn.name.to_s)) + when GetLocalWC0 + local = iseq.local_table.locals[insn.index] + clause << VarRef(Ident(local.name.to_s)) + when Jump + clause << Assign(block_label.field, node_for(insn.label.name)) + clause << Next(Args([])) + when Leave + value = 
Args([clause.pop]) + clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) + when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, + OptMinus, OptMod, OptMult, OptOr, OptPlus + left, right = clause.pop(2) + clause << Binary(left, insn.calldata.method, right) + when OptAref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when OptAset + collection, arg, value = clause.pop(3) + + clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && + collection === value.left.collection && + arg === value.left.index.parts[0] + OpAssign( + ARefField(collection, Args([arg])), + Op("#{value.operator}="), + value.right + ) + else + Assign(ARefField(collection, Args([arg])), value) + end + when OptNEq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when OptSendWithoutBlock + method = insn.calldata.method.to_s + argc = insn.calldata.argc + + if insn.calldata.flag?(CallData::CALL_FCALL) + if argc == 0 + clause.pop + clause << CallNode(nil, nil, Ident(method), Args([])) + elsif argc == 1 && method.end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign( + CallNode(nil, nil, Ident(method[0..-2]), nil), + argument + ) + else + _receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + nil, + nil, + Ident(method), + ArgParen(Args(arguments)) + ) + end + else + if argc == 0 + clause << CallNode(clause.pop, Period("."), Ident(method), nil) + elsif argc == 1 && method.end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign( + CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + argument + ) + else + receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + receiver, + Period("."), + Ident(method), + ArgParen(Args(arguments)) + ) + end + end + when PutObject + case insn.object + when Float + clause << FloatLiteral(insn.object.inspect) + when Integer + clause << Int(insn.object.inspect) + else + raise "Unknown object type: #{insn.object.class.name}" + end + 
when PutObjectInt2Fix0 + clause << Int("0") + when PutObjectInt2Fix1 + clause << Int("1") + when PutSelf + clause << VarRef(Kw("self")) + when SetGlobal + target = GVar(insn.name.to_s) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + when SetLocalWC0 + target = Ident(local_name(insn.index, 0)) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + else + raise "Unknown instruction #{insn}" + end + end + + # If there's only one clause, then we don't need a case statement, and + # we can just disassemble the first clause. + clauses[label] = clause + return Statements(clauses.values.first) if clauses.size == 1 + + # Here we're going to build up a big case statement that will handle all + # of the different labels. + current = nil + clauses.reverse_each do |current_label, current_clause| + current = + When( + Args([node_for(current_label)]), + Statements(current_clause), + current + ) + end + switch = Case(Kw("case"), block_label.ref, current) + + # Here we're going to make sure that any locals that were established in + # the label_0 block are initialized so that scoping rules work + # correctly. + stack = [] + locals = [block_label.name] + + clauses[:label_0].each do |node| + if node.is_a?(Assign) && node.target.is_a?(VarField) && + node.target.value.is_a?(Ident) + value = node.target.value.value + next if locals.include?(value) + + stack << Assign(node.target, VarRef(Kw("nil"))) + locals << value + end + end + + # Finally, we'll set up the initial label and loop the entire case + # statement. 
+ stack << Assign(block_label.field, node_for(:label_0)) + stack << MethodAddBlock( + CallNode(nil, nil, Ident("loop"), Args([])), + BlockNode( + Kw("do"), + nil, + BodyStmt(Statements([switch]), nil, nil, nil, nil) + ) + ) + Statements(stack) + end + + def local_name(index, level) + current = iseq + level.times { current = current.parent_iseq } + current.local_table.locals[index].name.to_s + end + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb new file mode 100644 index 00000000..033b6d3d --- /dev/null +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -0,0 +1,211 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + class Disassembler + attr_reader :output, :queue + attr_reader :current_prefix, :current_iseq + + def initialize + @output = StringIO.new + @queue = [] + + @current_prefix = "" + @current_iseq = nil + end + + ######################################################################## + # Helpers for various instructions + ######################################################################## + + def calldata(value) + flag_names = [] + flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) + if value.flag?(CallData::CALL_ARGS_BLOCKARG) + flag_names << :ARGS_BLOCKARG + end + flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) + flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) + flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) + flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) + flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) + flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) + flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) + flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) + flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) + flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) + flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) 
+ + parts = [] + parts << "mid:#{value.method}" if value.method + parts << "argc:#{value.argc}" + parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg + parts << flag_names.join("|") if flag_names.any? + + "" + end + + def enqueue(iseq) + queue << iseq + end + + def event(name) + case name + when :RUBY_EVENT_B_CALL + "Bc" + when :RUBY_EVENT_B_RETURN + "Br" + when :RUBY_EVENT_CALL + "Ca" + when :RUBY_EVENT_CLASS + "Cl" + when :RUBY_EVENT_END + "En" + when :RUBY_EVENT_LINE + "Li" + when :RUBY_EVENT_RETURN + "Re" + else + raise "Unknown event: #{name}" + end + end + + def inline_storage(cache) + "" + end + + def instruction(name, operands = []) + operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] + end + + def label(value) + value.name["label_".length..] + end + + def local(index, explicit: nil, implicit: nil) + current = current_iseq + (explicit || implicit).times { current = current.parent_iseq } + + value = "#{current.local_table.name_at(index)}@#{index}" + value << ", #{explicit}" if explicit + value + end + + def object(value) + value.inspect + end + + ######################################################################## + # Main entrypoint + ######################################################################## + + def format! + while (@current_iseq = queue.shift) + output << "\n" if output.pos > 0 + format_iseq(@current_iseq) + end + + output.string + end + + private + + def format_iseq(iseq) + output << "#{current_prefix}== disasm: " + output << "#:1 " + + location = iseq.location + output << "(#{location.start_line},#{location.start_column})-" + output << "(#{location.end_line},#{location.end_column})" + output << "> " + + if iseq.catch_table.any? 
+ output << "(catch: TRUE)\n" + output << "#{current_prefix}== catch table\n" + + with_prefix("#{current_prefix}| ") do + iseq.catch_table.each do |entry| + case entry + when InstructionSequence::CatchBreak + output << "#{current_prefix}catch type: break\n" + format_iseq(entry.iseq) + when InstructionSequence::CatchNext + output << "#{current_prefix}catch type: next\n" + when InstructionSequence::CatchRedo + output << "#{current_prefix}catch type: redo\n" + when InstructionSequence::CatchRescue + output << "#{current_prefix}catch type: rescue\n" + format_iseq(entry.iseq) + end + end + end + + output << "#{current_prefix}|#{"-" * 72}\n" + else + output << "(catch: FALSE)\n" + end + + if (local_table = iseq.local_table) && !local_table.empty? + output << "#{current_prefix}local table (size: #{local_table.size})\n" + + locals = + local_table.locals.each_with_index.map do |local, index| + "[%2d] %s@%d" % [local_table.offset(index), local.name, index] + end + + output << "#{current_prefix}#{locals.join(" ")}\n" + end + + length = 0 + events = [] + lines = [] + + iseq.insns.each do |insn| + case insn + when Integer + lines << insn + when Symbol + events << event(insn) + when InstructionSequence::Label + # skip + else + output << "#{current_prefix}%04d " % length + + disasm = insn.disasm(self) + output << disasm + + if lines.any? + output << " " * (65 - disasm.length) if disasm.length < 65 + elsif events.any? + output << " " * (39 - disasm.length) if disasm.length < 39 + end + + if lines.any? + output << "(%4d)" % lines.last + lines.clear + end + + if events.any? 
+ output << "[#{events.join}]" + events.clear + end + + output << "\n" + length += insn.length + end + end + end + + def with_prefix(value) + previous = @current_prefix + + begin + @current_prefix = value + yield + ensure + @current_prefix = previous + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb new file mode 100644 index 00000000..48305be6 --- /dev/null +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -0,0 +1,1171 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module provides an object representation of the YARV bytecode. + module YARV + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. It also + # functions as a builder for the instruction sequence. + class InstructionSequence + # When the list of instructions is first being created, it's stored as a + # linked list. This is to make it easier to perform peephole optimizations + # and other transformations like instruction specialization. + class InstructionList + class Node + attr_accessor :value, :next_node + + def initialize(value, next_node = nil) + @value = value + @next_node = next_node + end + end + + include Enumerable + attr_reader :head_node, :tail_node + + def initialize + @head_node = nil + @tail_node = nil + end + + def each + return to_enum(__method__) unless block_given? + each_node { |node| yield node.value } + end + + def each_node + return to_enum(__method__) unless block_given? + node = head_node + + while node + yield node, node.value + node = node.next_node + end + end + + def push(instruction) + node = Node.new(instruction) + + if head_node.nil? 
+ @head_node = node + @tail_node = node + else + @tail_node.next_node = node + @tail_node = node + end + + node + end + end + + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + + # This provides a handle to the rb_iseq_load function, which allows you to + # pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + ISEQ_LOAD = + begin + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + rescue NameError + end + + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. + class Stack + attr_reader :current_size, :maximum_size + + def initialize + @current_size = 0 + @maximum_size = 0 + end + + def change_by(value) + @current_size += value + @maximum_size = @current_size if @current_size > @maximum_size + end + end + + # This represents the destination of instructions that jump. Initially it + # does not track its position so that when we perform optimizations the + # indices don't get messed up. + class Label + attr_reader :name + + # When we're serializing the instruction sequence, we need to be able to + # look up the label from the branch instructions and then access the + # subsequent node. So we'll store the reference here. + attr_accessor :node + + def initialize(name = nil) + @name = name + end + + def patch!(name) + @name = name + end + + def inspect + name.inspect + end + end + + # The type of the instruction sequence. + attr_reader :type + + # The name of the instruction sequence. + attr_reader :name + + # The parent instruction sequence, if there is one. + attr_reader :parent_iseq + + # The location of the root node of this instruction sequence. 
+ attr_reader :location + + # This is the list of information about the arguments to this + # instruction sequence. + attr_accessor :argument_size + attr_reader :argument_options + + # The catch table for this instruction sequence. + attr_reader :catch_table + + # The list of instructions for this instruction sequence. + attr_reader :insns + + # The table of local variables. + attr_reader :local_table + + # The hash of names of instance and class variables pointing to the + # index of their associated inline storage. + attr_reader :inline_storages + + # The index of the next inline storage that will be created. + attr_reader :storage_index + + # An object that will track the current size of the stack and the + # maximum size of the stack for this instruction sequence. + attr_reader :stack + + # These are various compilation options provided. + attr_reader :options + + def initialize( + type, + name, + parent_iseq, + location, + options = Compiler::Options.new + ) + @type = type + @name = name + @parent_iseq = parent_iseq + @location = location + + @argument_size = 0 + @argument_options = {} + @catch_table = [] + + @local_table = LocalTable.new + @inline_storages = {} + @insns = InstructionList.new + @storage_index = 0 + @stack = Stack.new + + @options = options + end + + ########################################################################## + # Query methods + ########################################################################## + + def local_variable(name, level = 0) + if (lookup = local_table.find(name, level)) + lookup + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + end + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + inline_storages[name] = inline_storage unless inline_storages.key?(name) + + inline_storages[name] + end + + def length + insns + .each + .inject(0) do |sum, insn| + case insn + when Integer, Label, Symbol + sum + else + sum + insn.length + 
end + end + end + + def eval + raise "Unsupported platform" if ISEQ_LOAD.nil? + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + # Dump all of the instructions into a flat list. + dumped = + insns.map do |insn| + case insn + when Integer, Symbol + insn + when Label + insn.name + else + insn.to_a(self) + end + end + + dumped_options = argument_options.dup + dumped_options[:opt].map!(&:name) if dumped_options[:opt] + + # Next, return the instruction sequence as an array. + [ + MAGIC, + versions[0], + versions[1], + 1, + { + arg_size: argument_size, + local_size: local_table.size, + stack_max: stack.maximum_size, + node_id: -1, + node_ids: [-1] * insns.length + }, + name, + "", + "", + location.start_line, + type, + local_table.names, + dumped_options, + catch_table.map(&:to_a), + dumped + ] + end + + def disasm + disassembler = Disassembler.new + disassembler.enqueue(self) + disassembler.format! + end + + # This method converts our linked list of instructions into a final array + # and performs any other compilation steps necessary. + def compile! + specialize_instructions! if options.specialized_instruction? + + length = 0 + insns.each do |insn| + case insn + when Integer, Symbol + # skip + when Label + insn.patch!(:"label_#{length}") + when DefineClass + insn.class_iseq.compile! + length += insn.length + when DefineMethod, DefineSMethod + insn.method_iseq.compile! + length += insn.length + when InvokeSuper, Send + insn.block_iseq.compile! if insn.block_iseq + length += insn.length + when Once + insn.iseq.compile! + length += insn.length + else + length += insn.length + end + end + + @insns = insns.to_a + end + + def specialize_instructions! 
+ insns.each_node do |node, value| + case value + when NewArray + next unless node.next_node + + next_node = node.next_node + next unless next_node.value.is_a?(Send) + next if next_node.value.block_iseq + + calldata = next_node.value.calldata + next unless calldata.flags == CallData::CALL_ARGS_SIMPLE + next unless calldata.argc == 0 + + case calldata.method + when :max + node.value = OptNewArrayMax.new(value.number) + node.next_node = next_node.next_node + when :min + node.value = OptNewArrayMin.new(value.number) + node.next_node = next_node.next_node + end + when PutObject, PutString + next unless node.next_node + next if value.is_a?(PutObject) && !value.object.is_a?(String) + + next_node = node.next_node + next unless next_node.value.is_a?(Send) + next if next_node.value.block_iseq + + calldata = next_node.value.calldata + next unless calldata.flags == CallData::CALL_ARGS_SIMPLE + next unless calldata.argc == 0 + + case calldata.method + when :freeze + node.value = OptStrFreeze.new(value.object, calldata) + node.next_node = next_node.next_node + when :-@ + node.value = OptStrUMinus.new(value.object, calldata) + node.next_node = next_node.next_node + end + when Send + calldata = value.calldata + + if !value.block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and + # the number of arguments. 
+ node.value = + case [calldata.method, calldata.argc] + when [:length, 0] + OptLength.new(calldata) + when [:size, 0] + OptSize.new(calldata) + when [:empty?, 0] + OptEmptyP.new(calldata) + when [:nil?, 0] + OptNilP.new(calldata) + when [:succ, 0] + OptSucc.new(calldata) + when [:!, 0] + OptNot.new(calldata) + when [:+, 1] + OptPlus.new(calldata) + when [:-, 1] + OptMinus.new(calldata) + when [:*, 1] + OptMult.new(calldata) + when [:/, 1] + OptDiv.new(calldata) + when [:%, 1] + OptMod.new(calldata) + when [:==, 1] + OptEq.new(calldata) + when [:!=, 1] + OptNEq.new(YARV.calldata(:==, 1), calldata) + when [:=~, 1] + OptRegExpMatch2.new(calldata) + when [:<, 1] + OptLT.new(calldata) + when [:<=, 1] + OptLE.new(calldata) + when [:>, 1] + OptGT.new(calldata) + when [:>=, 1] + OptGE.new(calldata) + when [:<<, 1] + OptLTLT.new(calldata) + when [:[], 1] + OptAref.new(calldata) + when [:&, 1] + OptAnd.new(calldata) + when [:|, 1] + OptOr.new(calldata) + when [:[]=, 2] + OptAset.new(calldata) + else + OptSendWithoutBlock.new(calldata) + end + end + end + end + end + + ########################################################################## + # Child instruction sequence methods + ########################################################################## + + def child_iseq(type, name, location) + InstructionSequence.new(type, name, self, location, options) + end + + def block_child_iseq(location) + current = self + current = current.parent_iseq while current.type == :block + child_iseq(:block, "block in #{current.name}", location) + end + + def class_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def method_child_iseq(name, location) + child_iseq(:method, name, location) + end + + def module_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def singleton_class_child_iseq(location) + child_iseq(:class, "singleton class", location) + end + + ########################################################################## + # Catch 
table methods + ########################################################################## + + class CatchEntry + attr_reader :iseq, :begin_label, :end_label, :exit_label + + def initialize(iseq, begin_label, end_label, exit_label) + @iseq = iseq + @begin_label = begin_label + @end_label = end_label + @exit_label = exit_label + end + end + + class CatchBreak < CatchEntry + def to_a + [:break, iseq.to_a, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchNext < CatchEntry + def to_a + [:next, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchRedo < CatchEntry + def to_a + [:redo, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchRescue < CatchEntry + def to_a + [ + :rescue, + iseq.to_a, + begin_label.name, + end_label.name, + exit_label.name + ] + end + end + + class CatchRetry < CatchEntry + def to_a + [:retry, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + def catch_break(iseq, begin_label, end_label, exit_label) + catch_table << CatchBreak.new(iseq, begin_label, end_label, exit_label) + end + + def catch_next(begin_label, end_label, exit_label) + catch_table << CatchNext.new(nil, begin_label, end_label, exit_label) + end + + def catch_redo(begin_label, end_label, exit_label) + catch_table << CatchRedo.new(nil, begin_label, end_label, exit_label) + end + + def catch_rescue(iseq, begin_label, end_label, exit_label) + catch_table << CatchRescue.new(iseq, begin_label, end_label, exit_label) + end + + def catch_retry(begin_label, end_label, exit_label) + catch_table << CatchRetry.new(nil, begin_label, end_label, exit_label) + end + + ########################################################################## + # Instruction push methods + ########################################################################## + + def label + Label.new + end + + def push(value) + node = insns.push(value) + + case value + when Array, Integer, Symbol + value + when 
Label + value.node = node + value + else + stack.change_by(-value.pops + value.pushes) + value + end + end + + def event(name) + push(name) + end + + def adjuststack(number) + push(AdjustStack.new(number)) + end + + def anytostring + push(AnyToString.new) + end + + def branchif(label) + push(BranchIf.new(label)) + end + + def branchnil(label) + push(BranchNil.new(label)) + end + + def branchunless(label) + push(BranchUnless.new(label)) + end + + def checkkeyword(keyword_bits_index, keyword_index) + push(CheckKeyword.new(keyword_bits_index, keyword_index)) + end + + def checkmatch(type) + push(CheckMatch.new(type)) + end + + def checktype(type) + push(CheckType.new(type)) + end + + def concatarray + push(ConcatArray.new) + end + + def concatstrings(number) + push(ConcatStrings.new(number)) + end + + def defined(type, name, message) + push(Defined.new(type, name, message)) + end + + def defineclass(name, class_iseq, flags) + push(DefineClass.new(name, class_iseq, flags)) + end + + def definemethod(name, method_iseq) + push(DefineMethod.new(name, method_iseq)) + end + + def definesmethod(name, method_iseq) + push(DefineSMethod.new(name, method_iseq)) + end + + def dup + push(Dup.new) + end + + def duparray(object) + push(DupArray.new(object)) + end + + def duphash(object) + push(DupHash.new(object)) + end + + def dupn(number) + push(DupN.new(number)) + end + + def expandarray(length, flags) + push(ExpandArray.new(length, flags)) + end + + def getblockparam(index, level) + push(GetBlockParam.new(index, level)) + end + + def getblockparamproxy(index, level) + push(GetBlockParamProxy.new(index, level)) + end + + def getclassvariable(name) + if RUBY_VERSION < "3.0" + push(Legacy::GetClassVariable.new(name)) + else + push(GetClassVariable.new(name, inline_storage_for(name))) + end + end + + def getconstant(name) + push(GetConstant.new(name)) + end + + def getglobal(name) + push(GetGlobal.new(name)) + end + + def getinstancevariable(name) + if RUBY_VERSION < "3.2" + 
push(GetInstanceVariable.new(name, inline_storage_for(name))) + else + push(GetInstanceVariable.new(name, inline_storage)) + end + end + + def getlocal(index, level) + if options.operands_unification? + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. + case level + when 0 + push(GetLocalWC0.new(index)) + when 1 + push(GetLocalWC1.new(index)) + else + push(GetLocal.new(index, level)) + end + else + push(GetLocal.new(index, level)) + end + end + + def getspecial(key, type) + push(GetSpecial.new(key, type)) + end + + def intern + push(Intern.new) + end + + def invokeblock(calldata) + push(InvokeBlock.new(calldata)) + end + + def invokesuper(calldata, block_iseq) + push(InvokeSuper.new(calldata, block_iseq)) + end + + def jump(label) + push(Jump.new(label)) + end + + def leave + push(Leave.new) + end + + def newarray(number) + push(NewArray.new(number)) + end + + def newarraykwsplat(number) + push(NewArrayKwSplat.new(number)) + end + + def newhash(number) + push(NewHash.new(number)) + end + + def newrange(exclude_end) + push(NewRange.new(exclude_end)) + end + + def nop + push(Nop.new) + end + + def objtostring(calldata) + push(ObjToString.new(calldata)) + end + + def once(iseq, cache) + push(Once.new(iseq, cache)) + end + + def opt_aref_with(object, calldata) + push(OptArefWith.new(object, calldata)) + end + + def opt_aset_with(object, calldata) + push(OptAsetWith.new(object, calldata)) + end + + def opt_case_dispatch(case_dispatch_hash, else_label) + push(OptCaseDispatch.new(case_dispatch_hash, else_label)) + end + + def opt_getconstant_path(names) + if RUBY_VERSION < "3.2" || !options.inline_const_cache? + cache = nil + cache_filled_label = nil + + if options.inline_const_cache? 
+ cache = inline_storage + cache_filled_label = label + opt_getinlinecache(cache_filled_label, cache) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + elsif names[0] == :"" + names.shift + putobject(Object) + else + putnil + end + + names.each_with_index do |name, index| + putobject(index == 0) + getconstant(name) + end + + if options.inline_const_cache? + opt_setinlinecache(cache) + push(cache_filled_label) + end + else + push(OptGetConstantPath.new(names)) + end + end + + def opt_getinlinecache(label, cache) + push(Legacy::OptGetInlineCache.new(label, cache)) + end + + def opt_setinlinecache(cache) + push(Legacy::OptSetInlineCache.new(cache)) + end + + def pop + push(Pop.new) + end + + def putnil + push(PutNil.new) + end + + def putobject(object) + if options.operands_unification? + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. 
+ if object.eql?(0) + push(PutObjectInt2Fix0.new) + elsif object.eql?(1) + push(PutObjectInt2Fix1.new) + else + push(PutObject.new(object)) + end + else + push(PutObject.new(object)) + end + end + + def putself + push(PutSelf.new) + end + + def putspecialobject(object) + push(PutSpecialObject.new(object)) + end + + def putstring(object) + push(PutString.new(object)) + end + + def send(calldata, block_iseq = nil) + push(Send.new(calldata, block_iseq)) + end + + def setblockparam(index, level) + push(SetBlockParam.new(index, level)) + end + + def setclassvariable(name) + if RUBY_VERSION < "3.0" + push(Legacy::SetClassVariable.new(name)) + else + push(SetClassVariable.new(name, inline_storage_for(name))) + end + end + + def setconstant(name) + push(SetConstant.new(name)) + end + + def setglobal(name) + push(SetGlobal.new(name)) + end + + def setinstancevariable(name) + if RUBY_VERSION < "3.2" + push(SetInstanceVariable.new(name, inline_storage_for(name))) + else + push(SetInstanceVariable.new(name, inline_storage)) + end + end + + def setlocal(index, level) + if options.operands_unification? + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + push(SetLocalWC0.new(index)) + when 1 + push(SetLocalWC1.new(index)) + else + push(SetLocal.new(index, level)) + end + else + push(SetLocal.new(index, level)) + end + end + + def setn(number) + push(SetN.new(number)) + end + + def setspecial(key) + push(SetSpecial.new(key)) + end + + def splatarray(flag) + push(SplatArray.new(flag)) + end + + def swap + push(Swap.new) + end + + def throw(type) + push(Throw.new(type)) + end + + def topn(number) + push(TopN.new(number)) + end + + def toregexp(options, length) + push(ToRegExp.new(options, length)) + end + + # This method will create a new instruction sequence from a serialized + # RubyVM::InstructionSequence object. + def self.from(source, options = Compiler::Options.new, parent_iseq = nil) + iseq = new(source[9], source[5], parent_iseq, Location.default, options) + + # set up the labels object so that the labels are shared between the + # location in the instruction sequence and the instructions that + # reference them + labels = Hash.new { |hash, name| hash[name] = Label.new(name) } + + # set up the correct argument size + iseq.argument_size = source[4][:arg_size] + + # set up all of the locals + source[10].each { |local| iseq.local_table.plain(local) } + + # set up the argument options + iseq.argument_options.merge!(source[11]) + if iseq.argument_options[:opt] + iseq.argument_options[:opt].map! 
{ |opt| labels[opt] } + end + + # set up the catch table + source[12].each do |entry| + case entry[0] + when :break + iseq.catch_break( + from(entry[1]), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :next + iseq.catch_next( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :rescue + iseq.catch_rescue( + from(entry[1]), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :redo + iseq.catch_redo( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :retry + iseq.catch_retry( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + else + raise "unknown catch type: #{entry[0]}" + end + end + + # set up all of the instructions + source[13].each do |insn| + # add line numbers + if insn.is_a?(Integer) + iseq.push(insn) + next + end + + # add events and labels + if insn.is_a?(Symbol) + if insn.start_with?("label_") + iseq.push(labels[insn]) + else + iseq.push(insn) + end + next + end + + # add instructions, mapped to our own instruction classes + type, *opnds = insn + + case type + when :adjuststack + iseq.adjuststack(opnds[0]) + when :anytostring + iseq.anytostring + when :branchif + iseq.branchif(labels[opnds[0]]) + when :branchnil + iseq.branchnil(labels[opnds[0]]) + when :branchunless + iseq.branchunless(labels[opnds[0]]) + when :checkkeyword + iseq.checkkeyword(iseq.local_table.size - opnds[0] + 2, opnds[1]) + when :checkmatch + iseq.checkmatch(opnds[0]) + when :checktype + iseq.checktype(opnds[0]) + when :concatarray + iseq.concatarray + when :concatstrings + iseq.concatstrings(opnds[0]) + when :defineclass + iseq.defineclass(opnds[0], from(opnds[1], options, iseq), opnds[2]) + when :defined + iseq.defined(opnds[0], opnds[1], opnds[2]) + when :definemethod + iseq.definemethod(opnds[0], from(opnds[1], options, iseq)) + when :definesmethod + iseq.definesmethod(opnds[0], from(opnds[1], options, iseq)) + when :dup + iseq.dup + when :duparray + iseq.duparray(opnds[0]) + when :duphash 
+ iseq.duphash(opnds[0]) + when :dupn + iseq.dupn(opnds[0]) + when :expandarray + iseq.expandarray(opnds[0], opnds[1]) + when :getblockparam, :getblockparamproxy, :getlocal, :getlocal_WC_0, + :getlocal_WC_1, :setblockparam, :setlocal, :setlocal_WC_0, + :setlocal_WC_1 + current = iseq + level = 0 + + case type + when :getlocal_WC_1, :setlocal_WC_1 + level = 1 + when :getblockparam, :getblockparamproxy, :getlocal, :setblockparam, + :setlocal + level = opnds[1] + end + + level.times { current = current.parent_iseq } + index = current.local_table.size - opnds[0] + 2 + + case type + when :getblockparam + iseq.getblockparam(index, level) + when :getblockparamproxy + iseq.getblockparamproxy(index, level) + when :getlocal, :getlocal_WC_0, :getlocal_WC_1 + iseq.getlocal(index, level) + when :setblockparam + iseq.setblockparam(index, level) + when :setlocal, :setlocal_WC_0, :setlocal_WC_1 + iseq.setlocal(index, level) + end + when :getclassvariable + iseq.push(GetClassVariable.new(opnds[0], opnds[1])) + when :getconstant + iseq.getconstant(opnds[0]) + when :getglobal + iseq.getglobal(opnds[0]) + when :getinstancevariable + iseq.push(GetInstanceVariable.new(opnds[0], opnds[1])) + when :getspecial + iseq.getspecial(opnds[0], opnds[1]) + when :intern + iseq.intern + when :invokeblock + iseq.invokeblock(CallData.from(opnds[0])) + when :invokesuper + block_iseq = opnds[1] ? 
from(opnds[1], options, iseq) : nil + iseq.invokesuper(CallData.from(opnds[0]), block_iseq) + when :jump + iseq.jump(labels[opnds[0]]) + when :leave + iseq.leave + when :newarray + iseq.newarray(opnds[0]) + when :newarraykwsplat + iseq.newarraykwsplat(opnds[0]) + when :newhash + iseq.newhash(opnds[0]) + when :newrange + iseq.newrange(opnds[0]) + when :nop + iseq.nop + when :objtostring + iseq.objtostring(CallData.from(opnds[0])) + when :once + iseq.once(from(opnds[0], options, iseq), opnds[1]) + when :opt_and, :opt_aref, :opt_aset, :opt_div, :opt_empty_p, :opt_eq, + :opt_ge, :opt_gt, :opt_le, :opt_length, :opt_lt, :opt_ltlt, + :opt_minus, :opt_mod, :opt_mult, :opt_nil_p, :opt_not, :opt_or, + :opt_plus, :opt_regexpmatch2, :opt_send_without_block, :opt_size, + :opt_succ + iseq.send(CallData.from(opnds[0]), nil) + when :opt_aref_with + iseq.opt_aref_with(opnds[0], CallData.from(opnds[1])) + when :opt_aset_with + iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) + when :opt_case_dispatch + hash = + opnds[0] + .each_slice(2) + .to_h + .transform_values { |value| labels[value] } + iseq.opt_case_dispatch(hash, labels[opnds[1]]) + when :opt_getconstant_path + iseq.opt_getconstant_path(opnds[0]) + when :opt_getinlinecache + iseq.opt_getinlinecache(labels[opnds[0]], opnds[1]) + when :opt_newarray_max + iseq.newarray(opnds[0]) + iseq.send(YARV.calldata(:max)) + when :opt_newarray_min + iseq.newarray(opnds[0]) + iseq.send(YARV.calldata(:min)) + when :opt_neq + iseq.push( + OptNEq.new(CallData.from(opnds[0]), CallData.from(opnds[1])) + ) + when :opt_setinlinecache + iseq.opt_setinlinecache(opnds[0]) + when :opt_str_freeze + iseq.putstring(opnds[0]) + iseq.send(YARV.calldata(:freeze)) + when :opt_str_uminus + iseq.putstring(opnds[0]) + iseq.send(YARV.calldata(:-@)) + when :pop + iseq.pop + when :putnil + iseq.putnil + when :putobject + iseq.putobject(opnds[0]) + when :putobject_INT2FIX_0_ + iseq.putobject(0) + when :putobject_INT2FIX_1_ + iseq.putobject(1) + when :putself + 
iseq.putself + when :putstring + iseq.putstring(opnds[0]) + when :putspecialobject + iseq.putspecialobject(opnds[0]) + when :send + block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil + iseq.send(CallData.from(opnds[0]), block_iseq) + when :setclassvariable + iseq.push(SetClassVariable.new(opnds[0], opnds[1])) + when :setconstant + iseq.setconstant(opnds[0]) + when :setglobal + iseq.setglobal(opnds[0]) + when :setinstancevariable + iseq.push(SetInstanceVariable.new(opnds[0], opnds[1])) + when :setn + iseq.setn(opnds[0]) + when :setspecial + iseq.setspecial(opnds[0]) + when :splatarray + iseq.splatarray(opnds[0]) + when :swap + iseq.swap + when :throw + iseq.throw(opnds[0]) + when :topn + iseq.topn(opnds[0]) + when :toregexp + iseq.toregexp(opnds[0], opnds[1]) + else + raise "Unknown instruction type: #{type}" + end + end + + iseq.compile! if iseq.type == :top + iseq + end + end + end +end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb new file mode 100644 index 00000000..288edb16 --- /dev/null +++ b/lib/syntax_tree/yarv/instructions.rb @@ -0,0 +1,5203 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. 
+ class CallData + CALL_ARGS_SPLAT = 1 << 0 + CALL_ARGS_BLOCKARG = 1 << 1 + CALL_FCALL = 1 << 2 + CALL_VCALL = 1 << 3 + CALL_ARGS_SIMPLE = 1 << 4 + CALL_BLOCKISEQ = 1 << 5 + CALL_KWARG = 1 << 6 + CALL_KW_SPLAT = 1 << 7 + CALL_TAILCALL = 1 << 8 + CALL_SUPER = 1 << 9 + CALL_ZSUPER = 1 << 10 + CALL_OPT_SEND = 1 << 11 + CALL_KW_SPLAT_MUT = 1 << 12 + + attr_reader :method, :argc, :flags, :kw_arg + + def initialize( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + @method = method + @argc = argc + @flags = flags + @kw_arg = kw_arg + end + + def flag?(mask) + (flags & mask) > 0 + end + + def to_h + result = { mid: method, flag: flags, orig_argc: argc } + result[:kw_arg] = kw_arg if kw_arg + result + end + + def self.from(serialized) + new( + serialized[:mid], + serialized[:orig_argc], + serialized[:flag], + serialized[:kw_arg] + ) + end + end + + # A convenience method for creating a CallData object. + def self.calldata( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + CallData.new(method, argc, flags, kw_arg) + end + + # ### Summary + # + # `adjuststack` accepts a single integer argument and removes that many + # elements from the top of the stack. + # + # ### Usage + # + # ~~~ruby + # x = [true] + # x[0] ||= nil + # x[0] + # ~~~ + # + class AdjustStack + attr_reader :number + + def initialize(number) + @number = number + end + + def disasm(fmt) + fmt.instruction("adjuststack", [fmt.object(number)]) + end + + def to_a(_iseq) + [:adjuststack, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 0 + end + + def canonical + self + end + + def call(vm) + vm.pop(number) + end + end + + # ### Summary + # + # `anytostring` ensures that the value on top of the stack is a string. + # + # It pops two values off the stack. If the first value is a string it + # pushes it back on the stack. 
If the first value is not a string, it uses + # Ruby's built in string coercion to coerce the second value to a string + # and then pushes that back on the stack. + # + # This is used in conjunction with `objtostring` as a fallback for when an + # object's `to_s` method does not return a string. + # + # ### Usage + # + # ~~~ruby + # "#{5}" + # ~~~ + # + class AnyToString + def disasm(fmt) + fmt.instruction("anytostring") + end + + def to_a(_iseq) + [:anytostring] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + original, value = vm.pop(2) + + if value.is_a?(String) + vm.push(value) + else + vm.push("#<#{original.class.name}:0000>") + end + end + end + + # ### Summary + # + # `branchif` has one argument: the jump index. It pops one value off the + # stack: the jump condition. + # + # If the value popped off the stack is true, `branchif` jumps to + # the jump index and continues executing there. + # + # ### Usage + # + # ~~~ruby + # x = true + # x ||= "foo" + # puts x + # ~~~ + # + class BranchIf + attr_reader :label + + def initialize(label) + @label = label + end + + def disasm(fmt) + fmt.instruction("branchif", [fmt.label(label)]) + end + + def to_a(_iseq) + [:branchif, label.name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + + def canonical + self + end + + def call(vm) + vm.jump(label) if vm.pop + end + end + + # ### Summary + # + # `branchnil` has one argument: the jump index. It pops one value off the + # stack: the jump condition. + # + # If the value popped off the stack is nil, `branchnil` jumps to + # the jump index and continues executing there. 
+ # + # ### Usage + # + # ~~~ruby + # x = nil + # if x&.to_s + # puts "hi" + # end + # ~~~ + # + class BranchNil + attr_reader :label + + def initialize(label) + @label = label + end + + def disasm(fmt) + fmt.instruction("branchnil", [fmt.label(label)]) + end + + def to_a(_iseq) + [:branchnil, label.name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + + def canonical + self + end + + def call(vm) + vm.jump(label) if vm.pop.nil? + end + end + + # ### Summary + # + # `branchunless` has one argument: the jump index. It pops one value off + # the stack: the jump condition. + # + # If the value popped off the stack is false or nil, `branchunless` jumps + # to the jump index and continues executing there. + # + # ### Usage + # + # ~~~ruby + # if 2 + 3 + # puts "foo" + # end + # ~~~ + # + class BranchUnless + attr_reader :label + + def initialize(label) + @label = label + end + + def disasm(fmt) + fmt.instruction("branchunless", [fmt.label(label)]) + end + + def to_a(_iseq) + [:branchunless, label.name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + + def canonical + self + end + + def call(vm) + vm.jump(label) unless vm.pop + end + end + + # ### Summary + # + # `checkkeyword` checks if a keyword was passed at the callsite that + # called into the method represented by the instruction sequence. It has + # two arguments: the index of the local variable that stores the keywords + # metadata and the index of the keyword within that metadata. It pushes + # a boolean onto the stack indicating whether or not the keyword was + # given. 
+ # + # ### Usage + # + # ~~~ruby + # def evaluate(value: rand) + # value + # end + # + # evaluate(value: 3) + # ~~~ + # + class CheckKeyword + attr_reader :keyword_bits_index, :keyword_index + + def initialize(keyword_bits_index, keyword_index) + @keyword_bits_index = keyword_bits_index + @keyword_index = keyword_index + end + + def disasm(fmt) + fmt.instruction( + "checkkeyword", + [fmt.object(keyword_bits_index), fmt.object(keyword_index)] + ) + end + + def to_a(iseq) + [ + :checkkeyword, + iseq.local_table.offset(keyword_bits_index), + keyword_index + ] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) + end + end + + # ### Summary + # + # `checkmatch` checks if the current pattern matches the current value. It + # pops the target and the pattern off the stack and pushes a boolean onto + # the stack if it matches or not. + # + # ### Usage + # + # ~~~ruby + # foo in Foo + # ~~~ + # + class CheckMatch + TYPE_WHEN = 1 + TYPE_CASE = 2 + TYPE_RESCUE = 3 + + attr_reader :type + + def initialize(type) + @type = type + end + + def disasm(fmt) + fmt.instruction("checkmatch", [fmt.object(type)]) + end + + def to_a(_iseq) + [:checkmatch, type] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + raise NotImplementedError, "checkmatch" + end + end + + # ### Summary + # + # `checktype` checks if the value on top of the stack is of a certain type. + # The type is the only argument. It pops the value off the stack and pushes + # a boolean onto the stack indicating whether or not the value is of the + # given type. 
+ # + # ### Usage + # + # ~~~ruby + # foo in [bar] + # ~~~ + # + class CheckType + TYPE_OBJECT = 0x01 + TYPE_CLASS = 0x02 + TYPE_MODULE = 0x03 + TYPE_FLOAT = 0x04 + TYPE_STRING = 0x05 + TYPE_REGEXP = 0x06 + TYPE_ARRAY = 0x07 + TYPE_HASH = 0x08 + TYPE_STRUCT = 0x09 + TYPE_BIGNUM = 0x0a + TYPE_FILE = 0x0b + TYPE_DATA = 0x0c + TYPE_MATCH = 0x0d + TYPE_COMPLEX = 0x0e + TYPE_RATIONAL = 0x0f + TYPE_NIL = 0x11 + TYPE_TRUE = 0x12 + TYPE_FALSE = 0x13 + TYPE_SYMBOL = 0x14 + TYPE_FIXNUM = 0x15 + TYPE_UNDEF = 0x16 + + attr_reader :type + + def initialize(type) + @type = type + end + + def disasm(fmt) + name = + case type + when TYPE_OBJECT + "T_OBJECT" + when TYPE_CLASS + "T_CLASS" + when TYPE_MODULE + "T_MODULE" + when TYPE_FLOAT + "T_FLOAT" + when TYPE_STRING + "T_STRING" + when TYPE_REGEXP + "T_REGEXP" + when TYPE_ARRAY + "T_ARRAY" + when TYPE_HASH + "T_HASH" + when TYPE_STRUCT + "T_STRUCT" + when TYPE_BIGNUM + "T_BIGNUM" + when TYPE_FILE + "T_FILE" + when TYPE_DATA + "T_DATA" + when TYPE_MATCH + "T_MATCH" + when TYPE_COMPLEX + "T_COMPLEX" + when TYPE_RATIONAL + "T_RATIONAL" + when TYPE_NIL + "T_NIL" + when TYPE_TRUE + "T_TRUE" + when TYPE_FALSE + "T_FALSE" + when TYPE_SYMBOL + "T_SYMBOL" + when TYPE_FIXNUM + "T_FIXNUM" + when TYPE_UNDEF + "T_UNDEF" + end + + fmt.instruction("checktype", [name]) + end + + def to_a(_iseq) + [:checktype, type] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + # TODO: This is incorrect. The instruction only pushes a single value + # onto the stack. However, if this is set to 1, we no longer match the + # output of RubyVM::InstructionSequence. So leaving this here until we + # can investigate further. 
+ 2 + end + + def canonical + self + end + + def call(vm) + object = vm.pop + result = + case type + when TYPE_OBJECT + raise NotImplementedError, "checktype TYPE_OBJECT" + when TYPE_CLASS + object.is_a?(Class) + when TYPE_MODULE + object.is_a?(Module) + when TYPE_FLOAT + object.is_a?(Float) + when TYPE_STRING + object.is_a?(String) + when TYPE_REGEXP + object.is_a?(Regexp) + when TYPE_ARRAY + object.is_a?(Array) + when TYPE_HASH + object.is_a?(Hash) + when TYPE_STRUCT + object.is_a?(Struct) + when TYPE_BIGNUM + raise NotImplementedError, "checktype TYPE_BIGNUM" + when TYPE_FILE + object.is_a?(File) + when TYPE_DATA + raise NotImplementedError, "checktype TYPE_DATA" + when TYPE_MATCH + raise NotImplementedError, "checktype TYPE_MATCH" + when TYPE_COMPLEX + object.is_a?(Complex) + when TYPE_RATIONAL + object.is_a?(Rational) + when TYPE_NIL + object.nil? + when TYPE_TRUE + object == true + when TYPE_FALSE + object == false + when TYPE_SYMBOL + object.is_a?(Symbol) + when TYPE_FIXNUM + object.is_a?(Integer) + when TYPE_UNDEF + raise NotImplementedError, "checktype TYPE_UNDEF" + end + + vm.push(result) + end + end + + # ### Summary + # + # `concatarray` concatenates the two Arrays on top of the stack. + # + # It coerces the two objects at the top of the stack into Arrays by + # calling `to_a` if necessary, and makes sure to `dup` the first Array if + # it was already an Array, to avoid mutating it when concatenating. + # + # ### Usage + # + # ~~~ruby + # [1, *2] + # ~~~ + # + class ConcatArray + def disasm(fmt) + fmt.instruction("concatarray") + end + + def to_a(_iseq) + [:concatarray] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + left, right = vm.pop(2) + vm.push([*left, *right]) + end + end + + # ### Summary + # + # `concatstrings` pops a number of strings from the stack joins them + # together into a single string and pushes that string back on the stack. 
+ # + # This does no coercion and so is always used in conjunction with + # `objtostring` and `anytostring` to ensure the stack contents are always + # strings. + # + # ### Usage + # + # ~~~ruby + # "#{5}" + # ~~~ + # + class ConcatStrings + attr_reader :number + + def initialize(number) + @number = number + end + + def disasm(fmt) + fmt.instruction("concatstrings", [fmt.object(number)]) + end + + def to_a(_iseq) + [:concatstrings, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).join) + end + end + + # ### Summary + # + # `defineclass` defines a class. First it pops the superclass off the + # stack, then it pops the object off the stack that the class should be + # defined under. It has three arguments: the name of the constant, the + # instruction sequence associated with the class, and various flags that + # indicate if it is a singleton class, a module, or a regular class. + # + # ### Usage + # + # ~~~ruby + # class Foo + # end + # ~~~ + # + class DefineClass + TYPE_CLASS = 0 + TYPE_SINGLETON_CLASS = 1 + TYPE_MODULE = 2 + FLAG_SCOPED = 8 + FLAG_HAS_SUPERCLASS = 16 + + attr_reader :name, :class_iseq, :flags + + def initialize(name, class_iseq, flags) + @name = name + @class_iseq = class_iseq + @flags = flags + end + + def disasm(fmt) + fmt.enqueue(class_iseq) + fmt.instruction( + "defineclass", + [fmt.object(name), class_iseq.name, fmt.object(flags)] + ) + end + + def to_a(_iseq) + [:defineclass, name, class_iseq.to_a, flags] + end + + def length + 4 + end + + def pops + 2 + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + object, superclass = vm.pop(2) + iseq = class_iseq + + clazz = Class.new(superclass || Object) + vm.push(vm.run_class_frame(iseq, clazz)) + + object.const_set(name, clazz) + end + end + + # ### Summary + # + # `defined` checks if the top value of the stack is defined. 
If it is, it + # pushes its value onto the stack. Otherwise it pushes `nil`. + # + # ### Usage + # + # ~~~ruby + # defined?(x) + # ~~~ + # + class Defined + TYPE_NIL = 1 + TYPE_IVAR = 2 + TYPE_LVAR = 3 + TYPE_GVAR = 4 + TYPE_CVAR = 5 + TYPE_CONST = 6 + TYPE_METHOD = 7 + TYPE_YIELD = 8 + TYPE_ZSUPER = 9 + TYPE_SELF = 10 + TYPE_TRUE = 11 + TYPE_FALSE = 12 + TYPE_ASGN = 13 + TYPE_EXPR = 14 + TYPE_REF = 15 + TYPE_FUNC = 16 + TYPE_CONST_FROM = 17 + + attr_reader :type, :name, :message + + def initialize(type, name, message) + @type = type + @name = name + @message = message + end + + def disasm(fmt) + type_name = + case type + when TYPE_NIL + "nil" + when TYPE_IVAR + "ivar" + when TYPE_LVAR + "lvar" + when TYPE_GVAR + "gvar" + when TYPE_CVAR + "cvar" + when TYPE_CONST + "const" + when TYPE_METHOD + "method" + when TYPE_YIELD + "yield" + when TYPE_ZSUPER + "zsuper" + when TYPE_SELF + "self" + when TYPE_TRUE + "true" + when TYPE_FALSE + "false" + when TYPE_ASGN + "asgn" + when TYPE_EXPR + "expr" + when TYPE_REF + "ref" + when TYPE_FUNC + "func" + when TYPE_CONST_FROM + "constant-from" + end + + fmt.instruction( + "defined", + [type_name, fmt.object(name), fmt.object(message)] + ) + end + + def to_a(_iseq) + [:defined, type, name, message] + end + + def length + 4 + end + + def pops + 1 + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + object = vm.pop + + result = + case type + when TYPE_NIL, TYPE_SELF, TYPE_TRUE, TYPE_FALSE, TYPE_ASGN, TYPE_EXPR + message + when TYPE_IVAR + message if vm._self.instance_variable_defined?(name) + when TYPE_LVAR + raise NotImplementedError, "defined TYPE_LVAR" + when TYPE_GVAR + message if global_variables.include?(name) + when TYPE_CVAR + clazz = vm._self + clazz = clazz.singleton_class unless clazz.is_a?(Module) + message if clazz.class_variable_defined?(name) + when TYPE_CONST + raise NotImplementedError, "defined TYPE_CONST" + when TYPE_METHOD + raise NotImplementedError, "defined TYPE_METHOD" + when 
TYPE_YIELD + raise NotImplementedError, "defined TYPE_YIELD" + when TYPE_ZSUPER + raise NotImplementedError, "defined TYPE_ZSUPER" + when TYPE_REF + raise NotImplementedError, "defined TYPE_REF" + when TYPE_FUNC + message if object.respond_to?(name, true) + when TYPE_CONST_FROM + raise NotImplementedError, "defined TYPE_CONST_FROM" + end + + vm.push(result) + end + end + + # ### Summary + # + # `definemethod` defines a method on the class of the current value of + # `self`. It accepts two arguments. The first is the name of the method + # being defined. The second is the instruction sequence representing the + # body of the method. + # + # ### Usage + # + # ~~~ruby + # def value = "value" + # ~~~ + # + class DefineMethod + attr_reader :method_name, :method_iseq + + def initialize(method_name, method_iseq) + @method_name = method_name + @method_iseq = method_iseq + end + + def disasm(fmt) + fmt.enqueue(method_iseq) + fmt.instruction( + "definemethod", + [fmt.object(method_name), method_iseq.name] + ) + end + + def to_a(_iseq) + [:definemethod, method_name, method_iseq.to_a] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 0 + end + + def canonical + self + end + + def call(vm) + name = method_name + iseq = method_iseq + + vm + ._self + .__send__(:define_method, name) do |*args, **kwargs, &block| + vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + end + end + end + + # ### Summary + # + # `definesmethod` defines a method on the singleton class of the current + # value of `self`. It accepts two arguments. The first is the name of the + # method being defined. The second is the instruction sequence representing + # the body of the method. It pops the object off the stack that the method + # should be defined on. 
#
# ### Usage
#
# ~~~ruby
# def self.value = "value"
# ~~~
#
class DefineSMethod
  attr_reader :method_name, :method_iseq

  def initialize(method_name, method_iseq)
    @method_name = method_name
    @method_iseq = method_iseq
  end

  # Queues the method body for disassembly and prints this instruction.
  def disasm(fmt)
    fmt.enqueue(method_iseq)
    fmt.instruction(
      "definesmethod",
      [fmt.object(method_name), method_iseq.name]
    )
  end

  def to_a(_iseq)
    [:definesmethod, method_name, method_iseq.to_a]
  end

  def length
    3
  end

  # The object that receives the singleton method is taken from the stack.
  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  # Pops the target object and defines the singleton method on it. The
  # target must come from the stack (not from `vm._self`) so that the
  # declared stack effect holds and `def obj.name` targets `obj`.
  def call(vm)
    name = method_name
    iseq = method_iseq
    target = vm.pop

    target.__send__(:define_singleton_method, name) do |*args, **kwargs, &block|
      vm.run_method_frame(name, iseq, self, *args, **kwargs, &block)
    end
  end
end

# ### Summary
#
# `dup` copies the top value of the stack and pushes it onto the stack.
#
# ### Usage
#
# ~~~ruby
# $global = 5
# ~~~
#
class Dup
  def disasm(fmt)
    fmt.instruction("dup")
  end

  def to_a(_iseq)
    [:dup]
  end

  def length
    1
  end

  def pops
    1
  end

  def pushes
    2
  end

  def canonical
    self
  end

  # Pushes a second reference to the object on top of the stack. The object
  # itself must not be cloned: both stack slots have to refer to the very
  # same object, otherwise mutations through one would not be visible
  # through the other.
  def call(vm)
    vm.push(vm.stack.last)
  end
end

# ### Summary
#
# `duparray` dups an Array literal and pushes it onto the stack.
#
# ### Usage
#
# ~~~ruby
# [true]
# ~~~
#
class DupArray
  attr_reader :object

  def initialize(object)
    @object = object
  end

  def disasm(fmt)
    fmt.instruction("duparray", [fmt.object(object)])
  end

  def to_a(_iseq)
    [:duparray, object]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes a fresh copy so the literal stored in the instruction is never
  # handed out directly.
  def call(vm)
    vm.push(object.dup)
  end
end

# ### Summary
#
# `duphash` dups a Hash literal and pushes it onto the stack.
#
# ### Usage
#
# ~~~ruby
# { a: 1 }
# ~~~
#
class DupHash
  attr_reader :object

  def initialize(object)
    @object = object
  end

  def disasm(fmt)
    operands = [fmt.object(object)]
    fmt.instruction("duphash", operands)
  end

  def to_a(_iseq)
    [:duphash, object]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes a copy of the stored literal rather than the literal itself.
  def call(vm)
    copy = object.dup
    vm.push(copy)
  end
end

# ### Summary
#
# `dupn` duplicates the top `n` stack elements.
#
# ### Usage
#
# ~~~ruby
# Object::X ||= true
# ~~~
#
class DupN
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def disasm(fmt)
    operands = [fmt.object(number)]
    fmt.instruction("dupn", operands)
  end

  def to_a(_iseq)
    [:dupn, number]
  end

  def length
    2
  end

  # Reported as a net effect: nothing consumed, `number` values added.
  def pops
    0
  end

  def pushes
    number
  end

  def canonical
    self
  end

  # Takes the top `number` values off and puts them back twice, in order.
  def call(vm)
    top = vm.pop(number)
    2.times { vm.push(*top) }
  end
end

# ### Summary
#
# `expandarray` looks at the top of the stack, and if the value is an array
# it replaces it on the stack with `number` elements of the array, or `nil`
# if the elements are missing.
#
# ### Usage
#
# ~~~ruby
# x, = [true, false, nil]
# ~~~
#
class ExpandArray
  attr_reader :number, :flags

  def initialize(number, flags)
    @number, @flags = number, flags
  end

  def disasm(fmt)
    operands = [fmt.object(number), fmt.object(flags)]
    fmt.instruction("expandarray", operands)
  end

  def to_a(_iseq)
    [:expandarray, number, flags]
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    number
  end

  def canonical
    self
  end

  # Execution of this instruction is not supported yet.
  def call(vm)
    raise NotImplementedError, "expandarray"
  end
end

# ### Summary
#
# `getblockparam` is a similar instruction to `getlocal` in that it looks
# for a local variable in the current instruction sequence's local table and
# walks recursively up the parent instruction sequences until it finds it.
# The local it retrieves, however, is a special block local that was passed
# to the current method. It pushes the value of the block local onto the
# stack.
#
# ### Usage
#
# ~~~ruby
# def foo(&block)
#   block
# end
# ~~~
#
class GetBlockParam
  attr_reader :index, :level

  def initialize(index, level)
    @index = index
    @level = level
  end

  def disasm(fmt)
    fmt.instruction("getblockparam", [fmt.local(index, explicit: level)])
  end

  # Serializes the instruction. The local's offset is resolved against the
  # local table of the instruction sequence `level` parents above `iseq`.
  def to_a(iseq)
    current = iseq
    # Step from the sequence reached so far, not from `iseq`, so that
    # levels greater than one reach the right ancestor.
    level.times { current = current.parent_iseq }
    [:getblockparam, current.local_table.offset(index), level]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the value of the local at `index`, `level` frames up.
  def call(vm)
    vm.push(vm.local_get(index, level))
  end
end

# ### Summary
#
# `getblockparamproxy` is almost the same as `getblockparam` except that it
# pushes a proxy object onto the stack instead of the actual value of the
# block local. This is used when a method is being called on the block
# local.
#
# ### Usage
#
# ~~~ruby
# def foo(&block)
#   block.call
# end
# ~~~
#
class GetBlockParamProxy
  attr_reader :index, :level

  def initialize(index, level)
    @index = index
    @level = level
  end

  def disasm(fmt)
    fmt.instruction(
      "getblockparamproxy",
      [fmt.local(index, explicit: level)]
    )
  end

  # Serializes the instruction. The local's offset is resolved against the
  # local table of the instruction sequence `level` parents above `iseq`.
  def to_a(iseq)
    current = iseq
    # Step from the sequence reached so far, not from `iseq`, so that
    # levels greater than one reach the right ancestor.
    level.times { current = current.parent_iseq }
    [:getblockparamproxy, current.local_table.offset(index), level]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the value of the local at `index`, `level` frames up.
  def call(vm)
    vm.push(vm.local_get(index, level))
  end
end

# ### Summary
#
# `getclassvariable` looks for a class variable in the current class and
# pushes its value onto the stack. It uses an inline cache to reduce the
# need to lookup the class variable in the class hierarchy every time.
#
# ### Usage
#
# ~~~ruby
# @@class_variable
# ~~~
#
class GetClassVariable
  attr_reader :name, :cache

  def initialize(name, cache)
    @name = name
    @cache = cache
  end

  def disasm(fmt)
    fmt.instruction(
      "getclassvariable",
      [fmt.object(name), fmt.inline_storage(cache)]
    )
  end

  def to_a(_iseq)
    [:getclassvariable, name, cache]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the value of the class variable `name`. When `self` is a class
  # or a module the variable is read from it directly; for any other object
  # it is read from the object's class.
  def call(vm)
    clazz = vm._self
    # Test against Module (not Class): plain modules own class variables
    # too and must not be replaced by `Module` itself.
    clazz = clazz.class unless clazz.is_a?(Module)
    vm.push(clazz.class_variable_get(name))
  end
end

# ### Summary
#
# `getconstant` performs a constant lookup and pushes the value of the
# constant onto the stack. It pops both the class it should look in and
# whether or not it should look globally as well.
#
# ### Usage
#
# ~~~ruby
# Constant
# ~~~
#
class GetConstant
  attr_reader :name

  def initialize(name)
    @name = name
  end

  def disasm(fmt)
    fmt.instruction("getconstant", [fmt.object(name)])
  end

  def to_a(_iseq)
    [:getconstant, name]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pops the two operands and pushes the constant's value, searching the
  # frame's lexical nesting from the innermost scope outwards.
  #
  # Raises NameError when no scope in the nesting defines the constant.
  def call(vm)
    # The popped values (const_base, allow_nil) are currently ignored.
    vm.pop(2)

    owner =
      vm.frame.nesting.reverse_each.find do |clazz|
        clazz.const_defined?(name)
      end

    raise NameError, "uninitialized constant #{name}" unless owner
    vm.push(owner.const_get(name))
  end
end

# ### Summary
#
# `getglobal` pushes the value of a global variable onto the stack.
#
# ### Usage
#
# ~~~ruby
# $$
# ~~~
#
class GetGlobal
  attr_reader :name

  def initialize(name)
    @name = name
  end

  def disasm(fmt)
    operands = [fmt.object(name)]
    fmt.instruction("getglobal", operands)
  end

  def to_a(_iseq)
    [:getglobal, name]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the current value of the global variable.
  def call(vm)
    # Ruby has no reflection API for reading a global by name, so the name
    # itself is evaluated as source code.
    value = eval(name.to_s, binding, __FILE__, __LINE__)
    vm.push(value)
  end
end

# ### Summary
#
# `getinstancevariable` pushes the value of an instance variable onto the
# stack. It uses an inline cache to avoid having to look up the instance
# variable in the class hierarchy every time.
#
# This instruction has two forms, but both have the same structure. Before
# Ruby 3.2, the inline cache corresponded to both the get and set
# instructions and could be shared. Since Ruby 3.2, it uses object shapes
# instead so the caches are unique per instruction.
#
# ### Usage
#
# ~~~ruby
# @instance_variable
# ~~~
#
class GetInstanceVariable
  attr_reader :name, :cache

  def initialize(name, cache)
    @name, @cache = name, cache
  end

  def disasm(fmt)
    operands = [fmt.object(name), fmt.inline_storage(cache)]
    fmt.instruction("getinstancevariable", operands)
  end

  def to_a(_iseq)
    [:getinstancevariable, name, cache]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the instance variable of the current `self`. Object's own
  # implementation of `instance_variable_get` is bound to the receiver, so
  # a receiver that overrides that method cannot interfere.
  def call(vm)
    reader = Object.instance_method(:instance_variable_get).bind(vm._self)
    vm.push(reader.call(name))
  end
end

# ### Summary
#
# `getlocal` fetches the value of a local variable from a frame determined
# by the level and index arguments. The level is the number of frames back
# to look and the index is the index in the local table. It pushes the value
# it finds onto the stack.
#
# ### Usage
#
# ~~~ruby
# value = 5
# tap { tap { value } }
# ~~~
#
class GetLocal
  attr_reader :index, :level

  def initialize(index, level)
    @index, @level = index, level
  end

  def disasm(fmt)
    operands = [fmt.local(index, explicit: level)]
    fmt.instruction("getlocal", operands)
  end

  # Serializes the instruction, resolving the local's offset in the local
  # table of the instruction sequence `level` parents above `iseq`.
  def to_a(iseq)
    owner = level.times.inject(iseq) { |current, _| current.parent_iseq }
    [:getlocal, owner.local_table.offset(index), level]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  def call(vm)
    value = vm.local_get(index, level)
    vm.push(value)
  end
end

# ### Summary
#
# `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It
# fetches the value of a local variable from the current frame determined by
# the index given as its only argument.
#
# ### Usage
#
# ~~~ruby
# value = 5
# value
# ~~~
#
class GetLocalWC0
  attr_reader :index

  def initialize(index)
    @index = index
  end

  def disasm(fmt)
    operands = [fmt.local(index, implicit: 0)]
    fmt.instruction("getlocal_WC_0", operands)
  end

  def to_a(iseq)
    offset = iseq.local_table.offset(index)
    [:getlocal_WC_0, offset]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The general `getlocal` form, with the level fixed at 0.
  def canonical
    GetLocal.new(index, 0)
  end

  def call(vm)
    general = canonical
    general.call(vm)
  end
end

# ### Summary
#
# `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It
# fetches the value of a local variable from the parent frame determined by
# the index given as its only argument.
#
# ### Usage
#
# ~~~ruby
# value = 5
# self.then { value }
# ~~~
#
class GetLocalWC1
  attr_reader :index

  def initialize(index)
    @index = index
  end

  def disasm(fmt)
    fmt.instruction("getlocal_WC_1", [fmt.local(index, implicit: 1)])
  end

  # Serializes the instruction; the offset is resolved in the parent
  # instruction sequence's local table.
  def to_a(iseq)
    [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The general `getlocal` form, with the level fixed at 1.
  def canonical
    GetLocal.new(index, 1)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `getspecial` pushes the value of a special local variable onto the stack.
#
# ### Usage
#
# ~~~ruby
# 1 if (a == 1) .. (b == 2)
# ~~~
#
class GetSpecial
  SVAR_LASTLINE = 0 # $_
  SVAR_BACKREF = 1 # $~
  SVAR_FLIPFLOP_START = 2 # flipflop

  attr_reader :key, :type

  def initialize(key, type)
    @key = key
    @type = type
  end

  def disasm(fmt)
    fmt.instruction("getspecial", [fmt.object(key), fmt.object(type)])
  end

  def to_a(_iseq)
    [:getspecial, key, type]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the special variable selected by `key`. Only the flip-flop state
  # is supported; `$_` and `$~` raise NotImplementedError.
  def call(vm)
    case key
    when SVAR_LASTLINE
      raise NotImplementedError, "getspecial SVAR_LASTLINE"
    when SVAR_BACKREF
      raise NotImplementedError, "getspecial SVAR_BACKREF"
    when SVAR_FLIPFLOP_START
      # The value has to land on the stack: this instruction declares that
      # it pushes one value, and whatever follows it consumes that value.
      vm.push(vm.frame_svar.svars[SVAR_FLIPFLOP_START])
    end
  end
end

# ### Summary
#
# `intern` converts the top element of the stack to a symbol and pushes the
# symbol onto the stack.
#
# ### Usage
#
# ~~~ruby
# :"#{"foo"}"
# ~~~
#
class Intern
  def disasm(fmt)
    fmt.instruction("intern")
  end

  def to_a(_iseq)
    [:intern]
  end

  def length
    1
  end

  def pops
    1
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Replaces the value on top of the stack with its symbol form.
  def call(vm)
    vm.push(vm.pop.to_sym)
  end
end

# ### Summary
#
# `invokeblock` invokes the block given to the current method.
# It pops the
# arguments for the block off the stack and pushes the result of running the
# block onto the stack.
#
# ### Usage
#
# ~~~ruby
# def foo
#   yield
# end
# ~~~
#
class InvokeBlock
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("invokeblock", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:invokeblock, calldata.to_h]
  end

  def length
    2
  end

  def pops
    calldata.argc
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Calls the block of the yielding frame with the popped arguments and
  # pushes whatever the block returns.
  def call(vm)
    vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc)))
  end
end

# ### Summary
#
# `invokesuper` is similar to the `send` instruction, except that it calls
# the super method. It pops the receiver and arguments off the stack and
# pushes the return value onto the stack.
#
# ### Usage
#
# ~~~ruby
# def foo
#   super
# end
# ~~~
#
class InvokeSuper
  attr_reader :calldata, :block_iseq

  def initialize(calldata, block_iseq)
    @calldata = calldata
    @block_iseq = block_iseq
  end

  # Queues the block body (when there is one) and prints the instruction.
  def disasm(fmt)
    fmt.enqueue(block_iseq) if block_iseq
    fmt.instruction(
      "invokesuper",
      [fmt.calldata(calldata), block_iseq&.name || "nil"]
    )
  end

  def to_a(_iseq)
    [:invokesuper, calldata.to_h, block_iseq&.to_a]
  end

  # The opcode plus its two operands (call data and block sequence), which
  # matches the three slots produced by `to_a`.
  def length
    3
  end

  # The receiver, every positional argument and, when the call passes an
  # explicit block argument, that block as well.
  def pops
    argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0)
    argb + calldata.argc + 1
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pops keyword values, positional arguments and the receiver (in that
  # order), calls the super implementation of the frame's current method
  # and pushes its result.
  def call(vm)
    block =
      if (iseq = block_iseq)
        ->(*args, **kwargs, &blk) do
          vm.run_block_frame(iseq, *args, **kwargs, &blk)
        end
      end

    keywords =
      if calldata.kw_arg
        calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h
      else
        {}
      end

    arguments = vm.pop(calldata.argc)
    receiver = vm.pop

    method = receiver.method(vm.frame.name).super_method
    vm.push(method.call(*arguments, **keywords, &block))
  end
end

# ### Summary
#
# `jump` unconditionally jumps to the label given as its only argument.
#
# ### Usage
#
# ~~~ruby
# x = 0
# if x == 0
#   puts "0"
# else
#   puts "2"
# end
# ~~~
#
class Jump
  attr_reader :label

  def initialize(label)
    @label = label
  end

  def disasm(fmt)
    fmt.instruction("jump", [fmt.label(label)])
  end

  def to_a(_iseq)
    [:jump, label.name]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    0
  end

  def canonical
    self
  end

  def call(vm)
    vm.jump(label)
  end
end

# ### Summary
#
# `leave` exits the current frame.
#
# ### Usage
#
# ~~~ruby
# ;;
# ~~~
#
class Leave
  def disasm(fmt)
    fmt.instruction("leave")
  end

  def to_a(_iseq)
    [:leave]
  end

  def length
    1
  end

  def pops
    1
  end

  def pushes
    # TODO: This is wrong. It should be 1. But it's 0 for now because
    # otherwise the stack size is incorrectly calculated.
    0
  end

  def canonical
    self
  end

  def call(vm)
    vm.leave
  end
end

# ### Summary
#
# `newarray` puts a new array initialized with `number` values from the
# stack. It pops `number` values off the stack and pushes the array onto the
# stack.
#
# ### Usage
#
# ~~~ruby
# ["string"]
# ~~~
#
class NewArray
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def disasm(fmt)
    operands = [fmt.object(number)]
    fmt.instruction("newarray", operands)
  end

  def to_a(_iseq)
    [:newarray, number]
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # The popped values, in stack order, become the new array.
  def call(vm)
    elements = vm.pop(number)
    vm.push(elements)
  end
end

# ### Summary
#
# `newarraykwsplat` is a specialized version of `newarray` that takes a **
# splat argument. It pops `number` values off the stack and pushes the array
# onto the stack.
#
# ### Usage
#
# ~~~ruby
# ["string", **{ foo: "bar" }]
# ~~~
#
class NewArrayKwSplat
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def disasm(fmt)
    operands = [fmt.object(number)]
    fmt.instruction("newarraykwsplat", operands)
  end

  def to_a(_iseq)
    [:newarraykwsplat, number]
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # The popped values, in stack order, become the new array.
  def call(vm)
    elements = vm.pop(number)
    vm.push(elements)
  end
end

# ### Summary
#
# `newhash` puts a new hash onto the stack, using `number` elements from the
# stack. `number` needs to be even. It pops `number` elements off the stack
# and pushes a hash onto the stack.
#
# ### Usage
#
# ~~~ruby
# def foo(key, value)
#   { key => value }
# end
# ~~~
#
class NewHash
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def disasm(fmt)
    operands = [fmt.object(number)]
    fmt.instruction("newhash", operands)
  end

  def to_a(_iseq)
    [:newhash, number]
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # The popped values alternate key, value, key, value, ...
  def call(vm)
    pairs = vm.pop(number).each_slice(2)
    vm.push(pairs.to_h)
  end
end

# ### Summary
#
# `newrange` creates a new range object from the top two values on the
# stack. It pops both of them off, and then pushes on the new range.
# It
# takes one argument which is 0 if the end is included or 1 if the end value
# is excluded.
#
# ### Usage
#
# ~~~ruby
# x = 0
# y = 1
# p (x..y), (x...y)
# ~~~
#
class NewRange
  attr_reader :exclude_end

  def initialize(exclude_end)
    @exclude_end = exclude_end
  end

  def disasm(fmt)
    operands = [fmt.object(exclude_end)]
    fmt.instruction("newrange", operands)
  end

  def to_a(_iseq)
    [:newrange, exclude_end]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Builds the range from the two popped bounds; an operand of 1 makes the
  # range exclude its end.
  def call(vm)
    bounds = vm.pop(2)
    vm.push(Range.new(*bounds, exclude_end == 1))
  end
end

# ### Summary
#
# `nop` is a no-operation instruction. It is used to pad the instruction
# sequence so there is a place for other instructions to jump to.
#
# ### Usage
#
# ~~~ruby
# raise rescue true
# ~~~
#
class Nop
  def disasm(fmt)
    fmt.instruction("nop")
  end

  def to_a(_iseq)
    [:nop]
  end

  def length
    1
  end

  def pops
    0
  end

  def pushes
    0
  end

  def canonical
    self
  end

  # Intentionally does nothing.
  def call(vm)
  end
end

# ### Summary
#
# `objtostring` pops a value from the stack, calls `to_s` on that value and
# then pushes the result back to the stack.
#
# It has various fast paths for classes like String, Symbol, Module, Class,
# etc. For everything else it calls `to_s`.
#
# ### Usage
#
# ~~~ruby
# "#{5}"
# ~~~
#
class ObjToString
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("objtostring", operands)
  end

  def to_a(_iseq)
    [:objtostring, calldata.to_h]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def canonical
    self
  end

  def call(vm)
    value = vm.pop
    vm.push(value.to_s)
  end
end

# ### Summary
#
# `once` is an instruction that wraps an instruction sequence and ensures
# that it is only ever executed once for the lifetime of the program.
# It
# uses a cache to ensure that it is only executed once. It pushes the result
# of running the instruction sequence onto the stack.
#
# ### Usage
#
# ~~~ruby
# END { puts "END" }
# ~~~
#
class Once
  attr_reader :iseq, :cache

  def initialize(iseq, cache)
    @iseq = iseq
    @cache = cache
  end

  # Queues the wrapped sequence for disassembly and prints the instruction.
  def disasm(fmt)
    fmt.enqueue(iseq)
    fmt.instruction("once", [iseq.name, fmt.inline_storage(cache)])
  end

  def to_a(_iseq)
    [:once, iseq.to_a, cache]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Runs the wrapped sequence the first time only and remembers its result.
  # The remembered result is pushed on every execution, so the stack effect
  # is the same whether or not the sequence actually ran. If the sequence
  # raises, nothing is remembered and it runs again next time.
  def call(vm)
    unless @executed
      @result = vm.run_block_frame(iseq)
      @executed = true
    end

    vm.push(@result)
  end
end

# ### Summary
#
# `opt_and` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `&` operator is used. There is a fast path for if
# both operands are integers. It pops both the receiver and the argument off
# the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 2 & 3
# ~~~
#
class OptAnd
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_and", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_and, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_aref` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `[]` operator is used. There are fast paths if the
# receiver is an integer, array, or hash.
#
# ### Usage
#
# ~~~ruby
# 7[2]
# ~~~
#
class OptAref
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("opt_aref", operands)
  end

  def to_a(_iseq)
    [:opt_aref, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    generic = canonical
    generic.call(vm)
  end
end

# ### Summary
#
# `opt_aref_with` is a specialization of the `opt_aref` instruction that
# occurs when the `[]` operator is used with a string argument known at
# compile time. There are fast paths if the receiver is a hash. It pops the
# receiver off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# { 'test' => true }['test']
# ~~~
#
class OptArefWith
  attr_reader :object, :calldata

  def initialize(object, calldata)
    @object, @calldata = object, calldata
  end

  def disasm(fmt)
    operands = [fmt.object(object), fmt.calldata(calldata)]
    fmt.instruction("opt_aref_with", operands)
  end

  def to_a(_iseq)
    [:opt_aref_with, object, calldata.to_h]
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Indexes the popped receiver with the key stored in the instruction.
  def call(vm)
    receiver = vm.pop
    vm.push(receiver[object])
  end
end

# ### Summary
#
# `opt_aset` is an instruction for setting the hash value by the key in
# the `recv[obj] = set` format. It is a specialization of the
# `opt_send_without_block` instruction. It pops the receiver, the key, and
# the value off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# {}[:key] = value
# ~~~
#
class OptAset
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("opt_aset", operands)
  end

  def to_a(_iseq)
    [:opt_aset, calldata.to_h]
  end

  def length
    2
  end

  def pops
    3
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    generic = canonical
    generic.call(vm)
  end
end

# ### Summary
#
# `opt_aset_with` is an instruction for setting the hash value by the known
# string key in the `recv[obj] = set` format. It pops the receiver and the
# value off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# {}["key"] = value
# ~~~
#
class OptAsetWith
  attr_reader :object, :calldata

  def initialize(object, calldata)
    @object, @calldata = object, calldata
  end

  def disasm(fmt)
    operands = [fmt.object(object), fmt.calldata(calldata)]
    fmt.instruction("opt_aset_with", operands)
  end

  def to_a(_iseq)
    [:opt_aset_with, object, calldata.to_h]
  end

  def length
    3
  end

  def pops
    2
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Stores the popped value under the instruction's key in the popped
  # receiver. As with any assignment expression, the result pushed is the
  # assigned value.
  def call(vm)
    receiver, value = vm.pop(2)
    receiver[object] = value
    vm.push(value)
  end
end

# ### Summary
#
# `opt_case_dispatch` is a branch instruction that moves the control flow
# for case statements that have clauses where they can all be used as hash
# keys for an internal hash.
#
# It has two arguments: the `case_dispatch_hash` and an `else_label`. It
# pops one value off the stack: a hash key. `opt_case_dispatch` looks up the
# key in the `case_dispatch_hash` and jumps to the corresponding label if
# there is one. If there is no value in the `case_dispatch_hash`,
# `opt_case_dispatch` jumps to the `else_label` index.
#
# ### Usage
#
# ~~~ruby
# case 1
# when 1
#   puts "foo"
# else
#   puts "bar"
# end
# ~~~
#
class OptCaseDispatch
  attr_reader :case_dispatch_hash, :else_label

  def initialize(case_dispatch_hash, else_label)
    @case_dispatch_hash = case_dispatch_hash
    @else_label = else_label
  end

  # Prints the instruction. The dispatch table is not expanded; it is shown
  # as the `<cdhash>` placeholder so the operand is never printed blank.
  def disasm(fmt)
    fmt.instruction(
      "opt_case_dispatch",
      ["<cdhash>", fmt.label(else_label)]
    )
  end

  # Serializes the instruction. The dispatch table is flattened into
  # alternating keys and label names.
  def to_a(_iseq)
    [
      :opt_case_dispatch,
      case_dispatch_hash.flat_map { |key, value| [key, value.name] },
      else_label.name
    ]
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  # Pops the value being matched and jumps to its label, or to the else
  # label when the table has no entry for it.
  def call(vm)
    vm.jump(case_dispatch_hash.fetch(vm.pop, else_label))
  end
end

# ### Summary
#
# `opt_div` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `/` operator is used. There are fast paths for if
# both operands are integers, or if both operands are floats. It pops both
# the receiver and the argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 2 / 3
# ~~~
#
class OptDiv
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_div", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_div, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_empty_p` is an optimization applied when the method `empty?` is
# called. It pops the receiver off the stack and pushes on the result of the
# method call.
#
# ### Usage
#
# ~~~ruby
# "".empty?
# ~~~
#
class OptEmptyP
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("opt_empty_p", operands)
  end

  def to_a(_iseq)
    [:opt_empty_p, calldata.to_h]
  end

  def length
    2
  end

  # Only the receiver: `empty?` takes no arguments.
  def pops
    1
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    generic = canonical
    generic.call(vm)
  end
end

# ### Summary
#
# `opt_eq` is a specialization of the `opt_send_without_block` instruction
# that occurs when the == operator is used. Fast paths exist when both
# operands are integers, floats, symbols or strings. It pops both the
# receiver and the argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 2 == 2
# ~~~
#
class OptEq
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("opt_eq", operands)
  end

  def to_a(_iseq)
    [:opt_eq, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    generic = canonical
    generic.call(vm)
  end
end

# ### Summary
#
# `opt_ge` is a specialization of the `opt_send_without_block` instruction
# that occurs when the >= operator is used. Fast paths exist when both
# operands are integers or floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 4 >= 3
# ~~~
#
class OptGE
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_ge", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_ge, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_getconstant_path` performs a constant lookup on a chain of constant
# names. It accepts as its argument an array of constant names, and pushes
# the value of the constant onto the stack.
#
# ### Usage
#
# ~~~ruby
# ::Object
# ~~~
#
class OptGetConstantPath
  attr_reader :names

  def initialize(names)
    @names = names
  end

  # Prints the instruction with the constant path as its operand, so the
  # listing shows which constant is being looked up.
  def disasm(fmt)
    cache = "<ic:0 #{names.join("::")}>"
    fmt.instruction("opt_getconstant_path", [cache])
  end

  def to_a(_iseq)
    [:opt_getconstant_path, names]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Resolves the path one segment at a time and pushes the final value. An
  # empty segment name stands for the top level (`Object`). The lookup
  # starts at `self` when it is a class or module, otherwise at its class.
  def call(vm)
    current = vm._self
    # Test against Module (not Class) so that a module `self` is searched
    # directly instead of being replaced by `Module`.
    current = current.class unless current.is_a?(Module)

    names.each do |name|
      current = name == :"" ? Object : current.const_get(name)
    end

    vm.push(current)
  end
end

# ### Summary
#
# `opt_gt` is a specialization of the `opt_send_without_block` instruction
# that occurs when the > operator is used. Fast paths exist when both
# operands are integers or floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 4 > 3
# ~~~
#
class OptGT
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("opt_gt", operands)
  end

  def to_a(_iseq)
    [:opt_gt, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    generic = canonical
    generic.call(vm)
  end
end

# ### Summary
#
# `opt_le` is a specialization of the `opt_send_without_block` instruction
# that occurs when the <= operator is used. Fast paths exist when both
# operands are integers or floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 3 <= 4
# ~~~
#
class OptLE
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("opt_le", operands)
  end

  def to_a(_iseq)
    [:opt_le, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    generic = canonical
    generic.call(vm)
  end
end

# ### Summary
#
# `opt_length` is a specialization of `opt_send_without_block`, when the
# `length` method is called. There are fast paths when the receiver is
# either a string, hash, or array. It pops the receiver off the stack and
# pushes on the result of the method call.
#
# ### Usage
#
# ~~~ruby
# "".length
# ~~~
#
class OptLength
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("opt_length", operands)
  end

  def to_a(_iseq)
    [:opt_length, calldata.to_h]
  end

  def length
    2
  end

  # Only the receiver: `length` takes no arguments.
  def pops
    1
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    generic = canonical
    generic.call(vm)
  end
end

# ### Summary
#
# `opt_lt` is a specialization of the `opt_send_without_block` instruction
# that occurs when the < operator is used. Fast paths exist when both
# operands are integers or floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 3 < 4
# ~~~
#
class OptLT
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    operands = [fmt.calldata(calldata)]
    fmt.instruction("opt_lt", operands)
  end

  def to_a(_iseq)
    [:opt_lt, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic send instruction this specialization is equivalent to.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    generic = canonical
    generic.call(vm)
  end
end

# ### Summary
#
# `opt_ltlt` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `<<` operator is used. Fast paths exist when the
# receiver is either a String or an Array. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# "" << 2
# ~~~
#
class OptLTLT
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_ltlt", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_ltlt, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_minus` is a specialization of the `opt_send_without_block`
# instruction that occurs when the `-` operator is used. There are fast
# paths for if both operands are integers or if both operands are floats. It
# pops both the receiver and the argument off the stack and pushes on the
# result.
#
# ### Usage
#
# ~~~ruby
# 3 - 2
# ~~~
#
class OptMinus
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_minus", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_minus, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_mod` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `%` operator is used. There are fast paths for if
# both operands are integers or if both operands are floats. It pops both
# the receiver and the argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 4 % 2
# ~~~
#
class OptMod
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_mod", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_mod, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_mult` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `*` operator is used. There are fast paths for if
# both operands are integers or floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 3 * 2
# ~~~
#
class OptMult
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_mult", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_mult, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_neq` is an optimization that tests whether two values at the top of
# the stack are not equal by testing their equality and calling the `!` on
# the result. This allows `opt_neq` to use the fast paths optimized in
# `opt_eq` when both operands are Integers, Floats, Symbols, or Strings. It
# pops both the receiver and the argument off the stack and pushes on the
# result.
#
# ### Usage
#
# ~~~ruby
# 2 != 2
# ~~~
#
class OptNEq
  attr_reader :eq_calldata, :neq_calldata

  def initialize(eq_calldata, neq_calldata)
    @eq_calldata = eq_calldata
    @neq_calldata = neq_calldata
  end

  def disasm(fmt)
    fmt.instruction(
      "opt_neq",
      [fmt.calldata(eq_calldata), fmt.calldata(neq_calldata)]
    )
  end

  def to_a(_iseq)
    [:opt_neq, eq_calldata.to_h, neq_calldata.to_h]
  end

  def length
    3
  end

  def pops
    2
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pops the receiver and the argument and pushes the result of `!=`.
  def call(vm)
    receiver, argument = vm.pop(2)
    vm.push(receiver != argument)
  end
end

# ### Summary
#
# `opt_newarray_max` is a specialization that occurs when the `max` method
# is called on an array literal. It pops the values of the array off the
# stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# [a, b, c].max
# ~~~
#
class OptNewArrayMax
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def disasm(fmt)
    fmt.instruction("opt_newarray_max", [fmt.object(number)])
  end

  def to_a(_iseq)
    [:opt_newarray_max, number]
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pops `number` values and pushes the largest of them.
  def call(vm)
    vm.push(vm.pop(number).max)
  end
end

# ### Summary
#
# `opt_newarray_min` is a specialization that occurs when the `min` method
# is called on an array literal. It pops the values of the array off the
# stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# [a, b, c].min
# ~~~
#
class OptNewArrayMin
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def disasm(fmt)
    fmt.instruction("opt_newarray_min", [fmt.object(number)])
  end

  def to_a(_iseq)
    [:opt_newarray_min, number]
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pops `number` values and pushes the smallest of them.
  def call(vm)
    vm.push(vm.pop(number).min)
  end
end

# ### Summary
#
# `opt_nil_p` is an optimization applied when the method `nil?` is called.
# It returns true immediately when the receiver is `nil` and defers to the
# `nil?` method in other cases. It pops the receiver off the stack and
# pushes on the result.
#
# ### Usage
#
# ~~~ruby
# "".nil?
# ~~~
#
class OptNilP
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_nil_p", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_nil_p, calldata.to_h]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_not` negates the value on top of the stack by calling the `!` method
# on it. It pops the receiver off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# !true
# ~~~
#
class OptNot
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_not", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_not, calldata.to_h]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_or` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `|` operator is used.
# There is a fast path for if
# both operands are integers. It pops both the receiver and the argument off
# the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 2 | 3
# ~~~
#
class OptOr
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_or", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_or, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_plus` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `+` operator is used. There are fast paths for if
# both operands are integers, floats, strings, or arrays. It pops both the
# receiver and the argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 2 + 3
# ~~~
#
class OptPlus
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_plus", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_plus, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_regexpmatch2` is a specialization of the `opt_send_without_block`
# instruction that occurs when the `=~` operator is used. It pops both the
# receiver and the argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# /a/ =~ "a"
# ~~~
#
class OptRegExpMatch2
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_regexpmatch2", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_regexpmatch2, calldata.to_h]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_send_without_block` is a specialization of the send instruction that
# occurs when a method is being called without a block. It pops the receiver
# and the arguments off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# puts "Hello, world!"
# ~~~
#
class OptSendWithoutBlock
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_send_without_block", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_send_without_block, calldata.to_h]
  end

  def length
    2
  end

  # One slot for the receiver plus one per argument.
  def pops
    1 + calldata.argc
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_size` is a specialization of `opt_send_without_block`, when the
# `size` method is called. There are fast paths when the receiver is either
# a string, hash, or array. It pops the receiver off the stack and pushes on
# the result.
#
# ### Usage
#
# ~~~ruby
# "".size
# ~~~
#
class OptSize
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_size", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_size, calldata.to_h]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `opt_str_freeze` pushes a frozen known string value with no interpolation
# onto the stack using the #freeze method. If the method gets overridden,
# this will fall back to a send.
#
# ### Usage
#
# ~~~ruby
# "hello".freeze
# ~~~
#
class OptStrFreeze
  attr_reader :object, :calldata

  def initialize(object, calldata)
    @object = object
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction(
      "opt_str_freeze",
      [fmt.object(object), fmt.calldata(calldata)]
    )
  end

  def to_a(_iseq)
    [:opt_str_freeze, object, calldata.to_h]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the string operand after freezing it.
  def call(vm)
    vm.push(object.freeze)
  end
end

# ### Summary
#
# `opt_str_uminus` pushes a frozen known string value with no interpolation
# onto the stack. If the method gets overridden, this will fall back to a
# send.
#
# ### Usage
#
# ~~~ruby
# -"string"
# ~~~
#
class OptStrUMinus
  attr_reader :object, :calldata

  def initialize(object, calldata)
    @object = object
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction(
      "opt_str_uminus",
      [fmt.object(object), fmt.calldata(calldata)]
    )
  end

  def to_a(_iseq)
    [:opt_str_uminus, object, calldata.to_h]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the result of calling unary minus on the string operand.
  def call(vm)
    vm.push(-object)
  end
end

# ### Summary
#
# `opt_succ` is a specialization of the `opt_send_without_block` instruction
# when the method being called is `succ`. Fast paths exist when the receiver
# is either a String or an Integer. It pops the receiver off the stack and
# pushes on the result.
#
# ### Usage
#
# ~~~ruby
# "".succ
# ~~~
#
class OptSucc
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def disasm(fmt)
    fmt.instruction("opt_succ", [fmt.calldata(calldata)])
  end

  def to_a(_iseq)
    [:opt_succ, calldata.to_h]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  # The generic form of this instruction: a plain send with no block.
  def canonical
    Send.new(calldata, nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `pop` pops the top value off the stack.
#
# ### Usage
#
# ~~~ruby
# a ||= 2
# ~~~
#
class Pop
  def disasm(fmt)
    fmt.instruction("pop")
  end

  def to_a(_iseq)
    [:pop]
  end

  def length
    1
  end

  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  def call(vm)
    vm.pop
  end
end

# ### Summary
#
# `putnil` pushes a global nil object onto the stack.
#
# ### Usage
#
# ~~~ruby
# nil
# ~~~
#
class PutNil
  def disasm(fmt)
    fmt.instruction("putnil")
  end

  def to_a(_iseq)
    [:putnil]
  end

  def length
    1
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The generic form of this instruction: `putobject nil`.
  def canonical
    PutObject.new(nil)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `putobject` pushes a known value onto the stack.
#
# ### Usage
#
# ~~~ruby
# 5
# ~~~
#
class PutObject
  attr_reader :object

  def initialize(object)
    @object = object
  end

  def disasm(fmt)
    fmt.instruction("putobject", [fmt.object(object)])
  end

  def to_a(_iseq)
    [:putobject, object]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  def call(vm)
    vm.push(object)
  end
end

# ### Summary
#
# `putobject_INT2FIX_0_` pushes 0 on the stack. It is a specialized
# instruction resulting from the operand unification optimization. It is
# equivalent to `putobject 0`.
#
# ### Usage
#
# ~~~ruby
# 0
# ~~~
#
class PutObjectInt2Fix0
  def disasm(fmt)
    fmt.instruction("putobject_INT2FIX_0_")
  end

  def to_a(_iseq)
    [:putobject_INT2FIX_0_]
  end

  def length
    1
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The generic form of this instruction: `putobject 0`.
  def canonical
    PutObject.new(0)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `putobject_INT2FIX_1_` pushes 1 on the stack. It is a specialized
# instruction resulting from the operand unification optimization. It is
# equivalent to `putobject 1`.
#
# ### Usage
#
# ~~~ruby
# 1
# ~~~
#
class PutObjectInt2Fix1
  def disasm(fmt)
    fmt.instruction("putobject_INT2FIX_1_")
  end

  def to_a(_iseq)
    [:putobject_INT2FIX_1_]
  end

  def length
    1
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The generic form of this instruction: `putobject 1`.
  def canonical
    PutObject.new(1)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `putself` pushes the current value of self onto the stack.
#
# ### Usage
#
# ~~~ruby
# puts "Hello, world!"
# ~~~
#
class PutSelf
  def disasm(fmt)
    fmt.instruction("putself")
  end

  def to_a(_iseq)
    [:putself]
  end

  def length
    1
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  def call(vm)
    vm.push(vm._self)
  end
end

# ### Summary
#
# `putspecialobject` pushes one of three special objects onto the stack.
# These are either the VM core special object, the class base special
# object, or the constant base special object.
#
# ### Usage
#
# ~~~ruby
# alias foo bar
# ~~~
#
class PutSpecialObject
  OBJECT_VMCORE = 1
  OBJECT_CBASE = 2
  OBJECT_CONST_BASE = 3

  attr_reader :object

  def initialize(object)
    @object = object
  end

  def disasm(fmt)
    fmt.instruction("putspecialobject", [fmt.object(object)])
  end

  def to_a(_iseq)
    [:putspecialobject, object]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes the special object selected by the operand. Nothing is pushed
  # when the operand is not one of the three known constants.
  def call(vm)
    case object
    when OBJECT_VMCORE
      vm.push(vm.frozen_core)
    when OBJECT_CBASE
      # The class base is self when self is a class, otherwise its
      # singleton class.
      value = vm._self
      value = value.singleton_class unless value.is_a?(Class)
      vm.push(value)
    when OBJECT_CONST_BASE
      vm.push(vm.const_base)
    end
  end
end

# ### Summary
#
# `putstring` pushes an unfrozen string literal onto the stack.
#
# ### Usage
#
# ~~~ruby
# "foo"
# ~~~
#
class PutString
  attr_reader :object

  def initialize(object)
    @object = object
  end

  def disasm(fmt)
    fmt.instruction("putstring", [fmt.object(object)])
  end

  def to_a(_iseq)
    [:putstring, object]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pushes a copy so that mutating the result leaves the operand untouched.
  def call(vm)
    vm.push(object.dup)
  end
end

# ### Summary
#
# `send` invokes a method with an optional block. It pops its receiver and
# the arguments for the method off the stack and pushes the return value
# onto the stack. It has two arguments: the calldata for the call site and
# the optional block instruction sequence.
#
# ### Usage
#
# ~~~ruby
# "hello".tap { |i| p i }
# ~~~
#
class Send
  attr_reader :calldata, :block_iseq

  def initialize(calldata, block_iseq)
    @calldata = calldata
    @block_iseq = block_iseq
  end

  def disasm(fmt)
    fmt.enqueue(block_iseq) if block_iseq
    fmt.instruction(
      "send",
      [fmt.calldata(calldata), block_iseq&.name || "nil"]
    )
  end

  def to_a(_iseq)
    [:send, calldata.to_h, block_iseq&.to_a]
  end

  def length
    3
  end

  # The receiver, every argument, and one more slot when a block argument
  # is passed explicitly.
  def pops
    argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0)
    argb + calldata.argc + 1
  end

  def pushes
    1
  end

  def canonical
    self
  end

  def call(vm)
    # When the call site carries a block iseq, wrap it in a lambda that
    # runs the iseq in a new block frame on the VM.
    block =
      if (iseq = block_iseq)
        ->(*args, **kwargs, &blk) do
          vm.run_block_frame(iseq, *args, **kwargs, &blk)
        end
      end

    # Keyword values are popped first, then the positional arguments, then
    # the receiver.
    keywords =
      if calldata.kw_arg
        calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h
      else
        {}
      end

    arguments = vm.pop(calldata.argc)
    receiver = vm.pop

    vm.push(
      receiver.__send__(calldata.method, *arguments, **keywords, &block)
    )
  end
end

# ### Summary
#
# `setblockparam` sets the value of a block local variable on a frame
# determined by the level and index arguments.
# The level is the number of
# frames back to look and the index is the index in the local table. It pops
# the value it is setting off the stack.
#
# ### Usage
#
# ~~~ruby
# def foo(&bar)
#   bar = baz
# end
# ~~~
#
class SetBlockParam
  attr_reader :index, :level

  def initialize(index, level)
    @index = index
    @level = level
  end

  def disasm(fmt)
    fmt.instruction("setblockparam", [fmt.local(index, explicit: level)])
  end

  # Walks `level` parents up from the given iseq so the offset is computed
  # against the local table that owns the variable.
  def to_a(iseq)
    current = iseq
    level.times { current = current.parent_iseq }
    [:setblockparam, current.local_table.offset(index), level]
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  def call(vm)
    vm.local_set(index, level, vm.pop)
  end
end

# ### Summary
#
# `setclassvariable` looks for a class variable in the current class and
# sets its value to the value it pops off the top of the stack. It uses an
# inline cache to reduce the need to lookup the class variable in the class
# hierarchy every time.
#
# ### Usage
#
# ~~~ruby
# @@class_variable = 1
# ~~~
#
class SetClassVariable
  attr_reader :name, :cache

  def initialize(name, cache)
    @name = name
    @cache = cache
  end

  def disasm(fmt)
    fmt.instruction(
      "setclassvariable",
      [fmt.object(name), fmt.inline_storage(cache)]
    )
  end

  def to_a(_iseq)
    [:setclassvariable, name, cache]
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  # Sets the class variable on self when self is a class, otherwise on the
  # class of self.
  def call(vm)
    clazz = vm._self
    clazz = clazz.class unless clazz.is_a?(Class)
    clazz.class_variable_set(name, vm.pop)
  end
end

# ### Summary
#
# `setconstant` pops two values off the stack: the value to set the
# constant to and the constant base to set it in.
#
# ### Usage
#
# ~~~ruby
# Constant = 1
# ~~~
#
class SetConstant
  attr_reader :name

  def initialize(name)
    @name = name
  end

  def disasm(fmt)
    fmt.instruction("setconstant", [fmt.object(name)])
  end

  def to_a(_iseq)
    [:setconstant, name]
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    0
  end

  def canonical
    self
  end

  # The constant base sits on top of the stack, with the value beneath it.
  def call(vm)
    value, parent = vm.pop(2)
    parent.const_set(name, value)
  end
end

# ### Summary
#
# `setglobal` sets the value of a global variable to a value popped off the
# top of the stack.
#
# ### Usage
#
# ~~~ruby
# $global = 5
# ~~~
#
class SetGlobal
  attr_reader :name

  def initialize(name)
    @name = name
  end

  def disasm(fmt)
    fmt.instruction("setglobal", [fmt.object(name)])
  end

  def to_a(_iseq)
    [:setglobal, name]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  def call(vm)
    # Evaluating the name of the global variable because there isn't a
    # reflection API for global variables. The name is interpolated into
    # the evaluated source, so it must only ever come from a compiled
    # instruction sequence and never from untrusted input.
    eval("#{name} = vm.pop", binding, __FILE__, __LINE__)
  end
end

# ### Summary
#
# `setinstancevariable` pops a value off the top of the stack and then sets
# the instance variable associated with the instruction to that value.
#
# This instruction has two forms, but both have the same structure. Before
# Ruby 3.2, the inline cache corresponded to both the get and set
# instructions and could be shared. Since Ruby 3.2, it uses object shapes
# instead so the caches are unique per instruction.
#
# ### Usage
#
# ~~~ruby
# @instance_variable = 1
# ~~~
#
class SetInstanceVariable
  attr_reader :name, :cache

  def initialize(name, cache)
    @name = name
    @cache = cache
  end

  def disasm(fmt)
    fmt.instruction(
      "setinstancevariable",
      [fmt.object(name), fmt.inline_storage(cache)]
    )
  end

  def to_a(_iseq)
    [:setinstancevariable, name, cache]
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  # Binds Object's own `instance_variable_set` to self so that the call
  # works even when the receiver overrides or lacks that method.
  def call(vm)
    method = Object.instance_method(:instance_variable_set)
    method.bind(vm._self).call(name, vm.pop)
  end
end

# ### Summary
#
# `setlocal` sets the value of a local variable on a frame determined by the
# level and index arguments. The level is the number of frames back to
# look and the index is the index in the local table. It pops the value it
# is setting off the stack.
#
# ### Usage
#
# ~~~ruby
# value = 5
# tap { tap { value = 10 } }
# ~~~
#
class SetLocal
  attr_reader :index, :level

  def initialize(index, level)
    @index = index
    @level = level
  end

  def disasm(fmt)
    fmt.instruction("setlocal", [fmt.local(index, explicit: level)])
  end

  # Walks `level` parents up from the given iseq so the offset is computed
  # against the local table that owns the variable.
  def to_a(iseq)
    current = iseq
    level.times { current = current.parent_iseq }
    [:setlocal, current.local_table.offset(index), level]
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  def call(vm)
    vm.local_set(index, level, vm.pop)
  end
end

# ### Summary
#
# `setlocal_WC_0` is a specialized version of the `setlocal` instruction. It
# sets the value of a local variable on the current frame to the value at
# the top of the stack as determined by the index given as its only
# argument.
#
# ### Usage
#
# ~~~ruby
# value = 5
# ~~~
#
class SetLocalWC0
  attr_reader :index

  def initialize(index)
    @index = index
  end

  def disasm(fmt)
    fmt.instruction("setlocal_WC_0", [fmt.local(index, implicit: 0)])
  end

  def to_a(iseq)
    [:setlocal_WC_0, iseq.local_table.offset(index)]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    0
  end

  # The generic form of this instruction: `setlocal` at level 0.
  def canonical
    SetLocal.new(index, 0)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `setlocal_WC_1` is a specialized version of the `setlocal` instruction. It
# sets the value of a local variable on the parent frame to the value at the
# top of the stack as determined by the index given as its only argument.
#
# ### Usage
#
# ~~~ruby
# value = 5
# self.then { value = 10 }
# ~~~
#
class SetLocalWC1
  attr_reader :index

  def initialize(index)
    @index = index
  end

  def disasm(fmt)
    fmt.instruction("setlocal_WC_1", [fmt.local(index, implicit: 1)])
  end

  # The offset is resolved against the parent iseq's local table because
  # the variable lives one frame up.
  def to_a(iseq)
    [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    0
  end

  # The generic form of this instruction: `setlocal` at level 1.
  def canonical
    SetLocal.new(index, 1)
  end

  def call(vm)
    canonical.call(vm)
  end
end

# ### Summary
#
# `setn` sets a value in the stack to a value popped off the top of the
# stack. It then pushes that value onto the top of the stack as well.
#
# ### Usage
#
# ~~~ruby
# {}[:key] = 'val'
# ~~~
#
class SetN
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def disasm(fmt)
    fmt.instruction("setn", [fmt.object(number)])
  end

  def to_a(_iseq)
    [:setn, number]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Overwrites the slot `number` entries below the top with the top value,
  # leaving the top value in place.
  def call(vm)
    vm.stack[-number - 1] = vm.stack.last
  end
end

# ### Summary
#
# `setspecial` pops a value off the top of the stack and sets a special
# local variable to that value. The special local variable is determined by
# the key given as its only argument.
#
# ### Usage
#
# ~~~ruby
# baz if (foo == 1) .. (bar == 1)
# ~~~
#
class SetSpecial
  attr_reader :key

  def initialize(key)
    @key = key
  end

  def disasm(fmt)
    fmt.instruction("setspecial", [fmt.object(key)])
  end

  def to_a(_iseq)
    [:setspecial, key]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    0
  end

  def canonical
    self
  end

  # Stores the popped value into the frame's flip-flop svar. The last-line
  # and back-reference svars are not supported and raise. Any other key
  # does nothing.
  def call(vm)
    case key
    when GetSpecial::SVAR_LASTLINE
      raise NotImplementedError, "setspecial SVAR_LASTLINE"
    when GetSpecial::SVAR_BACKREF
      raise NotImplementedError, "setspecial SVAR_BACKREF"
    when GetSpecial::SVAR_FLIPFLOP_START
      # NOTE(review): assumes `svars` supports `[]=` (it is read with `[]`
      # elsewhere) - confirm against the VM's frame implementation.
      vm.frame_svar.svars[GetSpecial::SVAR_FLIPFLOP_START] = vm.pop
    end
  end
end

# ### Summary
#
# `splatarray` coerces the array object at the top of the stack into Array
# by calling `to_a`. It pushes a duplicate of the array if there is a flag,
# and the original array if there isn't one.
#
# ### Usage
#
# ~~~ruby
# x = *(5)
# ~~~
#
class SplatArray
  attr_reader :flag

  def initialize(flag)
    @flag = flag
  end

  def disasm(fmt)
    fmt.instruction("splatarray", [fmt.object(flag)])
  end

  def to_a(_iseq)
    [:splatarray, flag]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Pops one value, coerces it into an Array, and pushes exactly one array
  # back. The array is duplicated when the flag is set, so the original
  # object is never shared with the result.
  def call(vm)
    array = Array(vm.pop)
    vm.push(flag ? array.dup : array)
  end
end

# ### Summary
#
# `swap` swaps the top two elements in the stack.
#
# ### TracePoint
#
# `swap` does not dispatch any events.
#
# ### Usage
#
# ~~~ruby
# !!defined?([[]])
# ~~~
#
class Swap
  def disasm(fmt)
    fmt.instruction("swap")
  end

  def to_a(_iseq)
    [:swap]
  end

  def length
    1
  end

  def pops
    2
  end

  def pushes
    2
  end

  def canonical
    self
  end

  def call(vm)
    left, right = vm.pop(2)
    vm.push(right, left)
  end
end

# ### Summary
#
# `throw` pops a value off the top of the stack and throws it. It is caught
# using the instruction sequence's (or an ancestor's) catch table. It pushes
# on the result of throwing the value.
#
# ### Usage
#
# ~~~ruby
# [1, 2, 3].map { break 2 }
# ~~~
#
class Throw
  TAG_NONE = 0x0
  TAG_RETURN = 0x1
  TAG_BREAK = 0x2
  TAG_NEXT = 0x3
  TAG_RETRY = 0x4
  TAG_REDO = 0x5
  TAG_RAISE = 0x6
  TAG_THROW = 0x7
  TAG_FATAL = 0x8

  attr_reader :type

  def initialize(type)
    @type = type
  end

  def disasm(fmt)
    fmt.instruction("throw", [fmt.object(type)])
  end

  def to_a(_iseq)
    [:throw, type]
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def canonical
    self
  end

  # Executing `throw` is not supported yet.
  def call(vm)
    raise NotImplementedError, "throw"
  end
end

# ### Summary
#
# `topn` pushes a single value onto the stack that is a copy of the value
# within the stack that is `number` of slots down from the top.
+ # + # ### Usage + # + # ~~~ruby + # case 3 + # when 1..5 + # puts "foo" + # end + # ~~~ + # + class TopN + attr_reader :number + + def initialize(number) + @number = number + end + + def disasm(fmt) + fmt.instruction("topn", [fmt.object(number)]) + end + + def to_a(_iseq) + [:topn, number] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + vm.push(vm.stack[-number - 1]) + end + end + + # ### Summary + # + # `toregexp` pops a number of values off the stack, combines them into a new + # regular expression, and pushes the new regular expression onto the stack. + # + # ### Usage + # + # ~~~ruby + # /foo #{bar}/ + # ~~~ + # + class ToRegExp + attr_reader :options, :length + + def initialize(options, length) + @options = options + @length = length + end + + def disasm(fmt) + fmt.instruction("toregexp", [fmt.object(options), fmt.object(length)]) + end + + def to_a(_iseq) + [:toregexp, options, length] + end + + def pops + length + end + + def pushes + 1 + end + + def canonical + self + end + + def call(vm) + vm.push(Regexp.new(vm.pop(length).join, options)) + end + end + end +end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb new file mode 100644 index 00000000..30a95437 --- /dev/null +++ b/lib/syntax_tree/yarv/legacy.rb @@ -0,0 +1,192 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This module contains the instructions that used to be a part of YARV but + # have been replaced or removed in more recent versions. + module Legacy + # ### Summary + # + # `getclassvariable` looks for a class variable in the current class and + # pushes its value onto the stack. 
#
# This version of the `getclassvariable` instruction is no longer used
# since in Ruby 3.0 it gained an inline cache.
#
# ### Usage
#
# ~~~ruby
# @@class_variable
# ~~~
#
class GetClassVariable
  attr_reader :name

  def initialize(name)
    @name = name
  end

  def disasm(fmt)
    fmt.instruction("getclassvariable", [fmt.object(name)])
  end

  def to_a(_iseq)
    [:getclassvariable, name]
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end
end

# ### Summary
#
# `opt_getinlinecache` is a wrapper around a series of `putobject` and
# `getconstant` instructions that allows skipping past them if the inline
# cache is currently set. It pushes the value of the cache onto the stack
# if it is set, otherwise it pushes `nil`.
#
# This instruction is no longer used since in Ruby 3.2 it was replaced by
# the consolidated `opt_getconstant_path` instruction.
#
# ### Usage
#
# ~~~ruby
# Constant
# ~~~
#
class OptGetInlineCache
  attr_reader :label, :cache

  def initialize(label, cache)
    @label = label
    @cache = cache
  end

  def disasm(fmt)
    fmt.instruction(
      "opt_getinlinecache",
      [fmt.label(label), fmt.inline_storage(cache)]
    )
  end

  def to_a(_iseq)
    [:opt_getinlinecache, label.name, cache]
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  # This instruction has no more general form.
  def canonical
    self
  end

  # Always pushes nil, which is the "cache not set" result, so the wrapped
  # constant lookup instructions run every time.
  def call(vm)
    vm.push(nil)
  end
end

# ### Summary
#
# `opt_setinlinecache` sets an inline cache for a constant lookup. It pops
# the value it should set off the top of the stack. It then pushes that
# value back onto the top of the stack.
#
# This instruction is no longer used since in Ruby 3.2 it was replaced by
# the consolidated `opt_getconstant_path` instruction.
+      #
+      # ### Usage
+      #
+      # ~~~ruby
+      # Constant
+      # ~~~
+      #
+      class OptSetInlineCache
+        attr_reader :cache
+
+        def initialize(cache)
+          @cache = cache
+        end
+
+        def disasm(fmt)
+          fmt.instruction("opt_setinlinecache", [fmt.inline_storage(cache)])
+        end
+
+        def to_a(_iseq)
+          [:opt_setinlinecache, cache]
+        end
+
+        def length
+          2
+        end
+
+        def pops
+          1
+        end
+
+        def pushes
+          1
+        end
+
+        def call(vm)
+          vm.push(vm.pop)
+        end
+      end
+
+      # ### Summary
+      #
+      # `setclassvariable` looks for a class variable in the current class and
+      # sets its value to the value it pops off the top of the stack.
+      #
+      # This version of the `setclassvariable` instruction is no longer used
+      # since in Ruby 3.0 it gained an inline cache.
+      #
+      # ### Usage
+      #
+      # ~~~ruby
+      # @@class_variable = 1
+      # ~~~
+      #
+      class SetClassVariable
+        attr_reader :name
+
+        def initialize(name)
+          @name = name
+        end
+
+        def disasm(fmt)
+          fmt.instruction("setclassvariable", [fmt.object(name)])
+        end
+
+        def to_a(_iseq)
+          [:setclassvariable, name]
+        end
+
+        def length
+          2
+        end
+
+        def pops
+          1
+        end
+
+        def pushes
+          0
+        end
+      end
+    end
+  end
+end
diff --git a/lib/syntax_tree/yarv/local_table.rb b/lib/syntax_tree/yarv/local_table.rb
new file mode 100644
index 00000000..54cc55ad
--- /dev/null
+++ b/lib/syntax_tree/yarv/local_table.rb
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+module SyntaxTree
+  module YARV
+    # This represents every local variable associated with an instruction
+    # sequence. There are two kinds of locals: plain locals that are what you
+    # expect, and block proxy locals, which represent local variables
+    # associated with blocks that were passed into the current instruction
+    # sequence.
+    class LocalTable
+      # A local representing a block passed into the current instruction
+      # sequence.
+      class BlockLocal
+        attr_reader :name
+
+        def initialize(name)
+          @name = name
+        end
+      end
+
+      # A regular local variable.
+      class PlainLocal
+        attr_reader :name
+
+        def initialize(name)
+          @name = name
+        end
+      end
+
+      # The result of looking up a local variable in the current local table.
+      class Lookup
+        attr_reader :local, :index, :level
+
+        def initialize(local, index, level)
+          @local = local
+          @index = index
+          @level = level
+        end
+      end
+
+      attr_reader :locals
+
+      def initialize
+        @locals = []
+      end
+
+      def empty?
+        locals.empty?
+      end
+
+      def find(name, level = 0)
+        index = locals.index { |local| local.name == name }
+        Lookup.new(locals[index], index, level) if index
+      end
+
+      def has?(name)
+        locals.any? { |local| local.name == name }
+      end
+
+      def names
+        locals.map(&:name)
+      end
+
+      def name_at(index)
+        locals[index].name
+      end
+
+      def size
+        locals.length
+      end
+
+      # Add a BlockLocal to the local table unless the name is already present.
+      def block(name)
+        locals << BlockLocal.new(name) unless has?(name)
+      end
+
+      # Add a PlainLocal to the local table unless the name is already present.
+      def plain(name)
+        locals << PlainLocal.new(name) unless has?(name)
+      end
+
+      # This is the offset from the top of the stack where this local variable
+      # lives.
+      def offset(index)
+        size - (index - 3) - 1
+      end
+    end
+  end
+end
diff --git a/syntax_tree.gemspec b/syntax_tree.gemspec
index 19f4ee97..f6c4a734 100644
--- a/syntax_tree.gemspec
+++ b/syntax_tree.gemspec
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = %w[lib]
-  spec.add_dependency "prettier_print", ">= 1.1.0"
+  spec.add_dependency "prettier_print", ">= 1.2.0"
   spec.add_development_dependency "bundler"
   spec.add_development_dependency "minitest"
diff --git a/test/compiler_test.rb b/test/compiler_test.rb
new file mode 100644
index 00000000..1922f8c6
--- /dev/null
+++ b/test/compiler_test.rb
@@ -0,0 +1,525 @@
+# frozen_string_literal: true
+
+return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1"
+require_relative "test_helper"
+
+module SyntaxTree
+  class CompilerTest < Minitest::Test
+    CASES = [
+      # Hooks
+      "BEGIN { a = 1 }",
+      "a = 1; END { a = 1 }; a",
+      # Various literals placed on the stack
+      "true",
+      "false",
+      "nil",
+      "self",
+      "0",
+      "1",
+      "2",
+      "1.0",
+      "1i",
+      "1r",
+      "1..2",
+      "1...2",
+      "(1)",
+      "%w[foo bar baz]",
+      "%W[foo bar baz]",
+      "%i[foo bar baz]",
+      "%I[foo bar baz]",
+      "{ foo: 1, bar: 1.0, baz: 1i }",
+      "'foo'",
+      "\"foo\"",
+      "\"foo\#{bar}\"",
+      "\"foo\#@bar\"",
+      "%q[foo]",
+      "%Q[foo]",
+      <<~RUBY,
+        "foo" \\
+        "bar"
+      RUBY
+      <<~RUBY,
+      < 2",
+      "1 >= 2",
+      "1 == 2",
+      "1 != 2",
+      "1 & 2",
+      "1 | 2",
+      "1 << 2",
+      "1 ^ 2",
+      "foo.empty?",
+      "foo.length",
+      "foo.nil?",
+      "foo.size",
+      "foo.succ",
+      "/foo/ =~ \"foo\" && $1",
+      "\"foo\".freeze",
+      "\"foo\".freeze(1)",
+      "-\"foo\"",
+      "\"foo\".-@",
+      "\"foo\".-@(1)",
+      # Various method calls
+      "foo?",
+      "foo.bar",
+      "foo.bar(baz)",
+      "foo bar",
+      "foo.bar baz",
+      "foo(*bar)",
+      "foo(**bar)",
+      "foo(&bar)",
+      "foo.bar = baz",
+      "not foo",
+      "!foo",
+      "~foo",
+      "+foo",
+      "-foo",
+      "`foo`",
+      "`foo \#{bar} baz`",
+      # Local variables
+      "foo",
+      "foo = 1",
+      "foo = 1; bar = 2; baz = 3",
+      "foo = 1; foo",
+      "foo += 1",
+      "foo -= 1",
+      "foo *= 1",
+      "foo /= 1",
+      "foo %= 1",
+      "foo &= 1",
+      "foo |= 1",
+      "foo &&= 1",
+      "foo ||= 1",
+      "foo <<= 1",
+      "foo ^= 1",
+      "foo, bar = 1, 2",
+      "foo, bar, = 1, 2",
+      "foo, bar, baz = 1, 2",
+      "foo, bar = 1, 2, 3",
+      "foo = 1, 2, 3",
+      "foo, * = 1, 2, 3",
+      # Instance variables
+      "@foo",
+      "@foo = 1",
+      "@foo = 1; @bar = 2; @baz = 3",
+      "@foo = 1; @foo",
+      "@foo += 1",
+      "@foo -= 1",
+      "@foo *= 1",
+      "@foo /= 1",
+      "@foo %= 1",
+      "@foo &= 1",
+      "@foo |= 1",
+      "@foo &&= 1",
+      "@foo ||= 1",
+      "@foo <<= 1",
+      "@foo ^= 1",
+      # Class variables
+      "@@foo",
+      "@@foo = 1",
+      "@@foo = 1; @@bar = 2; @@baz = 3",
+      "@@foo = 1; @@foo",
+      "@@foo += 1",
+      "@@foo -= 1",
+      "@@foo *= 1",
+      "@@foo /= 1",
+      "@@foo %= 1",
+      "@@foo &= 1",
+      "@@foo |= 1",
+      "@@foo &&= 1",
+      "@@foo ||= 1",
+      "@@foo <<= 1",
+      "@@foo ^= 1",
+      # Global variables
+      "$foo",
+      "$foo = 1",
+      "$foo = 1; $bar = 2; $baz = 3",
+      "$foo = 1; $foo",
+      "$foo += 1",
+      "$foo -= 1",
+      "$foo *= 1",
+      "$foo /= 1",
+      "$foo %= 1",
+      "$foo &= 1",
+      "$foo |= 1",
+      "$foo &&= 1",
+      "$foo ||= 1",
+      "$foo <<= 1",
+      "$foo ^= 1",
+      # Index access
+      "foo[bar]",
+      "foo[bar] = 1",
+      "foo[bar] += 1",
+      "foo[bar] -= 1",
+      "foo[bar] *= 1",
+      "foo[bar] /= 1",
+      "foo[bar] %= 1",
+      "foo[bar] &= 1",
+      "foo[bar] |= 1",
+      "foo[bar] &&= 1",
+      "foo[bar] ||= 1",
+      "foo[bar] <<= 1",
+      "foo[bar] ^= 1",
+      "foo['true']",
+      "foo['true'] = 1",
+      # Constants (single)
+      "Foo",
+      "Foo = 1",
+      "Foo += 1",
+      "Foo -= 1",
+      "Foo *= 1",
+      "Foo /= 1",
+      "Foo %= 1",
+      "Foo &= 1",
+      "Foo |= 1",
+      "Foo &&= 1",
+      "Foo ||= 1",
+      "Foo <<= 1",
+      "Foo ^= 1",
+      # Constants (top)
+      "::Foo",
+      "::Foo = 1",
+      "::Foo += 1",
+      "::Foo -= 1",
+      "::Foo *= 1",
+      "::Foo /= 1",
+      "::Foo %= 1",
+      "::Foo &= 1",
+      "::Foo |= 1",
+      "::Foo &&= 1",
+      "::Foo ||= 1",
+      "::Foo <<= 1",
+      "::Foo ^= 1",
+      # Constants (nested)
+      "Foo::Bar::Baz",
+      "Foo::Bar::Baz += 1",
+      "Foo::Bar::Baz -= 1",
+      "Foo::Bar::Baz *= 1",
+      "Foo::Bar::Baz /= 1",
+      "Foo::Bar::Baz %= 1",
+      "Foo::Bar::Baz &= 1",
+      "Foo::Bar::Baz |= 1",
+      "Foo::Bar::Baz &&= 1",
+      "Foo::Bar::Baz ||= 1",
+      "Foo::Bar::Baz <<= 1",
+      "Foo::Bar::Baz ^= 1",
+      # Constants (top nested)
+      "::Foo::Bar::Baz",
+      "::Foo::Bar::Baz = 1",
+      "::Foo::Bar::Baz += 1",
+      "::Foo::Bar::Baz -= 1",
+      "::Foo::Bar::Baz *= 1",
+      "::Foo::Bar::Baz /= 1",
+      "::Foo::Bar::Baz %= 1",
+      "::Foo::Bar::Baz &= 1",
+      "::Foo::Bar::Baz |= 1",
+      "::Foo::Bar::Baz &&= 1",
+      "::Foo::Bar::Baz ||= 1",
+      "::Foo::Bar::Baz <<= 1",
+      "::Foo::Bar::Baz ^= 1",
+      # Constants (calls)
+      "Foo::Bar.baz",
+      "::Foo::Bar.baz",
+      "Foo::Bar.baz = 1",
+      "::Foo::Bar.baz = 1",
+      # Control flow
+      "foo&.bar",
+      "foo&.bar(1)",
+      "foo&.bar 1, 2, 3",
+      "foo&.bar {}",
+      "foo && bar",
+      "foo || bar",
+      "if foo then bar end",
+      "if foo then bar else baz end",
+      "if foo then bar elsif baz then qux end",
+      "foo if bar",
+      "unless foo then bar end",
+      "unless foo then bar else baz end",
+      "foo unless bar",
+      "foo while bar",
+      "while foo do bar end",
+      "foo until bar",
+      "until foo do bar end",
+      "for i in [1, 2, 3] do i end",
+      "foo ? bar : baz",
+      "case foo when bar then 1 end",
+      "case foo when bar then 1 else 2 end",
+      "baz if (foo == 1) .. (bar == 1)",
+      # Constructed values
+      "foo..bar",
+      "foo...bar",
+      "[1, 1.0, 1i, 1r]",
+      "[foo, bar, baz]",
+      "[@foo, @bar, @baz]",
+      "[@@foo, @@bar, @@baz]",
+      "[$foo, $bar, $baz]",
+      "%W[foo \#{bar} baz]",
+      "%I[foo \#{bar} baz]",
+      "[foo, bar] + [baz, qux]",
+      "[foo, bar, *baz, qux]",
+      "{ foo: bar, baz: qux }",
+      "{ :foo => bar, :baz => qux }",
+      "{ foo => bar, baz => qux }",
+      "%s[foo]",
+      "[$1, $2, $3, $4, $5, $6, $7, $8, $9]",
+      "/foo \#{bar} baz/",
+      "%r{foo \#{bar} baz}",
+      "[1, 2, 3].max",
+      "[foo, bar, baz].max",
+      "[foo, bar, baz].max(1)",
+      "[1, 2, 3].min",
+      "[foo, bar, baz].min",
+      "[foo, bar, baz].min(1)",
+      "[**{ x: true }][0][:x]",
+      # Core method calls
+      "alias foo bar",
+      "alias :foo :bar",
+      "super",
+      "super(1)",
+      "super(1, 2, 3)",
+      "undef foo",
+      "undef :foo",
+      "undef foo, bar, baz",
+      "undef :foo, :bar, :baz",
+      "def foo; yield; end",
+      "def foo; yield(1); end",
+      "def foo; yield(1, 2, 3); end",
+      # defined? usage
+      "defined?(foo)",
+      "defined?(\"foo\")",
+      "defined?(:foo)",
+      "defined?(@foo)",
+      "defined?(@@foo)",
+      "defined?($foo)",
+      "defined?(Foo)",
+      "defined?(yield)",
+      "defined?(super)",
+      "foo = 1; defined?(foo)",
+      "defined?(self)",
+      "defined?(true)",
+      "defined?(false)",
+      "defined?(nil)",
+      "defined?(foo = 1)",
+      # Ignored content
+      ";;;",
+      "# comment",
+      "=begin\nfoo\n=end",
+      <<~RUBY,
+        __END__
+      RUBY
+      # Method definitions
+      "def foo; end",
+      "def foo(bar); end",
+      "def foo(bar, baz); end",
+      "def foo(bar = 1); end",
+      "def foo(bar = 1, baz = 2); end",
+      "def foo(*bar); end",
+      "def foo(bar, *baz); end",
+      "def foo(*bar, baz, qux); end",
+      "def foo(bar, *baz, qux); end",
+      "def foo(bar, baz, *qux, quaz); end",
+      "def foo(bar, baz, &qux); end",
+      "def foo(bar, *baz, &qux); end",
+      "def foo(&qux); qux; end",
+      "def foo(&qux); qux.call; end",
+      "def foo(&qux); qux = bar; end",
+      "def foo(bar:); end",
+      "def foo(bar:, baz:); end",
+      "def foo(bar: 1); end",
+      "def foo(bar: 1, baz: 2); end",
+      "def foo(bar: baz); end",
+      "def foo(bar: 1, baz: qux); end",
+      "def foo(bar: qux, baz: 1); end",
+      "def foo(bar: baz, qux: qaz); end",
+      "def foo(**rest); end",
+      "def foo(bar:, **rest); end",
+      "def foo(bar:, baz:, **rest); end",
+      "def foo(bar: 1, **rest); end",
+      "def foo(bar: 1, baz: 2, **rest); end",
+      "def foo(bar: baz, **rest); end",
+      "def foo(bar: 1, baz: qux, **rest); end",
+      "def foo(bar: qux, baz: 1, **rest); end",
+      "def foo(bar: baz, qux: qaz, **rest); end",
+      "def foo(...); end",
+      "def foo(bar, ...); end",
+      "def foo(...); bar(...); end",
+      "def foo(bar, ...); baz(1, 2, 3, ...); end",
+      "def self.foo; end",
+      "def foo.bar(baz); end",
+      # Class/module definitions
+      "module Foo; end",
+      "module ::Foo; end",
+      "module Foo::Bar; end",
+      "module ::Foo::Bar; end",
+      "module Foo; module Bar; end; end",
+      "class Foo; end",
+      "class ::Foo; end",
+      "class Foo::Bar; end",
+      "class ::Foo::Bar; end",
+      "class Foo; class Bar; end; end",
+      "class Foo < Baz; end",
+      "class ::Foo < Baz; end",
+      "class Foo::Bar < Baz; end",
+      "class ::Foo::Bar < Baz; end",
+      "class Foo; class Bar < Baz; end; end",
+      "class Foo < baz; end",
+      "class << Object; end",
+      "class << ::String; end",
+      # Block
+      "foo do end",
+      "foo {}",
+      "foo do |bar| end",
+      "foo { |bar| }",
+      "foo { |bar; baz| }",
+      "-> do end",
+      "-> {}",
+      "-> (bar) do end",
+      "-> (bar) {}",
+      "-> (bar; baz) { }",
+      # Pattern matching
+      "foo in bar",
+      "foo in [bar]",
+      "foo in [bar, baz]",
+      "foo in [1, 2, 3, bar, 4, 5, 6, baz]",
+      "foo in Foo[1, 2, 3, bar, 4, 5, 6, baz]",
+      "foo => bar"
+    ]
+
+    # These are the combinations of compiler options that we're going to test.
+    OPTIONS = [
+      YARV::Compiler::Options.new,
+      YARV::Compiler::Options.new(frozen_string_literal: true),
+      YARV::Compiler::Options.new(operands_unification: false),
+      # TODO: have this work when peephole optimizations are turned off.
+      # YARV::Compiler::Options.new(peephole_optimization: false),
+      YARV::Compiler::Options.new(specialized_instruction: false),
+      YARV::Compiler::Options.new(inline_const_cache: false),
+      YARV::Compiler::Options.new(tailcall_optimization: true)
+    ]
+
+    OPTIONS.each do |options|
+      suffix = options.to_hash.map { |key, value| "#{key}=#{value}" }.join("&")
+
+      CASES.each do |source|
+        define_method(:"test_compiles_#{source}_(#{suffix})") do
+          assert_compiles(source, options)
+        end
+
+        define_method(:"test_loads_#{source}_(#{suffix})") do
+          assert_loads(source, options)
+        end
+
+        define_method(:"test_disasms_#{source}_(#{suffix})") do
+          assert_disasms(source, options)
+        end
+      end
+    end
+
+    def test_evaluation
+      assert_evaluates 5, "2 + 3"
+      assert_evaluates 5, "a = 2; b = 3; a + b"
+    end
+
+    private
+
+    def serialize_iseq(iseq)
+      serialized = iseq.to_a
+
+      serialized[4].delete(:node_id)
+      serialized[4].delete(:code_location)
+      serialized[4].delete(:node_ids)
+
+      serialized[13] = serialized[13].filter_map do |insn|
+        case insn
+        when Array
+          insn.map do |operand|
+            if operand.is_a?(Array) &&
+                 operand[0] == YARV::InstructionSequence::MAGIC
+              serialize_iseq(operand)
+            else
+              operand
+            end
+          end
+        when Integer, :RUBY_EVENT_LINE
+          # ignore these for now
+        else
+          insn
+        end
+      end
+
+      serialized
+    end
+
+    # Check that the instruction sequence built by YARV.compile matches the one
+    # built by RubyVM::InstructionSequence.compile.
+    def assert_compiles(source, options)
+      assert_equal(
+        serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)),
+        serialize_iseq(YARV.compile(source, options))
+      )
+    end
+
+    # Check that an instruction sequence loaded back from the array form of the
+    # compiled instruction sequence matches that compiled instruction sequence.
+    def assert_loads(source, options)
+      compiled = RubyVM::InstructionSequence.compile(source, **options)
+
+      assert_equal(
+        serialize_iseq(compiled),
+        serialize_iseq(YARV::InstructionSequence.from(compiled.to_a, options))
+      )
+    end
+
+    # Check that the loaded instruction sequence disassembles to a String.
+    def assert_disasms(source, options)
+      compiled = RubyVM::InstructionSequence.compile(source, **options)
+      yarv = YARV::InstructionSequence.from(compiled.to_a, options)
+      assert_kind_of String, yarv.disasm
+    end
+
+    def assert_evaluates(expected, source)
+      assert_equal expected, YARV.compile(source).eval
+    end
+  end
+end
diff --git a/test/fixtures/assoc_splat.rb b/test/fixtures/assoc_splat.rb
index 2182c2ed..8b595ce9 100644
--- a/test/fixtures/assoc_splat.rb
+++ b/test/fixtures/assoc_splat.rb
@@ -12,3 +12,7 @@
 }
 -
 { **foo }
+% # >= 3.2.0
+def foo(**)
+  bar(**)
+end
diff --git a/test/fixtures/if.rb b/test/fixtures/if.rb
index cfd6a882..b25386b9 100644
--- a/test/fixtures/if.rb
+++ b/test/fixtures/if.rb
@@ -67,3 +67,10 @@ if true
   # comment1
   # comment2
 end
+%
+result =
+  if false && val = 1
+    "A"
+  else
+    "B"
+  end
diff --git a/test/fixtures/rassign.rb b/test/fixtures/rassign.rb
index 3db52b18..3d357351 100644
--- a/test/fixtures/rassign.rb
+++ b/test/fixtures/rassign.rb
@@ -23,3 +23,9 @@
 %
 a in Integer
 b => [Integer => c]
+%
+case [0]
+when 0
+  { a: 0 } => { a: }
+  puts a
+end
diff --git a/test/formatting_test.rb b/test/formatting_test.rb
index eff7ef71..37ca29e1 100644
--- a/test/formatting_test.rb
+++ b/test/formatting_test.rb
@@ -27,5 +27,37 @@ def test_stree_ignore
       assert_equal(source, SyntaxTree.format(source))
     end
+
+    def test_formatting_with_different_indentation_level
+      source = <<~SOURCE
+        def foo
+          puts "a"
+        end
+      SOURCE
+
+      # Default indentation
+      assert_equal(source, SyntaxTree.format(source))
+
+      # Level 2
+      assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 2).rstrip)
+  def foo
+    puts "a"
+  end
+      EXPECTED
+
+      # Level 4
+      assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 4).rstrip)
+    def foo
+      puts "a"
+    end
+      EXPECTED
+
+      # Level 6
+      assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 6).rstrip)
+      def foo
+        puts "a"
+      end
+      EXPECTED
+    end
   end
 end
diff --git a/test/node_test.rb b/test/node_test.rb
index 15826be0..3d700e73 100644
--- a/test/node_test.rb
+++ b/test/node_test.rb
@@ -268,7 +268,7 @@ def test_bodystmt
         end
       SOURCE
-      at = location(lines: 9..9, chars: 5..64)
+      at = location(lines: 2..9, chars: 5..64)
       assert_node(BodyStmt, source, at: at, &:bodystmt)
     end
diff --git a/test/yarv_test.rb b/test/yarv_test.rb
new file mode 100644
index 00000000..f8e0ffdb
--- /dev/null
+++ b/test/yarv_test.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1"
+require_relative "test_helper"
+
+module SyntaxTree
+  class YARVTest < Minitest::Test
+    CASES = {
+      "0" => "break 0\n",
+      "1" => "break 1\n",
+      "2" => "break 2\n",
+      "1.0" => "break 1.0\n",
+      "1 + 2" => "break 1 + 2\n",
+      "1 - 2" => "break 1 - 2\n",
+      "1 * 2" => "break 1 * 2\n",
+      "1 / 2" => "break 1 / 2\n",
+      "1 % 2" => "break 1 % 2\n",
+      "1 < 2" => "break 1 < 2\n",
+      "1 <= 2" => "break 1 <= 2\n",
+      "1 > 2" => "break 1 > 2\n",
+      "1 >= 2" => "break 1 >= 2\n",
+      "1 == 2" => "break 1 == 2\n",
+      "1 != 2" => "break 1 != 2\n",
+      "1 & 2" => "break 1 & 2\n",
+      "1 | 2" => "break 1 | 2\n",
+      "1 << 2" => "break 1 << 2\n",
+      "1 >> 2" => "break 1.>>(2)\n",
+      "1 ** 2" => "break 1.**(2)\n",
+      "a = 1; a" => "a = 1\nbreak a\n"
+    }.freeze
+
+    CASES.each do |source, expected|
+      define_method("test_disassemble_#{source}") do
+        assert_decompiles(expected, source)
+      end
+    end
+
+    def test_bf
+      hello_world =
+        "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]" \
+          ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."
+
+      iseq = YARV::Bf.new(hello_world).compile
+      Formatter.format(hello_world, YARV::Decompiler.new(iseq).to_ruby)
+    end
+
+    private
+
+    def assert_decompiles(expected, source)
+      ruby = YARV::Decompiler.new(YARV.compile(source)).to_ruby
+      actual = Formatter.format(source, ruby)
+      assert_equal expected, actual
+    end
+  end
+end