diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 1a2c45cc..00000000 --- a/.gitmodules +++ /dev/null @@ -1,9 +0,0 @@ -[submodule "mspec"] - path = spec/mspec - url = git@github.com:ruby/mspec.git -[submodule "spec"] - path = spec/ruby - url = git@github.com:ruby/spec.git -[submodule "test/ruby-syntax-fixtures"] - path = test/ruby-syntax-fixtures - url = https://github.com/ruby-syntax-tree/ruby-syntax-fixtures diff --git a/.rubocop.yml b/.rubocop.yml index bc98a43a..e74cdc1b 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -8,7 +8,6 @@ AllCops: TargetRubyVersion: 2.7 Exclude: - '{.git,.github,bin,coverage,pkg,spec,test/fixtures,vendor,tmp}/**/*' - - test/ruby-syntax-fixtures/**/* - test.rb Gemspec/DevelopmentDependencies: @@ -29,6 +28,9 @@ Lint/AmbiguousRange: Lint/BooleanSymbol: Enabled: false +Lint/Debugger: + Enabled: false + Lint/DuplicateBranch: Enabled: false @@ -80,6 +82,9 @@ Security/Eval: Style/AccessorGrouping: Enabled: false +Style/Alias: + Enabled: false + Style/CaseEquality: Enabled: false @@ -89,6 +94,9 @@ Style/CaseLikeIf: Style/ClassVars: Enabled: false +Style/CombinableLoops: + Enabled: false + Style/DocumentDynamicEvalDefinition: Enabled: false @@ -110,6 +118,9 @@ Style/FormatStringToken: Style/GuardClause: Enabled: false +Style/HashLikeCase: + Enabled: false + Style/IdenticalConditionalBranches: Enabled: false diff --git a/CHANGELOG.md b/CHANGELOG.md index c39bed36..34c40e40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,68 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [6.0.0] - 2023-02-10 + +### Added + +- `SyntaxTree::BasicVisitor::visit_methods` has been added to allow you to check multiple visit methods inside of a block. There _was_ a method called `visit_methods` previously, but it was undocumented because it was meant as a private API. That method has been renamed to `valid_visit_methods`. 
+- `rake sorbet:rbi` has been added as a task within the repository to generate an RBI file corresponding to the nodes in the tree. This can be used to help aid consumers of Syntax Tree that are using Sorbet. +- `SyntaxTree::Reflection` has been added to allow you to get information about the nodes in the tree. It is not required by default, since it takes a small amount of time to parse `node.rb` and get all of the information. +- `SyntaxTree::Node#to_mermaid` has been added to allow you to generate a Mermaid diagram of the node and its children. This is useful for debugging and understanding the structure of the tree. +- `SyntaxTree::Translation` has been added as an experimental API to transform the Syntax Tree syntax tree into the syntax trees represented by the whitequark/parser and rubocop/rubocop-ast gems. + - `SyntaxTree::Translation.to_parser(node, buffer)` will return a `Parser::AST::Node` object. + - `SyntaxTree::Translation.to_rubocop_ast(node, buffer)` will return a `RuboCop::AST::Node` object. +- `SyntaxTree::index` and `SyntaxTree::index_file` have been added to allow you to get a list of all of the classes, modules, and methods defined in a given source string or file. +- Various convenience methods have been added: + - `SyntaxTree::format_file` - which calls format with the result of reading the file + - `SyntaxTree::format_node` - which formats the node directly + - `SyntaxTree::parse_file` - which calls parse with the result of reading the file + - `SyntaxTree::search_file` - which calls search with the result of reading the file + - `SyntaxTree::Node#start_char` - which is the same as calling `node.location.start_char` + - `SyntaxTree::Node#end_char` - which is the same as calling `node.location.end_char` +- `SyntaxTree::Assoc` nodes can now be formatted on their own without a parent hash node. +- `SyntaxTree::BlockVar#arg0?` has been added to check if a single required block parameter is present and would potentially be expanded. 
+- More experimental APIs have been added to the `SyntaxTree::YARV` module, including: + - `SyntaxTree::YARV::ControlFlowGraph` + - `SyntaxTree::YARV::DataFlowGraph` + - `SyntaxTree::YARV::SeaOfNodes` + +### Changed + +#### Major changes + +- *BREAKING* Updates to `WithEnvironment`: + - The `WithEnvironment` module has been renamed to `WithScope`. + - The `current_environment` method has been renamed to `current_scope`. + - The `with_current_environment` method has been removed. + - Previously scopes were always able to look up the tree, as in: `a = 1; def foo; a = 2; end` would see only a single `a` variable. That has been corrected. + - Previously accessing variables from inside of blocks that were not shadowed would mark them as being local to the block only. This has been corrected. +- *BREAKING* Lots of constants moved out of `SyntaxTree::Visitor` to just `SyntaxTree`: + * `SyntaxTree::Visitor::FieldVisitor` is now `SyntaxTree::FieldVisitor` + * `SyntaxTree::Visitor::JSONVisitor` is now `SyntaxTree::JSONVisitor` + * `SyntaxTree::Visitor::MatchVisitor` is now `SyntaxTree::MatchVisitor` + * `SyntaxTree::Visitor::MutationVisitor` is now `SyntaxTree::MutationVisitor` + * `SyntaxTree::Visitor::PrettyPrintVisitor` is now `SyntaxTree::PrettyPrintVisitor` +- *BREAKING* Lots of constants are now autoloaded instead of required by default. This is only particularly relevant if you are in a forking environment and want to preload constants before forking for better memory usage with copy-on-write. +- *BREAKING* The `SyntaxTree::Statements#initialize` method no longer accepts a parser as the first argument. It now mirrors the other nodes in that it accepts its children and location. As a result, Syntax Tree nodes are now marshalable (and therefore can be sent over DRb). Previously the `Statements` node was not able to be marshaled because it held a reference to the parser. 
+ +#### Minor changes + +- Many places where embedded documents (`=begin` to `=end`) were being treated as real comments have been fixed for formatting. +- Dynamic symbols in keyword pattern matching now have better formatting. +- Endless method definitions used to have a `SyntaxTree::BodyStmt` node that had any kind of node as its `statements` field. That has been corrected such that `def_node.bodystmt.statements` now always returns a `SyntaxTree::Statements` node, which is more consistent. +- We no longer assume that `fiddle` is able to be required, and only require it when it is actually needed. + +#### Tiny changes + +- Empty parameter nodes within blocks now have more accurate location information. +- Pinned variables have more correct location information now. (Previously the location was just around the variable itself, but it now includes the pin.) +- Array patterns in pattern matching now have more accurate location information when they are using parentheses with a constant present. +- Find patterns in pattern matching now have more correct location information for their `left` and `right` fields. +- Lots of nodes have more correct types in the comments on their attributes. +- The expressions `break foo.bar :baz do |qux| qux end` and `next fun foo do end` now correctly parse as control-flow statements with a method call that has a block attached, as opposed to control-flow statements with a block attached. +- The expression `self::a, b = 1, 2` would previously yield a `SyntaxTree::ConstPathField` node for the first element of the left-hand-side of the multiple assignment. Semantically this is incorrect, and we have fixed this to now be a `SyntaxTree::Field` node instead. + ## [5.3.0] - 2023-01-26 ### Added @@ -497,7 +559,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 
🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.3.0...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.0...HEAD +[6.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.3.0...v6.0.0 [5.3.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.2.0...v5.3.0 [5.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...v5.2.0 [5.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...v5.1.0 diff --git a/Gemfile.lock b/Gemfile.lock index 799bd891..325d89b3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - syntax_tree (5.3.0) + syntax_tree (6.0.0) prettier_print (>= 1.2.0) GEM @@ -19,7 +19,7 @@ GEM rake (13.0.6) regexp_parser (2.6.2) rexml (3.2.5) - rubocop (1.44.1) + rubocop (1.45.1) json (~> 2.3) parallel (~> 1.10) parser (>= 3.2.0.0) diff --git a/README.md b/README.md index 3c437947..500d5fad 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,10 @@ It is built with only standard library dependencies. It additionally ships with - [construct_keys](#construct_keys) - [Visitor](#visitor) - [visit_method](#visit_method) + - [visit_methods](#visit_methods) - [BasicVisitor](#basicvisitor) - [MutationVisitor](#mutationvisitor) - - [WithEnvironment](#withenvironment) + - [WithScope](#withscope) - [Language server](#language-server) - [textDocument/formatting](#textdocumentformatting) - [textDocument/inlayHint](#textdocumentinlayhint) @@ -340,7 +341,7 @@ This function takes an input string containing Ruby code, parses it into its und ### SyntaxTree.mutation(&block) -This function yields a new mutation visitor to the block, and then returns the initialized visitor. It's effectively a shortcut for creating a `SyntaxTree::Visitor::MutationVisitor` without having to remember the class name. 
For more information on that visitor, see the definition below. +This function yields a new mutation visitor to the block, and then returns the initialized visitor. It's effectively a shortcut for creating a `SyntaxTree::MutationVisitor` without having to remember the class name. For more information on that visitor, see the definition below. ### SyntaxTree.search(source, query, &block) @@ -517,6 +518,26 @@ Did you mean? visit_binary from bin/console:8:in `
' ``` +### visit_methods + +Similar to `visit_method`, `visit_methods` also checks that methods defined are valid visit methods. This variation however accepts a block and checks that all methods defined within that block are valid visit methods. It's meant to be used like: + +```ruby +class ArithmeticVisitor < SyntaxTree::Visitor + visit_methods do + def visit_binary(node) + # ... + end + + def visit_int(node) + # ... + end + end +end +``` + +This is only checked when the methods are defined and does not impose any kind of runtime overhead after that. It is very useful for upgrading versions of Syntax Tree in case these method names change. + ### BasicVisitor When you're defining your own visitor, by default it will walk down the tree even if you don't define `visit_*` methods. This is to ensure you can define a subset of the necessary methods in order to only interact with the nodes you're interested in. If you'd like to change this default to instead raise an error if you visit a node you haven't explicitly handled, you can instead inherit from `BasicVisitor`. @@ -537,7 +558,7 @@ The `MutationVisitor` is a visitor that can be used to mutate the tree. It works ```ruby # Create a new visitor -visitor = SyntaxTree::Visitor::MutationVisitor.new +visitor = SyntaxTree::MutationVisitor.new # Specify that it should mutate If nodes with assignments in their predicates visitor.mutate("IfNode[predicate: Assign | OpAssign]") do |node| @@ -567,20 +588,18 @@ SyntaxTree::Formatter.format(source, program.accept(visitor)) # => "if (a = 1)\nend\n" ``` -### WithEnvironment +### WithScope -The `WithEnvironment` module can be included in visitors to automatically keep track of local variables and arguments -defined inside each environment. A `current_environment` accessor is made available to the request, allowing it to find -all usages and definitions of a local. 
+The `WithScope` module can be included in visitors to automatically keep track of local variables and arguments defined inside each scope. A `current_scope` accessor is made available to the visitor, allowing it to find all usages and definitions of a local. ```ruby class MyVisitor < Visitor - include WithEnvironment + prepend WithScope def visit_ident(node) # find_local will return a Local for any local variables or arguments # present in the current environment or nil if the identifier is not a local - local = current_environment.find_local(node) + local = current_scope.find_local(node) puts local.type # the type of the local (:variable or :argument) puts local.definitions # the array of locations where this local is defined diff --git a/Rakefile b/Rakefile index f06d8cf8..fb4f8847 100644 --- a/Rakefile +++ b/Rakefile @@ -4,6 +4,8 @@ require "bundler/gem_tasks" require "rake/testtask" require "syntax_tree/rake_tasks" +Rake.add_rakelib "tasks" + Rake::TestTask.new(:test) do |t| t.libs << "test" t.libs << "lib" @@ -14,7 +16,16 @@ task default: :test configure = ->(task) do task.source_files = - FileList[%w[Gemfile Rakefile syntax_tree.gemspec lib/**/*.rb test/*.rb]] + FileList[ + %w[ + Gemfile + Rakefile + syntax_tree.gemspec + lib/**/*.rb + tasks/*.rake + test/*.rb + ] + ] # Since Syntax Tree supports back to Ruby 2.7.0, we need to make sure that we # format our code such that it's compatible with that version. 
This actually @@ -26,10 +37,3 @@ end SyntaxTree::Rake::CheckTask.new(&configure) SyntaxTree::Rake::WriteTask.new(&configure) - -desc "Run mspec tests using YARV emulation" -task :spec do - Dir["./spec/ruby/language/**/*_spec.rb"].each do |filepath| - sh "exe/yarv ./spec/mspec/bin/mspec-tag #{filepath}" - end -end diff --git a/bin/console b/bin/console index 1c18bd62..6f35f1ec 100755 --- a/bin/console +++ b/bin/console @@ -3,6 +3,7 @@ require "bundler/setup" require "syntax_tree" +require "syntax_tree/reflection" require "irb" IRB.start(__FILE__) diff --git a/bin/whitequark b/bin/whitequark new file mode 100755 index 00000000..121bcd53 --- /dev/null +++ b/bin/whitequark @@ -0,0 +1,79 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "parser/current" + +$:.unshift(File.expand_path("../lib", __dir__)) +require "syntax_tree" + +# First, opt in to every AST feature. +Parser::Builders::Default.modernize + +# Modify the source map == check so that it doesn't check against the node +# itself so we don't get into a recursive loop. +Parser::Source::Map.prepend( + Module.new { + def ==(other) + self.class == other.class && + (instance_variables - %i[@node]).map do |ivar| + instance_variable_get(ivar) == other.instance_variable_get(ivar) + end.reduce(:&) + end + } +) + +# Next, ensure that we're comparing the nodes and also comparing the source +# ranges so that we're getting all of the necessary information. +Parser::AST::Node.prepend( + Module.new { + def ==(other) + super && (location == other.location) + end + } +) + +source = ARGF.read + +parser = Parser::CurrentRuby.new +parser.diagnostics.all_errors_are_fatal = true + +buffer = Parser::Source::Buffer.new("(string)", 1) +buffer.source = source.dup.force_encoding(parser.default_encoding) + +stree = SyntaxTree::Translation.to_parser(SyntaxTree.parse(source), buffer) +ptree = parser.parse(buffer) + +if stree == ptree + puts "Syntax trees are equivalent." 
+elsif stree.inspect == ptree.inspect + warn "Syntax tree locations are different." + + queue = [[stree, ptree]] + while (left, right = queue.shift) + if left.location != right.location + warn "Different node:" + pp left + + warn "Different location:" + + warn "Syntax Tree:" + pp left.location + + warn "whitequark/parser:" + pp right.location + + exit + end + + left.children.zip(right.children).each do |left_child, right_child| + queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node) + end + end +else + warn "Syntax Tree:" + pp stree + + warn "whitequark/parser:" + pp ptree +end diff --git a/doc/changing_structure.md b/doc/changing_structure.md new file mode 100644 index 00000000..74012f26 --- /dev/null +++ b/doc/changing_structure.md @@ -0,0 +1,16 @@ +# Changing structure + +First and foremost, changing the structure of the tree in any way is a major breaking change. It forces the consumers to update their visitors, pattern matches, and method calls. It should not be taken lightly, and can only happen on a major version change. So keep that in mind. + +That said, if you do want to change the structure of the tree, there are a few steps that you have to take. They are enumerated below. + +1. Change the structure in the required node classes. This could mean adding/removing classes or adding/removing fields. Be sure to also update the `copy` and `===` methods to be sure that they are correct. +2. Update the parser to correctly create the new structure. +3. Update any visitor methods that are affected by the change. For example, if adding a new node make sure to create the new visit method alias in the `Visitor` class. +4. Update the `FieldVisitor` class to be sure that the various serializers, pretty printers, and matchers all get updated accordingly. +5. Update the `DSL` module to be sure that folks can correctly create nodes with the new structure. +6. Ensure the formatting of the code hasn't changed. 
This can mostly be done by running the tests, but if there's a corner case that we don't cover that is now exposed by your change be sure to add test cases. +7. Update the translation visitors to ensure we're still translating into other ASTs correctly. +8. Update the YARV compiler visitor to ensure we're still compiling correctly. +9. Make sure we aren't referencing the previous structure in any documentation or tests. +10. Be sure to update `CHANGELOG.md` with a description of the change that you made. diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index f5c71aba..4e183383 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,50 +1,41 @@ # frozen_string_literal: true -require "etc" -require "fiddle" -require "json" -require "pp" require "prettier_print" require "ripper" -require "stringio" -require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" -require_relative "syntax_tree/dsl" -require_relative "syntax_tree/version" - require_relative "syntax_tree/basic_visitor" require_relative "syntax_tree/visitor" -require_relative "syntax_tree/visitor/field_visitor" -require_relative "syntax_tree/visitor/json_visitor" -require_relative "syntax_tree/visitor/match_visitor" -require_relative "syntax_tree/visitor/mutation_visitor" -require_relative "syntax_tree/visitor/pretty_print_visitor" -require_relative "syntax_tree/visitor/environment" -require_relative "syntax_tree/visitor/with_environment" +require_relative "syntax_tree/formatter" require_relative "syntax_tree/parser" -require_relative "syntax_tree/pattern" -require_relative "syntax_tree/search" -require_relative "syntax_tree/index" - -require_relative "syntax_tree/yarv" -require_relative "syntax_tree/yarv/bf" -require_relative "syntax_tree/yarv/compiler" -require_relative "syntax_tree/yarv/decompiler" -require_relative "syntax_tree/yarv/disassembler" -require_relative "syntax_tree/yarv/instruction_sequence" -require_relative "syntax_tree/yarv/instructions" -require_relative 
"syntax_tree/yarv/legacy" -require_relative "syntax_tree/yarv/local_table" -require_relative "syntax_tree/yarv/assembler" -require_relative "syntax_tree/yarv/vm" +require_relative "syntax_tree/version" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. It can be used to # build formatters, linters, language servers, and more. module SyntaxTree + # Syntax Tree the library has many features that aren't always used by the + # CLI. Requiring those features takes time, so we autoload as many constants + # as possible in order to keep the CLI as fast as possible. + + autoload :DSL, "syntax_tree/dsl" + autoload :FieldVisitor, "syntax_tree/field_visitor" + autoload :Index, "syntax_tree/index" + autoload :JSONVisitor, "syntax_tree/json_visitor" + autoload :LanguageServer, "syntax_tree/language_server" + autoload :MatchVisitor, "syntax_tree/match_visitor" + autoload :Mermaid, "syntax_tree/mermaid" + autoload :MermaidVisitor, "syntax_tree/mermaid_visitor" + autoload :MutationVisitor, "syntax_tree/mutation_visitor" + autoload :Pattern, "syntax_tree/pattern" + autoload :PrettyPrintVisitor, "syntax_tree/pretty_print_visitor" + autoload :Search, "syntax_tree/search" + autoload :Translation, "syntax_tree/translation" + autoload :WithScope, "syntax_tree/with_scope" + autoload :YARV, "syntax_tree/yarv" + # This holds references to objects that respond to both #parse and #format # so that we can use them in the CLI. HANDLERS = {} @@ -63,40 +54,80 @@ module SyntaxTree # that Syntax Tree can format arbitrary parts of a document. DEFAULT_INDENTATION = 0 - # This is a hook provided so that plugins can register themselves as the - # handler for a particular file type. - def self.register_handler(extension, handler) - HANDLERS[extension] = handler + # Parses the given source and returns the formatted source. 
+ def self.format( + source, + maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, + options: Formatter::Options.new + ) + format_node( + source, + parse(source), + maxwidth, + base_indentation, + options: options + ) end - # Parses the given source and returns the syntax tree. - def self.parse(source) - parser = Parser.new(source) - response = parser.parse - response unless parser.error? + # Parses the given file and returns the formatted source. + def self.format_file( + filepath, + maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, + options: Formatter::Options.new + ) + format(read(filepath), maxwidth, base_indentation, options: options) end - # Parses the given source and returns the formatted source. - def self.format( + # Accepts a node in the tree and returns the formatted source. + def self.format_node( source, + node, maxwidth = DEFAULT_PRINT_WIDTH, base_indentation = DEFAULT_INDENTATION, options: Formatter::Options.new ) formatter = Formatter.new(source, [], maxwidth, options: options) - parse(source).format(formatter) + node.format(formatter) formatter.flush(base_indentation) formatter.output.join end + # Indexes the given source code to return a list of all class, module, and + # method definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index(source) + Index.index(source) + end + + # Indexes the given file to return a list of all class, module, and method + # definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index_file(filepath) + Index.index_file(filepath) + end + # A convenience method for creating a new mutation visitor. def self.mutation - visitor = Visitor::MutationVisitor.new + visitor = MutationVisitor.new yield visitor visitor end + # Parses the given source and returns the syntax tree. 
+ def self.parse(source) + parser = Parser.new(source) + response = parser.parse + response unless parser.error? + end + + # Parses the given file and returns the syntax tree. + def self.parse_file(filepath) + parse(read(filepath)) + end + # Returns the source from the given filepath taking into account any potential # magic encoding comments. def self.read(filepath) @@ -112,23 +143,24 @@ def self.read(filepath) File.read(filepath, encoding: encoding) end + # This is a hook provided so that plugins can register themselves as the + # handler for a particular file type. + def self.register_handler(extension, handler) + HANDLERS[extension] = handler + end + # Searches through the given source using the given pattern and yields each # node in the tree that matches the pattern to the given block. def self.search(source, query, &block) - Search.new(Pattern.new(query).compile).scan(parse(source), &block) - end + pattern = Pattern.new(query).compile + program = parse(source) - # Indexes the given source code to return a list of all class, module, and - # method definitions. Used to quickly provide indexing capability for IDEs or - # documentation generation. - def self.index(source) - Index.index(source) + Search.new(pattern).scan(program, &block) end - # Indexes the given file to return a list of all class, module, and method - # definitions. Used to quickly provide indexing capability for IDEs or - # documentation generation. - def self.index_file(filepath) - Index.index_file(filepath) + # Searches through the given file using the given pattern and yields each + # node in the tree that matches the pattern to the given block. 
+ def self.search_file(filepath, query, &block) + search(read(filepath), query, &block) end end diff --git a/lib/syntax_tree/basic_visitor.rb b/lib/syntax_tree/basic_visitor.rb index 34b7876e..bd8ea5f2 100644 --- a/lib/syntax_tree/basic_visitor.rb +++ b/lib/syntax_tree/basic_visitor.rb @@ -29,7 +29,7 @@ def initialize(error) def corrections @corrections ||= DidYouMean::SpellChecker.new( - dictionary: Visitor.visit_methods + dictionary: BasicVisitor.valid_visit_methods ).correct(visit_method) end @@ -40,7 +40,40 @@ def corrections end end + # This module is responsible for checking all of the methods defined within + # a given block to ensure that they are valid visit methods. + class VisitMethodsChecker < Module + Status = Struct.new(:checking) + + # This is the status of the checker. It's used to determine whether or not + # we should be checking the methods that are defined. It is kept as an + # instance variable so that it can be disabled later. + attr_reader :status + + def initialize + # We need the status to be an instance variable so that it can be + # accessed by the disable! method, but also a local variable so that it + # can be captured by the define_method block. + status = @status = Status.new(true) + + define_method(:method_added) do |name| + BasicVisitor.visit_method(name) if status.checking + super(name) + end + end + + def disable! + status.checking = false + end + end + class << self + # This is the list of all of the valid visit methods. + def valid_visit_methods + @valid_visit_methods ||= + Visitor.instance_methods.grep(/^visit_(?!child_nodes)/) + end + # This method is here to help folks write visitors. # # It's not always easy to ensure you're writing the correct method name in @@ -51,15 +84,21 @@ class << self # name. It will raise an error if the visit method you're defining isn't # actually a method on the parent visitor. 
def visit_method(method_name) - return if visit_methods.include?(method_name) + return if valid_visit_methods.include?(method_name) raise VisitMethodError, method_name end - # This is the list of all of the valid visit methods. + # This method is here to help folks write visitors. + # + # Within the given block, every method that is defined will be checked to + # ensure it's a valid visit method using the BasicVisitor::visit_method + # method defined above. def visit_methods - @visit_methods ||= - Visitor.instance_methods.grep(/^visit_(?!child_nodes)/) + checker = VisitMethodsChecker.new + extend(checker) + yield + checker.disable! end end diff --git a/lib/syntax_tree/cli.rb b/lib/syntax_tree/cli.rb index 7e6f4067..cbe10446 100644 --- a/lib/syntax_tree/cli.rb +++ b/lib/syntax_tree/cli.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "etc" require "optparse" module SyntaxTree @@ -238,7 +239,7 @@ def run(item) # representation. class Json < Action def run(item) - object = Visitor::JSONVisitor.new.visit(item.handler.parse(item.source)) + object = item.handler.parse(item.source).accept(JSONVisitor.new) puts JSON.pretty_generate(object) end end @@ -501,7 +502,6 @@ def run(argv) when "j", "json" Json.new(options) when "lsp" - require "syntax_tree/language_server" LanguageServer.new(print_width: options.print_width).run return 0 when "m", "match" diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 860a1fe5..4506aa04 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -210,12 +210,17 @@ def RAssign(value, operator, pattern) end # Create a new ClassDeclaration node. - def ClassDeclaration(constant, superclass, bodystmt) + def ClassDeclaration( + constant, + superclass, + bodystmt, + location = Location.default + ) ClassDeclaration.new( constant: constant, superclass: superclass, bodystmt: bodystmt, - location: Location.default + location: location ) end @@ -225,12 +230,12 @@ def Comma(value) end # Create a new Command node. 
- def Command(message, arguments, block) + def Command(message, arguments, block, location = Location.default) Command.new( message: message, arguments: arguments, block: block, - location: Location.default + location: location ) end @@ -247,8 +252,8 @@ def CommandCall(receiver, operator, message, arguments, block) end # Create a new Comment node. - def Comment(value, inline) - Comment.new(value: value, inline: inline, location: Location.default) + def Comment(value, inline, location = Location.default) + Comment.new(value: value, inline: inline, location: location) end # Create a new Const node. @@ -285,14 +290,21 @@ def CVar(value) end # Create a new DefNode node. - def DefNode(target, operator, name, params, bodystmt) + def DefNode( + target, + operator, + name, + params, + bodystmt, + location = Location.default + ) DefNode.new( target: target, operator: operator, name: name, params: params, bodystmt: bodystmt, - location: Location.default + location: location ) end @@ -565,8 +577,8 @@ def MAssign(target, value) end # Create a new MethodAddBlock node. - def MethodAddBlock(call, block) - MethodAddBlock.new(call: call, block: block, location: Location.default) + def MethodAddBlock(call, block, location = Location.default) + MethodAddBlock.new(call: call, block: block, location: location) end # Create a new MLHS node. @@ -779,7 +791,7 @@ def SClass(target, bodystmt) # Create a new Statements node. def Statements(body) - Statements.new(nil, body: body, location: Location.default) + Statements.new(body: body, location: Location.default) end # Create a new StringContent node. 
diff --git a/lib/syntax_tree/visitor/field_visitor.rb b/lib/syntax_tree/field_visitor.rb similarity index 91% rename from lib/syntax_tree/visitor/field_visitor.rb rename to lib/syntax_tree/field_visitor.rb index 6e643e09..ca1df55b 100644 --- a/lib/syntax_tree/visitor/field_visitor.rb +++ b/lib/syntax_tree/field_visitor.rb @@ -1,55 +1,54 @@ # frozen_string_literal: true module SyntaxTree - class Visitor - # This is the parent class of a lot of built-in visitors for Syntax Tree. It - # reflects visiting each of the fields on every node in turn. It itself does - # not do anything with these fields, it leaves that behavior up to the - # subclass to implement. - # - # In order to properly use this class, you will need to subclass it and - # implement #comments, #field, #list, #node, #pairs, and #text. Those are - # documented here. - # - # == comments(node) - # - # This accepts the node that is being visited and does something depending - # on the comments attached to the node. - # - # == field(name, value) - # - # This accepts the name of the field being visited as a string (like - # "value") and the actual value of that field. The value can be a subclass - # of Node or any other type that can be held within the tree. - # - # == list(name, values) - # - # This accepts the name of the field being visited as well as a list of - # values. This is used, for example, when visiting something like the body - # of a Statements node. - # - # == node(name, node) - # - # This is the parent serialization method for each node. It is called with - # the node itself, as well as the type of the node as a string. The type - # is an internally used value that usually resembles the name of the - # ripper event that generated the node. The method should yield to the - # given block which then calls through to visit each of the fields on the - # node. 
- # - # == text(name, value) - # - # This accepts the name of the field being visited as well as a string - # value representing the value of the field. - # - # == pairs(name, values) - # - # This accepts the name of the field being visited as well as a list of - # pairs that represent the value of the field. It is used only in a couple - # of circumstances, like when visiting the list of optional parameters - # defined on a method. - # - class FieldVisitor < BasicVisitor + # This is the parent class of a lot of built-in visitors for Syntax Tree. It + # reflects visiting each of the fields on every node in turn. It itself does + # not do anything with these fields, it leaves that behavior up to the + # subclass to implement. + # + # In order to properly use this class, you will need to subclass it and + # implement #comments, #field, #list, #node, #pairs, and #text. Those are + # documented here. + # + # == comments(node) + # + # This accepts the node that is being visited and does something depending on + # the comments attached to the node. + # + # == field(name, value) + # + # This accepts the name of the field being visited as a string (like "value") + # and the actual value of that field. The value can be a subclass of Node or + # any other type that can be held within the tree. + # + # == list(name, values) + # + # This accepts the name of the field being visited as well as a list of + # values. This is used, for example, when visiting something like the body of + # a Statements node. + # + # == node(name, node) + # + # This is the parent serialization method for each node. It is called with the + # node itself, as well as the type of the node as a string. The type is an + # internally used value that usually resembles the name of the ripper event + # that generated the node. The method should yield to the given block which + # then calls through to visit each of the fields on the node. 
+ # + # == text(name, value) + # + # This accepts the name of the field being visited as well as a string value + # representing the value of the field. + # + # == pairs(name, values) + # + # This accepts the name of the field being visited as well as a list of pairs + # that represent the value of the field. It is used only in a couple of + # circumstances, like when visiting the list of optional parameters defined on + # a method. + # + class FieldVisitor < BasicVisitor + visit_methods do def visit_aref(node) node(node, "aref") do field("collection", node.collection) @@ -1017,14 +1016,14 @@ def visit_zsuper(node) def visit___end__(node) visit_token(node, "__end__") end + end - private + private - def visit_token(node, type) - node(node, type) do - field("value", node.value) - comments(node) - end + def visit_token(node, type) + node(node, type) do + field("value", node.value) + comments(node) end end end diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index c64cf7d1..60858bf2 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -138,7 +138,7 @@ def format(node, stackable: true) # going to just print out the node as it was seen in the source. doc = if last_leading&.ignore? 
- range = source[node.location.start_char...node.location.end_char] + range = source[node.start_char...node.end_char] first = true range.each_line(chomp: true) do |line| diff --git a/lib/syntax_tree/index.rb b/lib/syntax_tree/index.rb index 8b33f785..ab2460dd 100644 --- a/lib/syntax_tree/index.rb +++ b/lib/syntax_tree/index.rb @@ -257,74 +257,76 @@ def initialize @statements = nil end - def visit_class(node) - name = visit(node.constant).to_sym - location = - Location.new(node.location.start_line, node.location.start_column) - - results << ClassDefinition.new( - nesting.dup, - name, - location, - comments_for(node) - ) - - nesting << name - super - nesting.pop - end - - def visit_const_ref(node) - node.constant.value - end + visit_methods do + def visit_class(node) + name = visit(node.constant).to_sym + location = + Location.new(node.location.start_line, node.location.start_column) - def visit_def(node) - name = node.name.value.to_sym - location = - Location.new(node.location.start_line, node.location.start_column) - - results << if node.target.nil? - MethodDefinition.new( + results << ClassDefinition.new( nesting.dup, name, location, comments_for(node) ) - else - SingletonMethodDefinition.new( + + nesting << name + super + nesting.pop + end + + def visit_const_ref(node) + node.constant.value + end + + def visit_def(node) + name = node.name.value.to_sym + location = + Location.new(node.location.start_line, node.location.start_column) + + results << if node.target.nil? 
+ MethodDefinition.new( + nesting.dup, + name, + location, + comments_for(node) + ) + else + SingletonMethodDefinition.new( + nesting.dup, + name, + location, + comments_for(node) + ) + end + end + + def visit_module(node) + name = visit(node.constant).to_sym + location = + Location.new(node.location.start_line, node.location.start_column) + + results << ModuleDefinition.new( nesting.dup, name, location, comments_for(node) ) - end - end - - def visit_module(node) - name = visit(node.constant).to_sym - location = - Location.new(node.location.start_line, node.location.start_column) - results << ModuleDefinition.new( - nesting.dup, - name, - location, - comments_for(node) - ) - - nesting << name - super - nesting.pop - end + nesting << name + super + nesting.pop + end - def visit_program(node) - super - results - end + def visit_program(node) + super + results + end - def visit_statements(node) - @statements = node - super + def visit_statements(node) + @statements = node + super + end end private diff --git a/lib/syntax_tree/json_visitor.rb b/lib/syntax_tree/json_visitor.rb new file mode 100644 index 00000000..7ad3fba0 --- /dev/null +++ b/lib/syntax_tree/json_visitor.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +require "json" + +module SyntaxTree + # This visitor transforms the AST into a hash that contains only primitives + # that can be easily serialized into JSON. + class JSONVisitor < FieldVisitor + attr_reader :target + + def initialize + @target = nil + end + + private + + def comments(node) + target[:comments] = visit_all(node.comments) + end + + def field(name, value) + target[name] = value.is_a?(Node) ? 
visit(value) : value + end + + def list(name, values) + target[name] = visit_all(values) + end + + def node(node, type) + previous = @target + @target = { type: type, location: visit_location(node.location) } + yield + @target + ensure + @target = previous + end + + def pairs(name, values) + target[name] = values.map { |(key, value)| [visit(key), visit(value)] } + end + + def text(name, value) + target[name] = value + end + + def visit_location(location) + [ + location.start_line, + location.start_char, + location.end_line, + location.end_char + ] + end + end +end diff --git a/lib/syntax_tree/language_server.rb b/lib/syntax_tree/language_server.rb index a7b23664..6ec81030 100644 --- a/lib/syntax_tree/language_server.rb +++ b/lib/syntax_tree/language_server.rb @@ -2,10 +2,9 @@ require "cgi" require "json" +require "pp" require "uri" -require_relative "language_server/inlay_hints" - module SyntaxTree # Syntax Tree additionally ships with a language server conforming to the # language server protocol. It can be invoked through the CLI by running: @@ -13,6 +12,162 @@ module SyntaxTree # stree lsp # class LanguageServer + # This class provides inlay hints for the language server. For more + # information, see the spec here: + # https://github.com/microsoft/language-server-protocol/issues/956. + class InlayHints < Visitor + # This represents a hint that is going to be displayed in the editor. + class Hint + attr_reader :line, :character, :label + + def initialize(line:, character:, label:) + @line = line + @character = character + @label = label + end + + # This is the shape that the LSP expects. 
+ def to_json(*opts) + { + position: { + line: line, + character: character + }, + label: label + }.to_json(*opts) + end + end + + attr_reader :stack, :hints + + def initialize + @stack = [] + @hints = [] + end + + def visit(node) + stack << node + result = super + stack.pop + result + end + + visit_methods do + # Adds parentheses around assignments contained within the default + # values of parameters. For example, + # + # def foo(a = b = c) + # end + # + # becomes + # + # def foo(a = ā‚b = cā‚Ž) + # end + # + def visit_assign(node) + parentheses(node.location) if stack[-2].is_a?(Params) + super + end + + # Adds parentheses around binary expressions to make it clear which + # subexpression will be evaluated first. For example, + # + # a + b * c + # + # becomes + # + # a + ā‚b * cā‚Ž + # + def visit_binary(node) + case stack[-2] + when Assign, OpAssign + parentheses(node.location) + when Binary + parentheses(node.location) if stack[-2].operator != node.operator + end + + super + end + + # Adds parentheses around ternary operators contained within certain + # expressions where it could be confusing which subexpression will get + # evaluated first. For example, + # + # a ? b : c ? d : e + # + # becomes + # + # a ? b : ā‚c ? d : eā‚Ž + # + def visit_if_op(node) + case stack[-2] + when Assign, Binary, IfOp, OpAssign + parentheses(node.location) + end + + super + end + + # Adds the implicitly rescued StandardError into a bare rescue clause. + # For example, + # + # begin + # rescue + # end + # + # becomes + # + # begin + # rescue StandardError + # end + # + def visit_rescue(node) + if node.exception.nil? + hints << Hint.new( + line: node.location.start_line - 1, + character: node.location.start_column + "rescue".length, + label: " StandardError" + ) + end + + super + end + + # Adds parentheses around unary statements using the - operator that are + # contained within Binary nodes. 
For example, + # + # -a + b + # + # becomes + # + # ā‚-aā‚Ž + b + # + def visit_unary(node) + if stack[-2].is_a?(Binary) && (node.operator == "-") + parentheses(node.location) + end + + super + end + end + + private + + def parentheses(location) + hints << Hint.new( + line: location.start_line - 1, + character: location.start_column, + label: "ā‚" + ) + + hints << Hint.new( + line: location.end_line - 1, + character: location.end_column, + label: "ā‚Ž" + ) + end + end + # This is a small module that effectively mirrors pattern matching. We're # using it so that we can support truffleruby without having to ignore the # language server. diff --git a/lib/syntax_tree/language_server/inlay_hints.rb b/lib/syntax_tree/language_server/inlay_hints.rb deleted file mode 100644 index dfd63b8d..00000000 --- a/lib/syntax_tree/language_server/inlay_hints.rb +++ /dev/null @@ -1,159 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class LanguageServer - # This class provides inlay hints for the language server. For more - # information, see the spec here: - # https://github.com/microsoft/language-server-protocol/issues/956. - class InlayHints < Visitor - # This represents a hint that is going to be displayed in the editor. - class Hint - attr_reader :line, :character, :label - - def initialize(line:, character:, label:) - @line = line - @character = character - @label = label - end - - # This is the shape that the LSP expects. - def to_json(*opts) - { - position: { - line: line, - character: character - }, - label: label - }.to_json(*opts) - end - end - - attr_reader :stack, :hints - - def initialize - @stack = [] - @hints = [] - end - - def visit(node) - stack << node - result = super - stack.pop - result - end - - # Adds parentheses around assignments contained within the default values - # of parameters. 
For example, - # - # def foo(a = b = c) - # end - # - # becomes - # - # def foo(a = ā‚b = cā‚Ž) - # end - # - def visit_assign(node) - parentheses(node.location) if stack[-2].is_a?(Params) - super - end - - # Adds parentheses around binary expressions to make it clear which - # subexpression will be evaluated first. For example, - # - # a + b * c - # - # becomes - # - # a + ā‚b * cā‚Ž - # - def visit_binary(node) - case stack[-2] - when Assign, OpAssign - parentheses(node.location) - when Binary - parentheses(node.location) if stack[-2].operator != node.operator - end - - super - end - - # Adds parentheses around ternary operators contained within certain - # expressions where it could be confusing which subexpression will get - # evaluated first. For example, - # - # a ? b : c ? d : e - # - # becomes - # - # a ? b : ā‚c ? d : eā‚Ž - # - def visit_if_op(node) - case stack[-2] - when Assign, Binary, IfOp, OpAssign - parentheses(node.location) - end - - super - end - - # Adds the implicitly rescued StandardError into a bare rescue clause. For - # example, - # - # begin - # rescue - # end - # - # becomes - # - # begin - # rescue StandardError - # end - # - def visit_rescue(node) - if node.exception.nil? - hints << Hint.new( - line: node.location.start_line - 1, - character: node.location.start_column + "rescue".length, - label: " StandardError" - ) - end - - super - end - - # Adds parentheses around unary statements using the - operator that are - # contained within Binary nodes. 
For example, - # - # -a + b - # - # becomes - # - # ā‚-aā‚Ž + b - # - def visit_unary(node) - if stack[-2].is_a?(Binary) && (node.operator == "-") - parentheses(node.location) - end - - super - end - - private - - def parentheses(location) - hints << Hint.new( - line: location.start_line - 1, - character: location.start_column, - label: "ā‚" - ) - - hints << Hint.new( - line: location.end_line - 1, - character: location.end_column, - label: "ā‚Ž" - ) - end - end - end -end diff --git a/lib/syntax_tree/match_visitor.rb b/lib/syntax_tree/match_visitor.rb new file mode 100644 index 00000000..ca5bf234 --- /dev/null +++ b/lib/syntax_tree/match_visitor.rb @@ -0,0 +1,120 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor transforms the AST into a Ruby pattern matching expression that + # would match correctly against the AST. + class MatchVisitor < FieldVisitor + attr_reader :q + + def initialize(q) + @q = q + end + + def visit(node) + case node + when Node + super + when String + # pp will split up a string on newlines and concat them together using a + # "+" operator. This breaks the pattern matching expression. So instead + # we're going to check here for strings and manually put the entire + # value into the output buffer. + q.text(node.inspect) + else + node.pretty_print(q) + end + end + + private + + def comments(node) + return if node.comments.empty? + + q.nest(0) do + q.text("comments: [") + q.indent do + q.breakable("") + q.seplist(node.comments) { |comment| visit(comment) } + end + q.breakable("") + q.text("]") + end + end + + def field(name, value) + q.nest(0) do + q.text(name) + q.text(": ") + visit(value) + end + end + + def list(name, values) + q.group do + q.text(name) + q.text(": [") + q.indent do + q.breakable("") + q.seplist(values) { |value| visit(value) } + end + q.breakable("") + q.text("]") + end + end + + def node(node, _type) + items = [] + q.with_target(items) { yield } + + if items.empty? 
+ q.text(node.class.name) + return + end + + q.group do + q.text(node.class.name) + q.text("[") + q.indent do + q.breakable("") + q.seplist(items) { |item| q.target << item } + end + q.breakable("") + q.text("]") + end + end + + def pairs(name, values) + q.group do + q.text(name) + q.text(": [") + q.indent do + q.breakable("") + q.seplist(values) do |(key, value)| + q.group do + q.text("[") + q.indent do + q.breakable("") + visit(key) + q.text(",") + q.breakable + visit(value || nil) + end + q.breakable("") + q.text("]") + end + end + end + q.breakable("") + q.text("]") + end + end + + def text(name, value) + q.nest(0) do + q.text(name) + q.text(": ") + value.pretty_print(q) + end + end + end +end diff --git a/lib/syntax_tree/mermaid.rb b/lib/syntax_tree/mermaid.rb new file mode 100644 index 00000000..68ea4734 --- /dev/null +++ b/lib/syntax_tree/mermaid.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +require "cgi" +require "stringio" + +module SyntaxTree + # This module is responsible for rendering mermaid (https://mermaid.js.org/) + # flow charts. + module Mermaid + # This is the main class that handles rendering a flowchart. It keeps track + # of its nodes and links and renders them according to the mermaid syntax. + class FlowChart + attr_reader :output, :prefix, :nodes, :links + + def initialize + @output = StringIO.new + @output.puts("flowchart TD") + @prefix = " " + + @nodes = {} + @links = [] + end + + # Retrieve a node that has already been added to the flowchart by its id. + def fetch(id) + nodes.fetch(id) + end + + # Add a link to the flowchart between two nodes with an optional label. + def link(from, to, label = nil, type: :directed, color: nil) + link = Link.new(from, to, label, type, color) + links << link + + output.puts("#{prefix}#{link.render}") + link + end + + # Add a node to the flowchart with an optional label. 
+ def node(id, label = " ", shape: :rectangle) + node = Node.new(id, label, shape) + nodes[id] = node + + output.puts("#{prefix}#{nodes[id].render}") + node + end + + # Add a subgraph to the flowchart. Within the given block, all of the + # nodes will be rendered within the subgraph. + def subgraph(label) + output.puts("#{prefix}subgraph #{Mermaid.escape(label)}") + + previous = prefix + @prefix = "#{prefix} " + + begin + yield + ensure + @prefix = previous + output.puts("#{prefix}end") + end + end + + # Return the rendered flowchart. + def render + links.each_with_index do |link, index| + if link.color + output.puts("#{prefix}linkStyle #{index} stroke:#{link.color}") + end + end + + output.string + end + end + + # This class represents a link between two nodes in a flowchart. It is not + # meant to be interacted with directly, but rather used as a data structure + # by the FlowChart class. + class Link + TYPES = %i[directed dotted].freeze + COLORS = %i[green red].freeze + + attr_reader :from, :to, :label, :type, :color + + def initialize(from, to, label, type, color) + raise unless TYPES.include?(type) + raise if color && !COLORS.include?(color) + + @from = from + @to = to + @label = label + @type = type + @color = color + end + + def render + left_side, right_side, full_side = sides + + if label + escaped = Mermaid.escape(label) + "#{from.id} #{left_side} #{escaped} #{right_side} #{to.id}" + else + "#{from.id} #{full_side} #{to.id}" + end + end + + private + + def sides + case type + when :directed + %w[-- --> -->] + when :dotted + %w[-. .-> -.->] + end + end + end + + # This class represents a node in a flowchart. Unlike the Link class, it can + # be used directly. It is the return value of the #node method, and is meant + # to be passed around to #link methods to create links between nodes. 
+ class Node + SHAPES = %i[circle rectangle rounded stadium].freeze + + attr_reader :id, :label, :shape + + def initialize(id, label, shape) + raise unless SHAPES.include?(shape) + + @id = id + @label = label + @shape = shape + end + + def render + left_bound, right_bound = bounds + "#{id}#{left_bound}#{Mermaid.escape(label)}#{right_bound}" + end + + private + + def bounds + case shape + when :circle + %w[(( ))] + when :rectangle + ["[", "]"] + when :rounded + %w[( )] + when :stadium + ["([", "])"] + end + end + end + + class << self + # Escape a label to be used in the mermaid syntax. This is used to escape + # HTML entities such that they render properly within the quotes. + def escape(label) + "\"#{CGI.escapeHTML(label)}\"" + end + + # Create a new flowchart. If a block is given, it will be yielded to and + # the flowchart will be rendered. Otherwise, the flowchart will be + # returned. + def flowchart + flowchart = FlowChart.new + + if block_given? + yield flowchart + flowchart.render + else + flowchart + end + end + end + end +end diff --git a/lib/syntax_tree/mermaid_visitor.rb b/lib/syntax_tree/mermaid_visitor.rb new file mode 100644 index 00000000..fc9f6706 --- /dev/null +++ b/lib/syntax_tree/mermaid_visitor.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor transforms the AST into a mermaid flow chart. 
+ class MermaidVisitor < FieldVisitor + attr_reader :flowchart, :target + + def initialize + @flowchart = Mermaid.flowchart + @target = nil + end + + def visit_program(node) + super + flowchart.render + end + + private + + def comments(node) + # Ignore + end + + def field(name, value) + case value + when nil + # skip + when Node + flowchart.link(target, visit(value), name) + else + to = + flowchart.node("#{target.id}_#{name}", value.inspect, shape: :stadium) + flowchart.link(target, to, name) + end + end + + def list(name, values) + values.each_with_index do |value, index| + field("#{name}[#{index}]", value) + end + end + + def node(node, type) + previous_target = target + + begin + @target = flowchart.node("node_#{node.object_id}", type) + yield + @target + ensure + @target = previous_target + end + end + + def pairs(name, values) + values.each_with_index do |(key, value), index| + to = flowchart.node("#{target.id}_#{name}_#{index}", shape: :circle) + + flowchart.link(target, to, "#{name}[#{index}]") + flowchart.link(to, visit(key), "[0]") + flowchart.link(to, visit(value), "[1]") if value + end + end + + def text(name, value) + field(name, value) + end + end +end diff --git a/lib/syntax_tree/visitor/mutation_visitor.rb b/lib/syntax_tree/mutation_visitor.rb similarity index 94% rename from lib/syntax_tree/visitor/mutation_visitor.rb rename to lib/syntax_tree/mutation_visitor.rb index 65f8c5ba..0b4b9357 100644 --- a/lib/syntax_tree/visitor/mutation_visitor.rb +++ b/lib/syntax_tree/mutation_visitor.rb @@ -1,39 +1,39 @@ # frozen_string_literal: true module SyntaxTree - class Visitor - # This visitor walks through the tree and copies each node as it is being - # visited. This is useful for mutating the tree before it is formatted. - class MutationVisitor < BasicVisitor - attr_reader :mutations + # This visitor walks through the tree and copies each node as it is being + # visited. This is useful for mutating the tree before it is formatted. 
+ class MutationVisitor < BasicVisitor + attr_reader :mutations - def initialize - @mutations = [] - end - - # Create a new mutation based on the given query that will mutate the node - # using the given block. The block should return a new node that will take - # the place of the given node in the tree. These blocks frequently make - # use of the `copy` method on nodes to create a new node with the same - # properties as the original node. - def mutate(query, &block) - mutations << [Pattern.new(query).compile, block] - end + def initialize + @mutations = [] + end - # This is the base visit method for each node in the tree. It first - # creates a copy of the node using the visit_* methods defined below. Then - # it checks each mutation in sequence and calls it if it finds a match. - def visit(node) - return unless node - result = node.accept(self) + # Create a new mutation based on the given query that will mutate the node + # using the given block. The block should return a new node that will take + # the place of the given node in the tree. These blocks frequently make use + # of the `copy` method on nodes to create a new node with the same + # properties as the original node. + def mutate(query, &block) + mutations << [Pattern.new(query).compile, block] + end - mutations.each do |(pattern, mutation)| - result = mutation.call(result) if pattern.call(result) - end + # This is the base visit method for each node in the tree. It first creates + # a copy of the node using the visit_* methods defined below. Then it checks + # each mutation in sequence and calls it if it finds a match. + def visit(node) + return unless node + result = node.accept(self) - result + mutations.each do |(pattern, mutation)| + result = mutation.call(result) if pattern.call(result) end + result + end + + visit_methods do # Visit a BEGINBlock node. 
def visit_BEGIN(node) node.copy( diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index fc5517cf..567ec0c8 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -126,18 +126,28 @@ def format(q) raise NotImplementedError end + def start_char + location.start_char + end + + def end_char + location.end_char + end + def pretty_print(q) - visitor = Visitor::PrettyPrintVisitor.new(q) - visitor.visit(self) + accept(PrettyPrintVisitor.new(q)) end def to_json(*opts) - visitor = Visitor::JSONVisitor.new - visitor.visit(self).to_json(*opts) + accept(JSONVisitor.new).to_json(*opts) + end + + def to_mermaid + accept(MermaidVisitor.new) end def construct_keys - PrettierPrint.format(+"") { |q| Visitor::MatchVisitor.new(q).visit(self) } + PrettierPrint.format(+"") { |q| accept(MatchVisitor.new(q)) } end end @@ -555,7 +565,7 @@ def var_alias? # collection[] # class ARef < Node - # [untyped] the value being indexed + # [Node] the value being indexed attr_reader :collection # [nil | Args] the value being passed within the brackets @@ -633,7 +643,7 @@ def ===(other) # collection[index] = value # class ARefField < Node - # [untyped] the value being indexed + # [Node] the value being indexed attr_reader :collection # [nil | Args] the value being passed within the brackets @@ -808,7 +818,7 @@ def trailing_comma? 
# method(first, second, third) # class Args < Node - # [Array[ untyped ]] the arguments that this node wraps + # [Array[ Node ]] the arguments that this node wraps attr_reader :parts # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -874,7 +884,7 @@ def arity # method(&expression) # class ArgBlock < Node - # [nil | untyped] the expression being turned into a block + # [nil | Node] the expression being turned into a block attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -926,7 +936,7 @@ def ===(other) # method(*arguments) # class ArgStar < Node - # [nil | untyped] the expression being splatted + # [nil | Node] the expression being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1129,7 +1139,8 @@ def format(q) end end - # [LBracket] the bracket that opens this array + # [nil | LBracket | QSymbolsBeg | QWordsBeg | SymbolsBeg | WordsBeg] the + # bracket that opens this array attr_reader :lbracket # [nil | Args] the contents of the array @@ -1287,7 +1298,7 @@ def format(q) # [nil | VarRef] the optional constant wrapper attr_reader :constant - # [Array[ untyped ]] the regular positional arguments that this array + # [Array[ Node ]] the regular positional arguments that this array # pattern is matching against attr_reader :requireds @@ -1295,7 +1306,7 @@ def format(q) # positional arguments attr_reader :rest - # [Array[ untyped ]] the list of positional arguments occurring after the + # [Array[ Node ]] the list of positional arguments occurring after the # optional star if there is one attr_reader :posts @@ -1405,7 +1416,7 @@ class Assign < Node # to assign the result of the expression to attr_reader :target - # [untyped] the expression to be assigned + # [Node] the expression to be assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1480,10 +1491,10 @@ def skip_indent? # # In the above example, the would be two Assoc nodes. 
class Assoc < Node - # [untyped] the key of this pair + # [Node] the key of this pair attr_reader :key - # [untyped] the value of this pair + # [nil | Node] the value of this pair attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1537,7 +1548,7 @@ def ===(other) private def format_contents(q) - q.parent.format_key(q, key) + (q.parent || HashKeyFormatter::Identity.new).format_key(q, key) return unless value if key.comments.empty? && AssignFormatting.skip_indent?(value) @@ -1558,7 +1569,7 @@ def format_contents(q) # { **pairs } # class AssocSplat < Node - # [nil | untyped] the expression that is being splatted + # [nil | Node] the expression that is being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1754,6 +1765,20 @@ def format_key(q, key) end end + # When formatting a single assoc node without the context of the parent + # hash, this formatter is used. It uses whatever is present in the node, + # because there is nothing to be consistent with. + class Identity + def format_key(q, key) + if key.is_a?(Label) + q.format(key) + else + q.format(key) + q.text(" =>") + end + end + end + def self.for(container) labels = container.assocs.all? 
do |assoc| @@ -1908,7 +1933,7 @@ def ===(other) # end # class PinnedBegin < Node - # [untyped] the expression being pinned + # [Node] the expression being pinned attr_reader :statement # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1989,13 +2014,13 @@ def name } end - # [untyped] the left-hand side of the expression + # [Node] the left-hand side of the expression attr_reader :left # [Symbol] the operator used between the two expressions attr_reader :operator - # [untyped] the right-hand side of the expression + # [Node] the right-hand side of the expression attr_reader :right # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -2149,6 +2174,14 @@ def ===(other) other.is_a?(BlockVar) && params === other.params && ArrayMatch.call(locals, other.locals) end + + # When a single required parameter is declared for a block, it gets + # automatically expanded if the values being yielded into it are an array. + def arg0? + params.requireds.length == 1 && params.optionals.empty? && + params.rest.nil? && params.posts.empty? && params.keywords.empty? && + params.keyword_rest.nil? && params.block.nil? + end end # BlockArg represents declaring a block parameter on a method definition. @@ -2242,7 +2275,7 @@ def initialize( @comments = [] end - def bind(start_char, start_column, end_char, end_column) + def bind(parser, start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, @@ -2256,6 +2289,7 @@ def bind(start_char, start_column, end_char, end_column) # Here we're going to determine the bounds for the statements consequent = rescue_clause || else_clause || ensure_clause statements.bind( + parser, start_char, start_column, consequent ? consequent.location.start_char : end_char, @@ -2646,7 +2680,7 @@ def format(q) # Of course there are a lot of caveats to that, including trailing operators # when necessary, where comments are places, how blocks are aligned, etc. 
class CallChainFormatter - # [Call | MethodAddBlock] the top of the call chain + # [CallNode | MethodAddBlock] the top of the call chain attr_reader :node def initialize(node) @@ -2867,7 +2901,7 @@ def format_child( # receiver.message # class CallNode < Node - # [nil | untyped] the receiver of the method call + # [nil | Node] the receiver of the method call attr_reader :receiver # [nil | :"::" | Op | Period] the operator being used to send the message @@ -3043,7 +3077,7 @@ class Case < Node # [Kw] the keyword that opens this expression attr_reader :keyword - # [nil | untyped] optional value being switched on + # [nil | Node] optional value being switched on attr_reader :value # [In | When] the next clause in the chain @@ -3122,14 +3156,14 @@ def ===(other) # value => pattern # class RAssign < Node - # [untyped] the left-hand expression + # [Node] the left-hand expression attr_reader :value # [Kw | Op] the operator being used to match against the pattern, which is # either => or in attr_reader :operator - # [untyped] the pattern on the right-hand side of the expression + # [Node] the pattern on the right-hand side of the expression attr_reader :pattern # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3240,7 +3274,7 @@ class ClassDeclaration < Node # defined attr_reader :constant - # [nil | untyped] the optional superclass declaration + # [nil | Node] the optional superclass declaration attr_reader :superclass # [BodyStmt] the expressions to execute within the context of the class @@ -3378,7 +3412,7 @@ class Command < Node # [Args] the arguments being sent with the message attr_reader :arguments - # [nil | Block] the optional block being passed to the method + # [nil | BlockNode] the optional block being passed to the method attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3484,19 +3518,19 @@ def align(q, node, &block) # object.method argument # class CommandCall < Node - # [untyped] the receiver of the 
message + # [nil | Node] the receiver of the message attr_reader :receiver - # [:"::" | Op | Period] the operator used to send the message + # [nil | :"::" | Op | Period] the operator used to send the message attr_reader :operator - # [Const | Ident | Op] the message being send + # [:call | Const | Ident | Op] the message being send attr_reader :message - # [nil | Args] the arguments going along with the message + # [nil | Args | ArgParen] the arguments going along with the message attr_reader :arguments - # [nil | Block] the block associated with this method call + # [nil | BlockNode] the block associated with this method call attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3782,7 +3816,7 @@ def ===(other) # object::Const = value # class ConstPathField < Node - # [untyped] the source of the constant + # [Node] the source of the constant attr_reader :parent # [Const] the constant itself @@ -3846,7 +3880,7 @@ def ===(other) # object::Const # class ConstPathRef < Node - # [untyped] the source of the constant + # [Node] the source of the constant attr_reader :parent # [Const] the constant itself @@ -4015,7 +4049,7 @@ def ===(other) # def object.method(param) result end # class DefNode < Node - # [nil | untyped] the target where the method is being defined + # [nil | Node] the target where the method is being defined attr_reader :target # [nil | Op | Period] the operator being used to declare the method @@ -4027,7 +4061,7 @@ class DefNode < Node # [nil | Params | Paren] the parameter declaration for the method attr_reader :params - # [BodyStmt | untyped] the expressions to be executed by the method + # [BodyStmt | Node] the expressions to be executed by the method attr_reader :bodystmt # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4090,7 +4124,8 @@ def deconstruct_keys(_keys) def format(q) q.group do q.group do - q.text("def ") + q.text("def") + q.text(" ") if target || name.comments.empty? 
if target q.format(target) @@ -4160,7 +4195,7 @@ def arity # defined?(variable) # class Defined < Node - # [untyped] the value being sent to the keyword + # [Node] the value being sent to the keyword attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4317,7 +4352,7 @@ def format(q) # are no parentheses around the arguments to that command, so we need to # break the block. case q.parent - when Command, CommandCall + when nil, Command, CommandCall q.break_parent format_break(q, break_opening, break_closing) return @@ -4371,7 +4406,7 @@ def unchangeable_bounds?(q) # If we're a sibling of a control-flow keyword, then we're going to have to # use the do..end bounds. def forced_do_end_bounds?(q) - case q.parent.call + case q.parent&.call when Break, Next, ReturnNode, Super true else @@ -4451,13 +4486,13 @@ def format_flat(q, flat_opening, flat_closing) # # One of the sides of the expression may be nil, but not both. class RangeNode < Node - # [nil | untyped] the left side of the expression + # [nil | Node] the left side of the expression attr_reader :left # [Op] the operator used for this range attr_reader :operator - # [nil | untyped] the right side of the expression + # [nil | Node] the right side of the expression attr_reader :right # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4578,7 +4613,7 @@ class DynaSymbol < Node # dynamic symbol attr_reader :parts - # [String] the quote used to delimit the dynamic symbol + # [nil | String] the quote used to delimit the dynamic symbol attr_reader :quote # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4776,7 +4811,7 @@ def ===(other) # end # class Elsif < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -4872,6 +4907,25 @@ class EmbDoc < Node def initialize(value:, location:) @value = value @location = location + + @leading = false + 
@trailing = false + end + + def leading! + @leading = true + end + + def leading? + @leading + end + + def trailing! + @trailing = true + end + + def trailing? + @trailing end def inline? @@ -4908,7 +4962,13 @@ def deconstruct_keys(_keys) end def format(q) - q.trim + if (q.parent.is_a?(DefNode) && q.parent.endless?) || + q.parent.is_a?(Statements) + q.trim + else + q.breakable_return + end + q.text(value) end @@ -5177,7 +5237,7 @@ def ===(other) # object.variable = value # class Field < Node - # [untyped] the parent object that owns the field being assigned + # [Node] the parent object that owns the field being assigned attr_reader :parent # [:"::" | Op | Period] the operator being used for the assignment @@ -5303,13 +5363,13 @@ def ===(other) # end # class FndPtn < Node - # [nil | untyped] the optional constant wrapper + # [nil | Node] the optional constant wrapper attr_reader :constant # [VarField] the splat on the left-hand side attr_reader :left - # [Array[ untyped ]] the list of positional expressions in the pattern that + # [Array[ Node ]] the list of positional expressions in the pattern that # are being matched attr_reader :values @@ -5405,7 +5465,7 @@ class For < Node # pull values out of the object being enumerated attr_reader :index - # [untyped] the object being enumerated in the loop + # [Node] the object being enumerated in the loop attr_reader :collection # [Statements] the statements to be executed @@ -5884,7 +5944,7 @@ class KeywordFormatter # [Label] the keyword being used attr_reader :key - # [untyped] the optional value for the keyword + # [Node] the optional value for the keyword attr_reader :value def initialize(key, value) @@ -5897,7 +5957,7 @@ def comments end def format(q) - q.format(key) + HashKeyFormatter::Labels.new.format_key(q, key) if value q.text(" ") @@ -5925,11 +5985,11 @@ def format(q) end end - # [nil | untyped] the optional constant wrapper + # [nil | Node] the optional constant wrapper attr_reader :constant - # [Array[ [Label, 
untyped] ]] the set of tuples representing the keywords - # that should be matched against in the pattern + # [Array[ [DynaSymbol | Label, nil | Node] ]] the set of tuples + # representing the keywords that should be matched against in the pattern attr_reader :keywords # [nil | VarField] an optional parameter to gather up all remaining keywords @@ -6354,7 +6414,7 @@ def contains_conditional? # end # class IfNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -6427,13 +6487,13 @@ def modifier? # predicate ? truthy : falsy # class IfOp < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate - # [untyped] the expression to be executed if the predicate is truthy + # [Node] the expression to be executed if the predicate is truthy attr_reader :truthy - # [untyped] the expression to be executed if the predicate is falsy + # [Node] the expression to be executed if the predicate is falsy attr_reader :falsy # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -6617,7 +6677,7 @@ def ===(other) # end # class In < Node - # [untyped] the pattern to check against + # [Node] the pattern to check against attr_reader :pattern # [Statements] the expressions to execute if the pattern matched @@ -7400,7 +7460,7 @@ class MAssign < Node # [MLHS | MLHSParen] the target of the multiple assignment attr_reader :target - # [untyped] the value being assigned + # [Node] the value being assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7460,10 +7520,10 @@ def ===(other) # method {} # class MethodAddBlock < Node - # [Call | Command | CommandCall] the method call + # [ARef | CallNode | Command | CommandCall | Super | ZSuper] the method call attr_reader :call - # [Block] the block being sent with the method call + # [BlockNode] the block being sent with the method call 
attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7535,8 +7595,12 @@ def format_contents(q) # first, second, third = value # class MLHS < Node - # Array[ARefField | ArgStar | Field | Ident | MLHSParen | VarField] the - # parts of the left-hand side of a multiple assignment + # [ + # Array[ + # ARefField | ArgStar | ConstPathField | Field | Ident | MLHSParen | + # TopConstField | VarField + # ] + # ] the parts of the left-hand side of a multiple assignment attr_reader :parts # [boolean] whether or not there is a trailing comma at the end of this @@ -7762,7 +7826,7 @@ def format_declaration(q) # values = first, second, third # class MRHS < Node - # Array[untyped] the parts that are being assigned + # [Array[Node]] the parts that are being assigned attr_reader :parts # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7938,7 +8002,7 @@ class OpAssign < Node # [Op] the operator being used for the assignment attr_reader :operator - # [untyped] the expression to be assigned + # [Node] the expression to be assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -8095,7 +8159,7 @@ class OptionalFormatter # [Ident] the name of the parameter attr_reader :name - # [untyped] the value of the parameter + # [Node] the value of the parameter attr_reader :value def initialize(name, value) @@ -8120,7 +8184,7 @@ class KeywordFormatter # [Ident] the name of the parameter attr_reader :name - # [nil | untyped] the value of the parameter + # [nil | Node] the value of the parameter attr_reader :value def initialize(name, value) @@ -8161,10 +8225,10 @@ def format(q) end end - # [Array[ Ident ]] any required parameters + # [Array[ Ident | MLHSParen ]] any required parameters attr_reader :requireds - # [Array[ [ Ident, untyped ] ]] any optional parameters and their default + # [Array[ [ Ident, Node ] ]] any optional parameters and their default # values attr_reader :optionals @@ -8176,11 +8240,12 
@@ def format(q) # parameter attr_reader :posts - # [Array[ [ Ident, nil | untyped ] ]] any keyword parameters and their + # [Array[ [ Label, nil | Node ] ]] any keyword parameters and their # optional default values attr_reader :keywords - # [nil | :nil | KwRestParam] the optional keyword rest parameter + # [nil | :nil | ArgsForward | KwRestParam] the optional keyword rest + # parameter attr_reader :keyword_rest # [nil | BlockArg] the optional block parameter @@ -8369,7 +8434,7 @@ class Paren < Node # [LParen] the left parenthesis that opened this statement attr_reader :lparen - # [nil | untyped] the expression inside the parentheses + # [nil | Node] the expression inside the parentheses attr_reader :contents # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -9218,7 +9283,7 @@ def ambiguous?(q) # end # class RescueEx < Node - # [untyped] the list of exceptions being rescued + # [nil | Node] the list of exceptions being rescued attr_reader :exceptions # [nil | Field | VarField] the expression being used to capture the raised @@ -9296,7 +9361,7 @@ class Rescue < Node # [Kw] the rescue keyword attr_reader :keyword - # [RescueEx] the exceptions being rescued + # [nil | RescueEx] the exceptions being rescued attr_reader :exception # [Statements] the expressions to evaluate when an error is rescued @@ -9416,10 +9481,10 @@ def ===(other) # expression rescue value # class RescueMod < Node - # [untyped] the expression to execute + # [Node] the expression to execute attr_reader :statement - # [untyped] the value to use if the executed expression raises an error + # [Node] the value to use if the executed expression raises an error attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -9678,7 +9743,7 @@ def ===(other) # end # class SClass < Node - # [untyped] the target of the singleton class to enter + # [Node] the target of the singleton class to enter attr_reader :target # [BodyStmt] the expressions to be executed @@ 
-9752,23 +9817,19 @@ def ===(other) # propagate that onto void_stmt nodes inside the stmts in order to make sure # all comments get printed appropriately. class Statements < Node - # [SyntaxTree] the parser that is generating this node - attr_reader :parser - - # [Array[ untyped ]] the list of expressions contained within this node + # [Array[ Node ]] the list of expressions contained within this node attr_reader :body # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(parser, body:, location:) - @parser = parser + def initialize(body:, location:) @body = body @location = location @comments = [] end - def bind(start_char, start_column, end_char, end_column) + def bind(parser, start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, @@ -9794,7 +9855,7 @@ def bind(start_char, start_column, end_char, end_column) body[0] = VoidStmt.new(location: location) end - attach_comments(start_char, end_char) + attach_comments(parser, start_char, end_char) end def bind_end(end_char, end_column) @@ -9826,7 +9887,6 @@ def child_nodes def copy(body: nil, location: nil) node = Statements.new( - parser, body: body || self.body, location: location || self.location ) @@ -9838,7 +9898,7 @@ def copy(body: nil, location: nil) alias deconstruct child_nodes def deconstruct_keys(_keys) - { parser: parser, body: body, location: location, comments: comments } + { body: body, location: location, comments: comments } end def format(q) @@ -9898,7 +9958,7 @@ def ===(other) # As efficiently as possible, gather up all of the comments that have been # found while this statements list was being parsed and add them into the # body. 
- def attach_comments(start_char, end_char) + def attach_comments(parser, start_char, end_char) parser_comments = parser.comments comment_index = 0 @@ -9945,9 +10005,13 @@ class StringContent < Node # string attr_reader :parts + # [Array[ Comment | EmbDoc ]] the comments attached to this node + attr_reader :comments + def initialize(parts:, location:) @parts = parts @location = location + @comments = [] end def accept(visitor) @@ -9974,6 +10038,33 @@ def deconstruct_keys(_keys) def ===(other) other.is_a?(StringContent) && ArrayMatch.call(parts, other.parts) end + + def format(q) + q.text(q.quote) + q.group do + parts.each do |part| + if part.is_a?(TStringContent) + value = Quotes.normalize(part.value, q.quote) + first = true + + value.each_line(chomp: true) do |line| + if first + first = false + else + q.breakable_return + end + + q.text(line) + end + + q.breakable_return if value.end_with?("\n") + else + q.format(part) + end + end + end + q.text(q.quote) + end end # StringConcat represents concatenating two strings together using a backward @@ -9983,7 +10074,8 @@ def ===(other) # "second" # class StringConcat < Node - # [StringConcat | StringLiteral] the left side of the concatenation + # [Heredoc | StringConcat | StringLiteral] the left side of the + # concatenation attr_reader :left # [StringLiteral] the right side of the concatenation @@ -10180,7 +10272,7 @@ class StringLiteral < Node # string literal attr_reader :parts - # [String] which quote was used by the string literal + # [nil | String] which quote was used by the string literal attr_reader :quote # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -10425,8 +10517,8 @@ def ===(other) # :symbol # class SymbolLiteral < Node - # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op] the value of the - # symbol + # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op | TStringContent] + # the value of the symbol attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to 
this node @@ -10465,6 +10557,7 @@ def deconstruct_keys(_keys) def format(q) q.text(":") + q.text("\\") if value.comments.any? q.format(value) end @@ -10934,7 +11027,7 @@ def ===(other) # not value # class Not < Node - # [nil | untyped] the statement on which to operate + # [nil | Node] the statement on which to operate attr_reader :statement # [boolean] whether or not parentheses were used @@ -11021,7 +11114,7 @@ class Unary < Node # [String] the operator being used attr_reader :operator - # [untyped] the statement on which to operate + # [Node] the statement on which to operate attr_reader :statement # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11165,7 +11258,7 @@ def ===(other) # end # class UnlessNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -11311,7 +11404,7 @@ def format_break(q) # end # class UntilNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -11379,7 +11472,7 @@ def modifier? # # In the example above, the VarField node represents the +variable+ token. class VarField < Node - # [nil | Const | CVar | GVar | Ident | IVar] the target of this node + # [nil | :nil | Const | CVar | GVar | Ident | IVar] the target of this node attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11490,8 +11583,9 @@ def ===(other) # # To be clear, this method should just not exist. It's not good. It's a # place of shame. But it's necessary for now, so I'm keeping it. - def pin(parent) - replace = PinnedVarRef.new(value: value, location: location) + def pin(parent, pin) + replace = + PinnedVarRef.new(value: value, location: pin.location.to(location)) parent .deconstruct_keys([]) @@ -11517,7 +11611,7 @@ def pin(parent) # This can be a plain local variable like the example above. 
It can also be a # a class variable, a global variable, or an instance variable. class PinnedVarRef < Node - # [VarRef] the value of this node + # [Const | CVar | GVar | Ident | IVar] the value of this node attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11631,9 +11725,6 @@ def arity # ;; # class VoidStmt < Node - # [Location] the location of this node - attr_reader :location - # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments @@ -11794,7 +11885,7 @@ def ===(other) # end # class WhileNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 99b703d0..426bd945 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -256,11 +256,37 @@ def find_token(type) tokens[index] if index end + def find_token_between(type, left, right) + bounds = left.location.end_char...right.location.start_char + index = + tokens.rindex do |token| + char = token.location.start_char + break if char < bounds.begin + + token.is_a?(type) && bounds.cover?(char) + end + + tokens[index] if index + end + def find_keyword(name) index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) } tokens[index] if index end + def find_keyword_between(name, left, right) + bounds = left.end_char...right.start_char + index = + tokens.rindex do |token| + char = token.location.start_char + break if char < bounds.begin + + token.is_a?(Kw) && (token.name == name) && bounds.cover?(char) + end + + tokens[index] if index + end + def find_operator(name) index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) } tokens[index] if index @@ -348,6 +374,7 @@ def on_BEGIN(statements) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( + self, start_char, start_char - 
line_counts[lbrace.location.start_line - 1].start, rbrace.location.start_char, @@ -386,6 +413,7 @@ def on_END(statements) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( + self, start_char, start_char - line_counts[lbrace.location.start_line - 1].start, rbrace.location.start_char, @@ -640,13 +668,14 @@ def visit(node) stack.pop end - def visit_var_ref(node) - pins.shift - node.pin(stack[-2]) + visit_methods do + def visit_var_ref(node) + node.pin(stack[-2], pins.shift) + end end def self.visit(node, tokens) - start_char = node.location.start_char + start_char = node.start_char allocated = [] tokens.reverse_each do |token| @@ -670,18 +699,22 @@ def self.visit(node, tokens) # (nil | Array[untyped]) posts # ) -> AryPtn def on_aryptn(constant, requireds, rest, posts) - parts = [constant, *requireds, rest, *posts].compact + lbracket = find_token(LBracket) + lbracket ||= find_token(LParen) if constant - # If there aren't any parts (no constant, no positional arguments), then - # we're matching an empty array. In this case, we're going to look for the - # left and right brackets explicitly. Otherwise, we'll just use the bounds - # of the various parts. - location = - if parts.empty? - consume_token(LBracket).location.to(consume_token(RBracket).location) - else - parts[0].location.to(parts[-1].location) - end + rbracket = find_token(RBracket) + rbracket ||= find_token(RParen) if constant + + parts = [constant, lbracket, *requireds, rest, *posts, rbracket].compact + + # The location is going to be determined by the first part to the last + # part. This includes potential brackets. + location = parts[0].location.to(parts[-1].location) + + # Now that we have the location calculated, we can remove the brackets + # from the list of tokens. 
+ tokens.delete(lbracket) if lbracket + tokens.delete(rbracket) if rbracket # If there is a plain *, then we're going to fix up the location of it # here because it currently doesn't have anything to use for its precise @@ -820,6 +853,7 @@ def on_begin(bodystmt) end bodystmt.bind( + self, find_next_statement_start(keyword.location.end_char), keyword.location.end_column, end_location.end_char, @@ -871,13 +905,34 @@ def on_binary(left, operator, right) # on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar def on_block_var(params, locals) index = - tokens.rindex do |node| - node.is_a?(Op) && %w[| ||].include?(node.value) && - node.location.start_char < params.location.start_char - end + tokens.rindex { |node| node.is_a?(Op) && %w[| ||].include?(node.value) } + + ending = tokens.delete_at(index) + beginning = ending.value == "||" ? ending : consume_operator(:|) + + # If there are no parameters, then we didn't have anything to base the + # location information of off. Now that we have an opening of the + # block, we can correct this. + if params.empty? + start_line = params.location.start_line + start_char = + ( + if beginning.value == "||" + beginning.location.start_char + else + find_next_statement_start(beginning.location.end_char) + end + ) - beginning = tokens[index] - ending = tokens[-1] + location = + Location.fixed( + line: start_line, + char: start_char, + column: start_char - line_counts[start_line - 1].start + ) + + params = params.copy(location: location) + end BlockVar.new( params: params, @@ -905,6 +960,14 @@ def on_blockarg(name) # (nil | Ensure) ensure_clause # ) -> BodyStmt def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + # In certain versions of Ruby, the `statements` argument can be any node + # in the case that we're inside of an endless method definition. In this + # case we'll wrap it in a Statements node to be consistent. 
+ unless statements.is_a?(Statements) + statements = + Statements.new(body: [statements], location: statements.location) + end + parts = [statements, rescue_clause, else_clause, ensure_clause].compact BodyStmt.new( @@ -929,6 +992,7 @@ def on_brace_block(block_var, statements) start_char = find_next_statement_start(location.end_char) statements.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, rbrace.location.start_char, @@ -1036,6 +1100,7 @@ def on_class(constant, superclass, bodystmt) start_char = find_next_statement_start(location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, ending.location.start_char, @@ -1154,13 +1219,23 @@ def on_const(value) end # :call-seq: - # on_const_path_field: (untyped parent, Const constant) -> ConstPathField + # on_const_path_field: (untyped parent, Const constant) -> + # ConstPathField | Field def on_const_path_field(parent, constant) - ConstPathField.new( - parent: parent, - constant: constant, - location: parent.location.to(constant.location) - ) + if constant.is_a?(Const) + ConstPathField.new( + parent: parent, + constant: constant, + location: parent.location.to(constant.location) + ) + else + Field.new( + parent: parent, + operator: consume_operator(:"::"), + name: constant, + location: parent.location.to(constant.location) + ) + end end # :call-seq: @@ -1235,6 +1310,7 @@ def on_def(name, params, bodystmt) start_char = find_next_statement_start(params.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[params.location.start_line - 1].start, ending.location.start_char, @@ -1323,6 +1399,7 @@ def on_defs(target, operator, name, params, bodystmt) start_char = find_next_statement_start(params.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[params.location.start_line - 1].start, ending.location.start_char, @@ -1362,6 +1439,7 @@ def on_do_block(block_var, bodystmt) start_char = 
find_next_statement_start(location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, ending.location.start_char, @@ -1457,6 +1535,7 @@ def on_else(statements) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( + self, start_char, start_char - line_counts[keyword.location.start_line - 1].start, ending.location.start_char, @@ -1482,6 +1561,7 @@ def on_elsif(predicate, statements, consequent) start_char = find_next_statement_start(predicate.location.end_char) statements.bind( + self, start_char, start_char - line_counts[predicate.location.start_line - 1].start, ending.location.start_char, @@ -1605,6 +1685,7 @@ def on_ensure(statements) ending = find_keyword(:end) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( + self, start_char, start_char - line_counts[keyword.location.start_line - 1].start, ending.location.start_char, @@ -1679,6 +1760,22 @@ def on_float(value) # VarField right # ) -> FndPtn def on_fndptn(constant, left, values, right) + # The left and right of a find pattern are always going to be splats, so + # we're going to consume the * operators and use their location + # information to extend the location of the splats. + right, left = + [right, left].map do |node| + operator = consume_operator(:*) + location = + if node.value + operator.location.to(node.location) + else + operator.location + end + + node.copy(location: location) + end + # The opening of this find pattern is either going to be a left bracket, a # right left parenthesis, or the left splat. 
We're going to use this to # determine how to find the closing of the pattern, as well as determining @@ -1719,21 +1816,20 @@ def on_for(index, collection, statements) in_keyword = consume_keyword(:in) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && - keyword.location.start_char > collection.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, collection, ending) || + find_token_between(Semicolon, collection, ending) + + tokens.delete(delimiter) if delimiter start_char = - find_next_statement_start((keyword || collection).location.end_char) + find_next_statement_start((delimiter || collection).location.end_char) + statements.bind( + self, start_char, start_char - - line_counts[(keyword || collection).location.end_line - 1].start, + line_counts[(delimiter || collection).location.end_line - 1].start, ending.location.start_char, ending.location.start_column ) @@ -1787,7 +1883,7 @@ def on_heredoc_beg(value) line: lineno, char: char_pos, column: current_column, - size: value.size + 1 + size: value.size ) # Here we're going to artificially create an extra node type so that if @@ -1822,7 +1918,7 @@ def on_heredoc_end(value) line: lineno, char: char_pos, column: current_column, - size: value.size + 1 + size: value.size ) heredoc_end = HeredocEnd.new(value: value.chomp, location: location) @@ -1837,9 +1933,9 @@ def on_heredoc_end(value) start_line: heredoc.location.start_line, start_char: heredoc.location.start_char, start_column: heredoc.location.start_column, - end_line: lineno, - end_char: char_pos, - end_column: current_column + end_line: location.end_line, + end_char: location.end_char, + end_column: location.end_column ) ) end @@ -1847,10 +1943,42 @@ def on_heredoc_end(value) # :call-seq: # on_hshptn: ( # (nil | untyped) constant, - # Array[[Label, 
untyped]] keywords, + # Array[[Label | StringContent, untyped]] keywords, # (nil | VarField) keyword_rest # ) -> HshPtn def on_hshptn(constant, keywords, keyword_rest) + keywords = + (keywords || []).map do |(label, value)| + if label.is_a?(Label) + [label, value] + else + tstring_beg_index = + tokens.rindex do |token| + token.is_a?(TStringBeg) && + token.location.start_char < label.location.start_char + end + + tstring_beg = tokens.delete_at(tstring_beg_index) + + label_end_index = + tokens.rindex do |token| + token.is_a?(LabelEnd) && + token.location.start_char == label.location.end_char + end + + label_end = tokens.delete_at(label_end_index) + + [ + DynaSymbol.new( + parts: label.parts, + quote: label_end.value[0], + location: tstring_beg.location.to(label_end.location) + ), + value + ] + end + end + if keyword_rest # We're doing this to delete the token from the list so that it doesn't # confuse future patterns by thinking they have an extra ** on the end. @@ -1863,7 +1991,7 @@ def on_hshptn(constant, keywords, keyword_rest) keyword_rest = VarField.new(value: nil, location: token.location) end - parts = [constant, *keywords&.flatten(1), keyword_rest].compact + parts = [constant, *keywords.flatten(1), keyword_rest].compact # If there's no constant, there may be braces, so we're going to look for # those to get our bounds. 
@@ -1880,7 +2008,7 @@ def on_hshptn(constant, keywords, keyword_rest) HshPtn.new( constant: constant, - keywords: keywords || [], + keywords: keywords, keyword_rest: keyword_rest, location: parts[0].location.to(parts[-1].location) ) @@ -1911,8 +2039,14 @@ def on_if(predicate, statements, consequent) beginning = consume_keyword(:if) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + if (keyword = find_keyword_between(:then, predicate, ending)) + tokens.delete(keyword) + end + + start_char = + find_next_statement_start((keyword || predicate).location.end_char) statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -1946,7 +2080,7 @@ def on_if_mod(predicate, statement) IfNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), consequent: nil, location: statement.location.to(predicate.location) ) @@ -1995,8 +2129,10 @@ def on_in(pattern, statements, consequent) statements_start = token end - start_char = find_next_statement_start(statements_start.location.end_char) + start_char = + find_next_statement_start((token || statements_start).location.end_char) statements.bind( + self, start_char, start_char - line_counts[statements_start.location.start_line - 1].start, @@ -2121,12 +2257,19 @@ def on_lambda(params, statements) token.location.start_char > beginning.location.start_char end + if braces + opening = consume_token(TLamBeg) + closing = consume_token(RBrace) + else + opening = consume_keyword(:do) + closing = consume_keyword(:end) + end + # We need to do some special mapping here. Since ripper doesn't support - # capturing lambda var until 3.2, we need to normalize all of that here. + # capturing lambda vars, we need to normalize all of that here. 
params = - case params - when Paren - # In this case we've gotten to the <3.2 parentheses wrapping a set of + if params.is_a?(Paren) + # In this case we've gotten to the parentheses wrapping a set of # parameters case. Here we need to manually scan for lambda locals. range = (params.location.start_char + 1)...params.location.end_char locals = lambda_locals(source[range]) @@ -2148,27 +2291,31 @@ def on_lambda(params, statements) node.comments.concat(params.comments) node - when Params - # In this case we've gotten to the <3.2 plain set of parameters. In - # this case there cannot be lambda locals, so we will wrap the - # parameters into a lambda var that has no locals. + else + # If there are no parameters, then we didn't have anything to base the + # location information off of. Now that we have an opening of the + # block, we can correct this. + if params.empty? + opening_location = opening.location + location = + Location.fixed( + line: opening_location.start_line, + char: opening_location.start_char, + column: opening_location.start_column + ) + + params = params.copy(location: location) + end + + # In this case we've gotten to the plain set of parameters. In this + # case there cannot be lambda locals, so we will wrap the parameters + # into a lambda var that has no locals. LambdaVar.new(params: params, locals: [], location: params.location) - when LambdaVar - # In this case we've gotten to 3.2+ lambda var. In this case we don't - # need to do anything and can just the value as given. 
- params end - if braces - opening = consume_token(TLamBeg) - closing = consume_token(RBrace) - else - opening = consume_keyword(:do) - closing = consume_keyword(:end) - end - start_char = find_next_statement_start(opening.location.end_char) statements.bind( + self, start_char, start_char - line_counts[opening.location.end_line - 1].start, closing.location.start_char, @@ -2353,23 +2500,30 @@ def on_method_add_arg(call, arguments) # :call-seq: # on_method_add_block: ( - # (Call | Command | CommandCall) call, + # (Break | Call | Command | CommandCall | Next | ReturnNode) call, # Block block - # ) -> MethodAddBlock + # ) -> Break | MethodAddBlock def on_method_add_block(call, block) location = call.location.to(block.location) case call + when Break, Next, ReturnNode + parts = call.arguments.parts + + node = parts.pop + copied = + node.copy(block: block, location: node.location.to(block.location)) + + copied.comments.concat(call.comments) + parts << copied + + call.copy(location: location) when Command, CommandCall node = call.copy(block: block, location: location) node.comments.concat(call.comments) node else - MethodAddBlock.new( - call: call, - block: block, - location: call.location.to(block.location) - ) + MethodAddBlock.new(call: call, block: block, location: location) end end @@ -2446,6 +2600,7 @@ def on_module(constant, bodystmt) start_char = find_next_statement_start(constant.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[constant.location.start_line - 1].start, ending.location.start_char, @@ -2592,19 +2747,40 @@ def on_params( # have a `nil` for the value instead of a `false`. keywords&.map! { |(key, value)| [key, value || nil] } - parts = [ - *requireds, - *optionals&.flatten(1), - rest, - *posts, - *keywords&.flatten(1), - (keyword_rest if keyword_rest != :nil), - (block if block != :&) - ].compact + # Here we're going to build up a list of all of the params so that we can + # determine our location information. 
+ parts = [] + + requireds&.each { |required| parts << required.location } + optionals&.each do |(key, value)| + parts << key.location + parts << value.location if value + end + + parts << rest.location if rest + posts&.each { |post| parts << post.location } + + keywords&.each do |(key, value)| + parts << key.location + parts << value.location if value + end + + if keyword_rest == :nil + # When we get a :nil here, it means that we have **nil syntax, which + # means this set of parameters accepts no more keyword arguments. In + # this case we need to go and find the location of these two tokens. + operator = consume_operator(:**) + parts << operator.location.to(consume_keyword(:nil).location) + elsif keyword_rest + parts << keyword_rest.location + end + + parts << block.location if block && block != :& + parts = parts.compact location = if parts.any? - parts[0].location.to(parts[-1].location) + parts[0].to(parts[-1]) else Location.fixed(line: lineno, char: char_pos, column: current_column) end @@ -2701,7 +2877,7 @@ def on_program(statements) ) statements.body << @__end__ if @__end__ - statements.bind(0, 0, source.length, last_column) + statements.bind(self, 0, 0, source.length, last_column) program = Program.new(statements: statements, location: location) attach_comments(program, @comments) @@ -3033,8 +3209,9 @@ def on_rescue(exceptions, variable, statements, consequent) exceptions = exceptions[0] if exceptions.is_a?(Array) last_node = variable || exceptions || keyword - start_char = find_next_statement_start(last_node.location.end_char) + start_char = find_next_statement_start(last_node.end_char) statements.bind( + self, start_char, start_char - line_counts[last_node.location.start_line - 1].start, char_pos, @@ -3055,7 +3232,7 @@ def on_rescue(exceptions, variable, statements, consequent) start_char: keyword.location.end_char + 1, start_column: keyword.location.end_column + 1, end_line: last_node.location.end_line, - end_char: last_node.location.end_char, + 
end_char: last_node.end_char, end_column: last_node.location.end_column ) ) @@ -3153,6 +3330,7 @@ def on_sclass(target, bodystmt) start_char = find_next_statement_start(target.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[target.location.start_line - 1].start, ending.location.start_char, @@ -3166,9 +3344,29 @@ def on_sclass(target, bodystmt) ) end - # def on_semicolon(value) - # value - # end + # Semicolons are tokens that get added to the token list but never get + # attached to the AST. Because of this they only need to track their + # associated location so they can be used for computing bounds. + class Semicolon + attr_reader :location + + def initialize(location) + @location = location + end + end + + # :call-seq: + # on_semicolon: (String value) -> Semicolon + def on_semicolon(value) + tokens << Semicolon.new( + Location.token( + line: lineno, + char: char_pos, + column: current_column, + size: value.size + ) + ) + end # def on_sp(value) # value @@ -3186,18 +3384,13 @@ def on_stmts_add(statements, statement) statements.location.to(statement.location) end - Statements.new( - self, - body: statements.body << statement, - location: location - ) + Statements.new(body: statements.body << statement, location: location) end # :call-seq: # on_stmts_new: () -> Statements def on_stmts_new Statements.new( - self, body: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column) @@ -3262,6 +3455,7 @@ def on_string_embexpr(statements) embexpr_end = consume_token(EmbExprEnd) statements.bind( + self, embexpr_beg.location.end_char, embexpr_beg.location.end_column, embexpr_end.location.start_char, @@ -3605,8 +3799,14 @@ def on_unless(predicate, statements, consequent) beginning = consume_keyword(:unless) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + if (keyword = find_keyword_between(:then, predicate, ending)) + tokens.delete(keyword) + end + + 
start_char = + find_next_statement_start((keyword || predicate).location.end_char) statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3629,7 +3829,7 @@ def on_unless_mod(predicate, statement) UnlessNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), consequent: nil, location: statement.location.to(predicate.location) ) @@ -3641,17 +3841,18 @@ def on_until(predicate, statements) beginning = consume_keyword(:until) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && keyword.location.start_char > predicate.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter # Update the Statements location information - start_char = find_next_statement_start(predicate.location.end_char) + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3673,7 +3874,7 @@ def on_until_mod(predicate, statement) UntilNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), location: statement.location.to(predicate.location) ) end @@ -3744,9 +3945,11 @@ def on_when(arguments, statements, consequent) statements_start = token end - start_char = find_next_statement_start(statements_start.location.end_char) + start_char = + find_next_statement_start((token || 
statements_start).location.end_char) statements.bind( + self, start_char, start_char - line_counts[statements_start.location.start_line - 1].start, @@ -3768,17 +3971,18 @@ def on_while(predicate, statements) beginning = consume_keyword(:while) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && keyword.location.start_char > predicate.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter # Update the Statements location information - start_char = find_next_statement_start(predicate.location.end_char) + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3800,7 +4004,7 @@ def on_while_mod(predicate, statement) WhileNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), location: statement.location.to(predicate.location) ) end diff --git a/lib/syntax_tree/pretty_print_visitor.rb b/lib/syntax_tree/pretty_print_visitor.rb new file mode 100644 index 00000000..894e0cf4 --- /dev/null +++ b/lib/syntax_tree/pretty_print_visitor.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor pretty-prints the AST into an equivalent s-expression. + class PrettyPrintVisitor < FieldVisitor + attr_reader :q + + def initialize(q) + @q = q + end + + # This is here because we need to make sure the operator is cast to a string + # before we print it out. 
+ def visit_binary(node) + node(node, "binary") do + field("left", node.left) + text("operator", node.operator.to_s) + field("right", node.right) + comments(node) + end + end + + # This is here to make it a little nicer to look at labels since they + # typically have their : at the end of the value. + def visit_label(node) + node(node, "label") do + q.breakable + q.text(":") + q.text(node.value[0...-1]) + comments(node) + end + end + + private + + def comments(node) + return if node.comments.empty? + + q.breakable + q.group(2, "(", ")") do + q.seplist(node.comments) { |comment| q.pp(comment) } + end + end + + def field(_name, value) + q.breakable + q.pp(value) + end + + def list(_name, values) + q.breakable + q.group(2, "(", ")") { q.seplist(values) { |value| q.pp(value) } } + end + + def node(_node, type) + q.group(2, "(", ")") do + q.text(type) + yield + end + end + + def pairs(_name, values) + q.group(2, "(", ")") do + q.seplist(values) do |(key, value)| + q.pp(key) + + if value + q.text("=") + q.group(2) do + q.breakable("") + q.pp(value) + end + end + end + end + end + + def text(_name, value) + q.breakable + q.text(value) + end + end +end diff --git a/lib/syntax_tree/reflection.rb b/lib/syntax_tree/reflection.rb new file mode 100644 index 00000000..bf4b95f3 --- /dev/null +++ b/lib/syntax_tree/reflection.rb @@ -0,0 +1,241 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module is used to provide some reflection on the various types of nodes + # and their attributes. As soon as it is required it collects all of its + # information. + module Reflection + # This module represents the type of the values being passed to attributes + # of nodes. It is used as part of the documentation of the attributes. + module Type + CONSTANTS = SyntaxTree.constants.to_h { [_1, SyntaxTree.const_get(_1)] } + + # Represents an array type that holds another type. 
+ class ArrayType + attr_reader :type + + def initialize(type) + @type = type + end + + def ===(value) + value.is_a?(Array) && value.all? { type === _1 } + end + + def inspect + "Array<#{type.inspect}>" + end + end + + # Represents a tuple type that holds a number of types in order. + class TupleType + attr_reader :types + + def initialize(types) + @types = types + end + + def ===(value) + value.is_a?(Array) && value.length == types.length && + value.zip(types).all? { |item, type| type === item } + end + + def inspect + "[#{types.map(&:inspect).join(", ")}]" + end + end + + # Represents a union type that can be one of a number of types. + class UnionType + attr_reader :types + + def initialize(types) + @types = types + end + + def ===(value) + types.any? { _1 === value } + end + + def inspect + types.map(&:inspect).join(" | ") + end + end + + class << self + def parse(comment) + comment = comment.gsub(/\n/, " ") + + unless comment.start_with?("[") + raise "Comment does not start with a bracket: #{comment.inspect}" + end + + count = 1 + found = + comment.chars[1..] + .find + .with_index(1) do |char, index| + count += { "[" => 1, "]" => -1 }.fetch(char, 0) + break index if count == 0 + end + + # If we weren't able to find the end of the balanced brackets, then + # the comment is malformed. + if found.nil? 
+ raise "Comment does not have balanced brackets: #{comment.inspect}" + end + + parse_type(comment[1...found].strip) + end + + private + + def parse_type(value) + case value + when "Integer" + Integer + when "String" + String + when "Symbol" + Symbol + when "boolean" + UnionType.new([TrueClass, FalseClass]) + when "nil" + NilClass + when ":\"::\"" + :"::" + when ":call" + :call + when ":nil" + :nil + when /\AArray\[(.+)\]\z/ + ArrayType.new(parse_type($1.strip)) + when /\A\[(.+)\]\z/ + TupleType.new($1.strip.split(/\s*,\s*/).map { parse_type(_1) }) + else + if value.include?("|") + UnionType.new(value.split(/\s*\|\s*/).map { parse_type(_1) }) + else + CONSTANTS.fetch(value.to_sym) + end + end + end + end + end + + # This class represents one of the attributes on a node in the tree. + class Attribute + attr_reader :name, :comment, :type + + def initialize(name, comment) + @name = name + @comment = comment + @type = Type.parse(comment) + end + end + + # This class represents one of our nodes in the tree. We're going to use it + # as a placeholder for collecting all of the various places that nodes are + # used. + class Node + attr_reader :name, :comment, :attributes + + def initialize(name, comment, attributes) + @name = name + @comment = comment + @attributes = attributes + end + end + + class << self + # This is going to hold a hash of all of the nodes in the tree. The keys + # are the names of the nodes as symbols. + attr_reader :nodes + + # This expects a node name as a symbol and returns the node object for + # that node. + def node(name) + nodes.fetch(name) + end + + private + + def parse_comments(statements, index) + statements[0...index] + .reverse_each + .take_while { _1.is_a?(SyntaxTree::Comment) } + .reverse_each + .map { _1.value[2..] } + end + end + + @nodes = {} + + # For each node, we're going to parse out its attributes and other metadata. + # We'll use this as the basis for our report. 
+ program = + SyntaxTree.parse(SyntaxTree.read(File.expand_path("node.rb", __dir__))) + + main_statements = program.statements.body.last.bodystmt.statements.body + main_statements.each_with_index do |main_statement, main_statement_index| + # Ensure we are only looking at class declarations. + next unless main_statement.is_a?(SyntaxTree::ClassDeclaration) + + # Ensure we're looking at class declarations with superclasses. + next unless main_statement.superclass.is_a?(SyntaxTree::VarRef) + + # Ensure we're looking at class declarations that inherit from Node. + next unless main_statement.superclass.value.value == "Node" + + # All child nodes inherit the location attr_reader from Node, so we'll add + # that to the list of attributes first. + attributes = { + location: + Attribute.new(:location, "[Location] the location of this node") + } + + statements = main_statement.bodystmt.statements.body + statements.each_with_index do |statement, statement_index| + case statement + when SyntaxTree::Command + # We only use commands in node classes to define attributes. So, we + # can safely assume that we're looking at an attribute definition. + unless %w[attr_reader attr_accessor].include?(statement.message.value) + raise "Unexpected command: #{statement.message.value.inspect}" + end + + # The arguments to the command are the attributes that we're defining. + # We want to ensure that we're only defining one at a time. + if statement.arguments.parts.length != 1 + raise "Declaring more than one attribute at a time is not permitted" + end + + attribute = + Attribute.new( + statement.arguments.parts.first.value.value.to_sym, + "#{parse_comments(statements, statement_index).join("\n")}\n" + ) + + # Ensure that we don't already have an attribute named the same as + # this one, and then add it to the list of attributes. 
+ if attributes.key?(attribute.name) + raise "Duplicate attribute: #{attribute.name}" + end + + attributes[attribute.name] = attribute + end + end + + # Finally, set it up in the hash of nodes so that we can use it later. + comments = parse_comments(main_statements, main_statement_index) + node = + Node.new( + main_statement.constant.constant.value.to_sym, + "#{comments.join("\n")}\n", + attributes + ) + + @nodes[node.name] = node + end + end +end diff --git a/lib/syntax_tree/translation.rb b/lib/syntax_tree/translation.rb new file mode 100644 index 00000000..6fc96f00 --- /dev/null +++ b/lib/syntax_tree/translation.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module is responsible for translating the Syntax Tree syntax tree into + # other representations. + module Translation + # This method translates the given node into the representation defined by + # the whitequark/parser gem. We don't explicitly list it as a dependency + # because it's not required for the core functionality of Syntax Tree. + def self.to_parser(node, buffer) + require "parser" + require_relative "translation/parser" + + node.accept(Parser.new(buffer)) + end + + # This method translates the given node into the representation defined by + # the rubocop/rubocop-ast gem. We don't explicitly list it as a dependency + # because it's not required for the core functionality of Syntax Tree. 
+ def self.to_rubocop_ast(node, buffer) + require "rubocop/ast" + require_relative "translation/parser" + require_relative "translation/rubocop_ast" + + node.accept(RuboCopAST.new(buffer)) + end + end +end diff --git a/lib/syntax_tree/translation/parser.rb b/lib/syntax_tree/translation/parser.rb new file mode 100644 index 00000000..ad889478 --- /dev/null +++ b/lib/syntax_tree/translation/parser.rb @@ -0,0 +1,3019 @@ +# frozen_string_literal: true + +module SyntaxTree + module Translation + # This visitor is responsible for converting the syntax tree produced by + # Syntax Tree into the syntax tree produced by the whitequark/parser gem. + class Parser < BasicVisitor + # Heredocs are represented _very_ differently in the parser gem from how + # they are represented in the Syntax Tree AST. This class is responsible + # for handling the translation. + class HeredocBuilder + Line = Struct.new(:value, :segments) + + attr_reader :node, :segments + + def initialize(node) + @node = node + @segments = [] + end + + def <<(segment) + if segment.type == :str && segments.last && + segments.last.type == :str && + !segments.last.children.first.end_with?("\n") + segments.last.children.first << segment.children.first + else + segments << segment + end + end + + def trim! + return unless node.beginning.value[2] == "~" + lines = [Line.new(+"", [])] + + segments.each do |segment| + lines.last.segments << segment + + if segment.type == :str + lines.last.value << segment.children.first + lines << Line.new(+"", []) if lines.last.value.end_with?("\n") + end + end + + lines.pop if lines.last.value.empty? + return if lines.empty? + + segments.clear + lines.each do |line| + remaining = node.dedent + + line.segments.each do |segment| + if segment.type == :str + if remaining > 0 + whitespace = segment.children.first[/^\s{0,#{remaining}}/] + segment.children.first.sub!(/^#{whitespace}/, "") + remaining -= whitespace.length + end + + if node.beginning.value[3] != "'" && segments.any? 
&& + segments.last.type == :str && + segments.last.children.first.end_with?("\\\n") + segments.last.children.first.gsub!(/\\\n\z/, "") + segments.last.children.first.concat(segment.children.first) + elsif !segment.children.first.empty? + segments << segment + end + else + segments << segment + end + end + end + end + end + + attr_reader :buffer, :stack + + def initialize(buffer) + @buffer = buffer + @stack = [] + end + + # For each node that we visit, we keep track of it in a stack as we + # descend into its children. We do this so that child nodes can reflect on + # their parents if they need additional information about their context. + def visit(node) + stack << node + result = super + stack.pop + result + end + + visit_methods do + # Visit an AliasNode node. + def visit_alias(node) + s( + :alias, + [visit(node.left), visit(node.right)], + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) + ) + end + + # Visit an ARefNode. + def visit_aref(node) + if ::Parser::Builders::Default.emit_index + if node.index.nil? + s( + :index, + [visit(node.collection)], + smap_index( + srange_find(node.collection.end_char, node.end_char, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + else + s( + :index, + [visit(node.collection)].concat(visit_all(node.index.parts)), + smap_index( + srange_find_between(node.collection, node.index, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + else + if node.index.nil? + s( + :send, + [visit(node.collection), :[]], + smap_send_bare( + srange_find(node.collection.end_char, node.end_char, "[]"), + srange_node(node) + ) + ) + else + s( + :send, + [visit(node.collection), :[], *visit_all(node.index.parts)], + smap_send_bare( + srange( + srange_find_between( + node.collection, + node.index, + "[" + ).begin_pos, + node.end_char + ), + srange_node(node) + ) + ) + end + end + end + + # Visit an ARefField node. 
+ def visit_aref_field(node) + if ::Parser::Builders::Default.emit_index + if node.index.nil? + s( + :indexasgn, + [visit(node.collection)], + smap_index( + srange_find(node.collection.end_char, node.end_char, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + else + s( + :indexasgn, + [visit(node.collection)].concat(visit_all(node.index.parts)), + smap_index( + srange_find_between(node.collection, node.index, "["), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + else + if node.index.nil? + s( + :send, + [visit(node.collection), :[]=], + smap_send_bare( + srange_find(node.collection.end_char, node.end_char, "[]"), + srange_node(node) + ) + ) + else + s( + :send, + [visit(node.collection), :[]=].concat( + visit_all(node.index.parts) + ), + smap_send_bare( + srange( + srange_find_between( + node.collection, + node.index, + "[" + ).begin_pos, + node.end_char + ), + srange_node(node) + ) + ) + end + end + end + + # Visit an ArgBlock node. + def visit_arg_block(node) + s( + :block_pass, + [visit(node.value)], + smap_operator(srange_length(node.start_char, 1), srange_node(node)) + ) + end + + # Visit an ArgStar node. + def visit_arg_star(node) + if stack[-3].is_a?(MLHSParen) && stack[-3].contents.is_a?(MLHS) + if node.value.nil? + s(:restarg, [], smap_variable(nil, srange_node(node))) + else + s( + :restarg, + [node.value.value.to_sym], + smap_variable(srange_node(node.value), srange_node(node)) + ) + end + else + s( + :splat, + node.value.nil? ? [] : [visit(node.value)], + smap_operator( + srange_length(node.start_char, 1), + srange_node(node) + ) + ) + end + end + + # Visit an ArgsForward node. + def visit_args_forward(node) + s(:forwarded_args, [], smap(srange_node(node))) + end + + # Visit an ArrayLiteral node. + def visit_array(node) + s( + :array, + node.contents ? visit_all(node.contents.parts) : [], + if node.lbracket.nil? 
+ smap_collection_bare(srange_node(node)) + else + smap_collection( + srange_node(node.lbracket), + srange_length(node.end_char, -1), + srange_node(node) + ) + end + ) + end + + # Visit an AryPtn node. + def visit_aryptn(node) + type = :array_pattern + children = visit_all(node.requireds) + + if node.rest.is_a?(VarField) + if !node.rest.value.nil? + children << s(:match_rest, [visit(node.rest)], nil) + elsif node.posts.empty? && + node.rest.start_char == node.rest.end_char + # Here we have an implicit rest, as in [foo,]. parser has a + # specific type for these patterns. + type = :array_pattern_with_tail + else + children << s(:match_rest, [], nil) + end + end + + if node.constant + s( + :const_pattern, + [ + visit(node.constant), + s( + type, + children + visit_all(node.posts), + smap_collection_bare( + srange(node.constant.end_char + 1, node.end_char - 1) + ) + ) + ], + smap_collection( + srange_length(node.constant.end_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + else + s( + type, + children + visit_all(node.posts), + if buffer.source[node.start_char] == "[" + smap_collection( + srange_length(node.start_char, 1), + srange_length(node.end_char, -1), + srange_node(node) + ) + else + smap_collection_bare(srange_node(node)) + end + ) + end + end + + # Visit an Assign node. + def visit_assign(node) + target = visit(node.target) + location = + target + .location + .with_operator(srange_find_between(node.target, node.value, "=")) + .with_expression(srange_node(node)) + + s(target.type, target.children + [visit(node.value)], location) + end + + # Visit an Assoc node. + def visit_assoc(node) + if node.value.nil? 
+ expression = srange(node.start_char, node.end_char - 1) + + type, location = + if node.key.value.start_with?(/[A-Z]/) + [:const, smap_constant(nil, expression, expression)] + else + [:send, smap_send_bare(expression, expression)] + end + + s( + :pair, + [ + visit(node.key), + s(type, [nil, node.key.value.chomp(":").to_sym], location) + ], + smap_operator( + srange_length(node.key.end_char, -1), + srange_node(node) + ) + ) + else + s( + :pair, + [visit(node.key), visit(node.value)], + smap_operator( + srange_search_between(node.key, node.value, "=>") || + srange_length(node.key.end_char, -1), + srange_node(node) + ) + ) + end + end + + # Visit an AssocSplat node. + def visit_assoc_splat(node) + s( + :kwsplat, + [visit(node.value)], + smap_operator(srange_length(node.start_char, 2), srange_node(node)) + ) + end + + # Visit a Backref node. + def visit_backref(node) + location = smap(srange_node(node)) + + if node.value.match?(/^\$\d+$/) + s(:nth_ref, [node.value[1..].to_i], location) + else + s(:back_ref, [node.value.to_sym], location) + end + end + + # Visit a BareAssocHash node. + def visit_bare_assoc_hash(node) + s( + if ::Parser::Builders::Default.emit_kwargs && + !stack[-2].is_a?(ArrayLiteral) + :kwargs + else + :hash + end, + visit_all(node.assocs), + smap_collection_bare(srange_node(node)) + ) + end + + # Visit a BEGINBlock node. + def visit_BEGIN(node) + s( + :preexe, + [visit(node.statements)], + smap_keyword( + srange_length(node.start_char, 5), + srange_find(node.start_char + 5, node.statements.start_char, "{"), + srange_length(node.end_char, -1), + srange_node(node) + ) + ) + end + + # Visit a Begin node. + def visit_begin(node) + location = + smap_collection( + srange_length(node.start_char, 5), + srange_length(node.end_char, -3), + srange_node(node) + ) + + if node.bodystmt.empty? + s(:kwbegin, [], location) + elsif node.bodystmt.rescue_clause.nil? && + node.bodystmt.ensure_clause.nil? && + node.bodystmt.else_clause.nil? 
+ child = visit(node.bodystmt.statements) + + s( + :kwbegin, + child.type == :begin ? child.children : [child], + location + ) + else + s(:kwbegin, [visit(node.bodystmt)], location) + end + end + + # Visit a Binary node. + def visit_binary(node) + case node.operator + when :| + current = -2 + while stack[current].is_a?(Binary) && stack[current].operator == :| + current -= 1 + end + + if stack[current].is_a?(In) + s(:match_alt, [visit(node.left), visit(node.right)], nil) + else + visit(canonical_binary(node)) + end + when :"=>", :"&&", :and, :"||", :or + s( + { "=>": :match_as, "&&": :and, "||": :or }.fetch( + node.operator, + node.operator + ), + [visit(node.left), visit(node.right)], + smap_operator( + srange_find_between(node.left, node.right, node.operator.to_s), + srange_node(node) + ) + ) + when :=~ + # When you use a regular expression on the left hand side of a =~ + # operator and it doesn't have interpolation, then its named capture + # groups introduce local variables into the scope. In this case the + # parser gem has a different node (match_with_lvasgn) instead of the + # regular send. + if node.left.is_a?(RegexpLiteral) && node.left.parts.length == 1 && + node.left.parts.first.is_a?(TStringContent) + s( + :match_with_lvasgn, + [visit(node.left), visit(node.right)], + smap_operator( + srange_find_between( + node.left, + node.right, + node.operator.to_s + ), + srange_node(node) + ) + ) + else + visit(canonical_binary(node)) + end + else + visit(canonical_binary(node)) + end + end + + # Visit a BlockArg node. + def visit_blockarg(node) + if node.name.nil? + s(:blockarg, [nil], smap_variable(nil, srange_node(node))) + else + s( + :blockarg, + [node.name.value.to_sym], + smap_variable(srange_node(node.name), srange_node(node)) + ) + end + end + + # Visit a BlockVar node. 
# Produces the parser-gem :args node for a block's parameter list, appending
# any block-local ("shadow") variables after the regular parameters.
def visit_block_var(node)
  builder = ::Parser::Builders::Default

  # Block-local variables, each becoming a :shadowarg child.
  shadowargs =
    node.locals.map do |local|
      s(
        :shadowarg,
        [local.value.to_sym],
        smap_variable(srange_node(local), srange_node(local))
      )
    end

  params = node.params
  children =
    if !(builder.emit_procarg0 && node.arg0?)
      visit(params).children
    else
      # The parser gem has a dedicated :procarg0 node for a single required
      # block parameter that could be automatically expanded.
      required = params.requireds.first

      if builder.emit_arg_inside_procarg0 && required.is_a?(Ident)
        inner =
          s(
            :arg,
            [required.value.to_sym],
            smap_variable(srange_node(required), srange_node(required))
          )

        [s(:procarg0, [inner], smap_collection_bare(srange_node(required)))]
      else
        child = visit(required)
        [s(:procarg0, child, child.location)]
      end
    end

  s(
    :args,
    children + shadowargs,
    smap_collection(
      srange_length(node.start_char, 1),
      srange_length(node.end_char, -1),
      srange_node(node)
    )
  )
end

# Visit a BodyStmt node.
# Wraps the translated statements in :rescue and/or :ensure nodes when the
# body carries the corresponding clauses; otherwise returns the translated
# statements unchanged.
def visit_bodystmt(node)
  current = visit(node.statements)

  if node.rescue_clause
    rescued = visit(node.rescue_clause)
    children = [current, *rescued.children]
    location = rescued.location

    if node.else_clause
      # The final child of the rescue node is the else slot; replace it with
      # the translated else clause and extend the location to cover it.
      children[-1] = visit(node.else_clause)
      location =
        smap_condition(
          nil,
          nil,
          srange_length(node.else_clause.start_char - 3, -4),
          nil,
          srange(location.expression.begin_pos, node.else_clause.end_char)
        )
    end

    current = s(rescued.type, children, location)
  end

  if node.ensure_clause
    ensured = visit(node.ensure_clause)
    ensure_expression = ensured.location.expression

    expression =
      if current
        current.location.expression.join(ensure_expression)
      else
        ensure_expression
      end

    current =
      s(
        ensured.type,
        [current, *ensured.children],
        ensured.location.with_expression(expression)
      )
  end

  current
end

# Visit a Break node.
def visit_break(node)
  keyword = srange_length(node.start_char, 5)
  arguments = visit_all(node.arguments.parts)

  s(:break, arguments, smap_keyword_bare(keyword, srange_node(node)))
end

# Visit a CallNode node.
def visit_call(node)
  # A plain call is handled as a command call that has no block.
  command =
    CommandCall.new(
      receiver: node.receiver,
      operator: node.operator,
      message: node.message,
      arguments: node.arguments,
      block: nil,
      location: node.location
    )

  visit_command_call(command)
end

# Visit a Case node.
def visit_case(node)
  # Follow the consequent chain until it ends (nil) or reaches an Else.
  clauses = [node.consequent]
  clauses << clauses.last.consequent while clauses.last && !clauses.last.is_a?(Else)

  else_token = srange_length(clauses.last.start_char, 4) if clauses.last.is_a?(Else)
  type = node.consequent.is_a?(In) ? :case_match : :case

  s(
    type,
    [visit(node.value), *clauses.map { |clause| visit(clause) }],
    smap_condition(
      srange_length(node.start_char, 4),
      nil,
      else_token,
      srange_length(node.end_char, -3),
      srange_node(node)
    )
  )
end

# Visit a CHAR node.
# The character after the leading "?" becomes the string value; the "?" is
# recorded as the opening delimiter.
def visit_CHAR(node)
  s(
    :str,
    [node.value[1..]],
    smap_collection(srange_length(node.start_char, 1), nil, srange_node(node))
  )
end

# Visit a ClassDeclaration node.
def visit_class(node)
  # The "<" token only exists when a superclass was given.
  operator =
    (srange_find_between(node.constant, node.superclass, "<") if node.superclass)

  children = [
    visit(node.constant),
    visit(node.superclass),
    visit(node.bodystmt)
  ]

  definition =
    smap_definition(
      srange_length(node.start_char, 5),
      operator,
      srange_node(node.constant),
      srange_length(node.end_char, -3)
    )

  s(:class, children, definition.with_expression(srange_node(node)))
end

# Visit a Command node.
def visit_command(node)
  # A command is a receiver-less command call.
  visit_command_call(
    CommandCall.new(
      receiver: nil,
      operator: nil,
      message: node.message,
      arguments: node.arguments,
      block: node.block,
      location: node.location
    )
  )
end

# Visit a CommandCall node.
def visit_command_call(node)
  # A message of :call marks the `receiver.()` form, which has no selector.
  implicit_call = node.message == :call
  children = [
    visit(node.receiver),
    implicit_call ? :call : node.message.value.to_sym
  ]

  begin_token = end_token = nil
  arguments = node.arguments

  case arguments
  when Args
    children.concat(visit_all(arguments.parts))
  when ArgParen
    inner = arguments.arguments

    unless inner.nil?
      if inner.is_a?(ArgsForward)
        children << visit(inner)
      else
        children.concat(visit_all(inner.parts))
      end
    end

    begin_token = srange_length(arguments.start_char, 1)
    end_token = srange_length(arguments.end_char, -1)
  end

  # Upper bound used when searching for "::" in the implicit-call form.
  dot_bound =
    if arguments
      arguments.start_char
    elsif node.block
      node.block.start_char
    else
      node.end_char
    end

  expression =
    if arguments.is_a?(ArgParen)
      srange(node.start_char, arguments.end_char)
    elsif arguments.is_a?(Args) && arguments.parts.any?
      # For a trailing heredoc argument, the call ends at the end of the
      # heredoc's beginning marker.
      final = arguments.parts.last
      finish = final.is_a?(Heredoc) ? final.beginning.end_char : final.end_char

      srange(node.start_char, finish)
    elsif node.block
      srange_node(node.message)
    else
      srange_node(node)
    end

  type = node.operator.is_a?(Op) && node.operator.value == "&." ? :csend : :send

  dot =
    if node.operator == :"::"
      srange_find(
        node.receiver.end_char,
        implicit_call ? dot_bound : node.message.start_char,
        "::"
      )
    elsif node.operator
      srange_node(node.operator)
    end

  selector = implicit_call ? nil : srange_node(node.message)
  call =
    s(
      type,
      children,
      smap_send(dot, selector, begin_token, end_token, expression)
    )

  return call unless node.block

  block_type, block_arguments = block_children(node.block)
  opening = node.block.opening

  s(
    block_type,
    [call, block_arguments, visit(node.block.bodystmt)],
    smap_collection(
      srange_node(opening),
      srange_length(node.end_char, opening.is_a?(Kw) ? -3 : -1),
      srange_node(node)
    )
  )
end

# Visit a Const node.
def visit_const(node)
  expression = srange_node(node)

  s(
    :const,
    [nil, node.value.to_sym],
    smap_constant(nil, expression, srange_node(node))
  )
end

# Visit a ConstPathField node.
def visit_const_path_field(node)
  parent = node.parent
  on_self =
    parent.is_a?(VarRef) && parent.value.is_a?(Kw) &&
      parent.value.value == "self"

  if on_self && node.constant.is_a?(Ident)
    # `self.name` as an assignment target is emitted as a setter send.
    s(:send, [visit(parent), :"#{node.constant.value}="], nil)
  else
    s(
      :casgn,
      [visit(parent), node.constant.value.to_sym],
      smap_constant(
        srange_find_between(parent, node.constant, "::"),
        srange_node(node.constant),
        srange_node(node)
      )
    )
  end
end

# Visit a ConstPathRef node.
def visit_const_path_ref(node)
  scope = visit(node.parent)
  name = node.constant.value.to_sym

  s(
    :const,
    [scope, name],
    smap_constant(
      srange_find_between(node.parent, node.constant, "::"),
      srange_node(node.constant),
      srange_node(node)
    )
  )
end

# Visit a ConstRef node.
# The wrapped constant supplies both the name and the name range; the
# expression range comes from the ConstRef node itself.
def visit_const_ref(node)
  constant = node.constant

  s(
    :const,
    [nil, constant.value.to_sym],
    smap_constant(nil, srange_node(constant), srange_node(node))
  )
end

# Visit a CVar node.
def visit_cvar(node)
  location = smap_variable(srange_node(node), srange_node(node))

  s(:cvar, [node.value.to_sym], location)
end

# Visit a DefNode node.
def visit_def(node)
  name = node.name.value.to_sym

  # Parameters may be bare, parenthesized, or absent; only the parenthesized
  # form gets begin/end delimiter ranges.
  params = node.params
  args =
    if params.is_a?(Params)
      child = visit(params)
      s(
        child.type,
        child.children,
        smap_collection_bare(child.location&.expression)
      )
    elsif params.is_a?(Paren)
      child = visit(params.contents)
      s(
        child.type,
        child.children,
        smap_collection(
          srange_length(params.start_char, 1),
          srange_length(params.end_char, -1),
          srange_node(params)
        )
      )
    else
      s(:args, [], smap_collection_bare(nil))
    end

  location =
    if node.endless?
      # Endless definitions have an "=" token and no "end" keyword.
      smap_method_definition(
        srange_length(node.start_char, 3),
        nil,
        srange_node(node.name),
        nil,
        srange_find_between((node.params || node.name), node.bodystmt, "="),
        srange_node(node)
      )
    else
      smap_method_definition(
        srange_length(node.start_char, 3),
        nil,
        srange_node(node.name),
        srange_length(node.end_char, -3),
        nil,
        srange_node(node)
      )
    end

  return s(:def, [name, args, visit(node.bodystmt)], location) unless node.target

  # Singleton definition: unwrap a parenthesized target and rebuild the
  # location with the operator between target and name.
  target = node.target.is_a?(Paren) ? node.target.contents : node.target

  s(
    :defs,
    [visit(target), name, args, visit(node.bodystmt)],
    smap_method_definition(
      location.keyword,
      srange_node(node.operator),
      location.name,
      location.end,
      location.assignment,
      location.expression
    )
  )
end

# Visit a Defined node.
# `defined?` may be written with or without parentheses; the delimiter
# ranges are only produced when a "(" follows the keyword.
def visit_defined(node)
  after_keyword = (node.start_char + 8)...node.end_char

  begin_token, end_token =
    if buffer.source[after_keyword].include?("(")
      [
        srange_find(after_keyword.begin, after_keyword.end, "("),
        srange_length(node.end_char, -1)
      ]
    end

  s(
    :defined?,
    [visit(node.value)],
    smap_keyword(
      srange_length(node.start_char, 8),
      begin_token,
      end_token,
      srange_node(node)
    )
  )
end

# Visit a DynaSymbol node.
def visit_dyna_symbol(node)
  location =
    if node.quote
      smap_collection(
        srange_length(node.start_char, node.quote.length),
        srange_length(node.end_char, -1),
        srange_node(node)
      )
    else
      smap_collection_bare(srange_node(node))
    end

  parts = node.parts
  static = parts.length == 1 && parts.first.is_a?(TStringContent)
  return s(:dsym, visit_all(parts), location) unless static

  # A single plain-content part is a static symbol; undump resolves the
  # escape sequences in its text.
  s(:sym, ["\"#{parts.first.value}\"".undump.to_sym], location)
end

# Visit an Else node.
def visit_else(node)
  # An empty else directly inside a case gets its own marker node.
  return s(:empty_else, [], nil) if node.statements.empty? && stack[-2].is_a?(Case)

  visit(node.statements)
end

# Visit an Elsif node.
def visit_elsif(node)
  following = node.consequent
  else_token =
    case following
    when Elsif
      srange_length(following.start_char, 5)
    when Else
      srange_length(following.start_char, 4)
    end

  expression = srange(node.start_char, node.statements.end_char - 1)

  s(
    :if,
    [visit(node.predicate), visit(node.statements), visit(following)],
    smap_condition(
      srange_length(node.start_char, 5),
      nil,
      else_token,
      nil,
      expression
    )
  )
end

# Visit an ENDBlock node.
def visit_END(node)
  s(
    :postexe,
    [visit(node.statements)],
    smap_keyword(
      srange_length(node.start_char, 3),
      srange_find(node.start_char + 3, node.statements.start_char, "{"),
      srange_length(node.end_char, -1),
      srange_node(node)
    )
  )
end

# Visit an Ensure node.
def visit_ensure(node)
  start_char = node.start_char

  # With no statements the expression covers only the `ensure` keyword.
  end_char =
    node.statements.empty? ? start_char + 6 : node.statements.body.last.end_char

  s(
    :ensure,
    [visit(node.statements)],
    smap_condition(
      srange_length(start_char, 6),
      nil,
      nil,
      nil,
      srange(start_char, end_char)
    )
  )
end

# Visit a Field node.
def visit_field(node)
  # As an assignment target the field is a setter call, so the message name
  # gains a trailing "=".
  message =
    case stack[-2]
    when Assign, MLHS
      Ident.new(value: "#{node.name.value}=", location: node.name.location)
    else
      node.name
    end

  visit_command_call(
    CommandCall.new(
      receiver: node.parent,
      operator: node.operator,
      message: message,
      arguments: nil,
      block: nil,
      location: node.location
    )
  )
end

# Visit a FloatLiteral node.
def visit_float(node)
  # A leading sign character is reported as the operator.
  signed = %w[+ -].include?(buffer.source[node.start_char])
  operator = signed ? srange_length(node.start_char, 1) : nil

  s(:float, [node.value.to_f], smap_operator(operator, srange_node(node)))
end

# Visit a FndPtn node.
def visit_fndptn(node)
  # Both ends of a find pattern are splats and become :match_rest nodes.
  left, right =
    [node.left, node.right].map do |splat|
      location =
        smap_operator(srange_length(splat.start_char, 1), srange_node(splat))

      anonymous = splat.is_a?(VarField) && splat.value.nil?
      s(:match_rest, anonymous ? [] : [visit(splat)], location)
    end

  inner =
    s(
      :find_pattern,
      [left, *visit_all(node.values), right],
      smap_collection(
        srange_length(node.start_char, 1),
        srange_length(node.end_char, -1),
        srange_node(node)
      )
    )

  return inner unless node.constant

  s(:const_pattern, [visit(node.constant), inner], nil)
end

# Visit a For node.
def visit_for(node)
  children = [
    visit(node.index),
    visit(node.collection),
    visit(node.statements)
  ]

  keyword = srange_length(node.start_char, 3)
  in_token = srange_find_between(node.index, node.collection, "in")

  # The body is introduced by `do` or, failing that, by a semicolon.
  begin_token =
    srange_search_between(node.collection, node.statements, "do") ||
      srange_search_between(node.collection, node.statements, ";")

  s(
    :for,
    children,
    smap_for(
      keyword,
      in_token,
      begin_token,
      srange_length(node.end_char, -3),
      srange_node(node)
    )
  )
end

# Visit a GVar node.
# The global variable's name is both the variable name and the whole
# expression.
def visit_gvar(node)
  s(
    :gvar,
    [node.value.to_sym],
    smap_variable(srange_node(node), srange_node(node))
  )
end

# Visit a HashLiteral node.
def visit_hash(node)
  s(
    :hash,
    visit_all(node.assocs),
    smap_collection(
      srange_length(node.start_char, 1),
      srange_length(node.end_char, -1),
      srange_node(node)
    )
  )
end

# Visit a Heredoc node.
#
# Returns an :xstr node for backtick-quoted heredocs, the lone segment
# re-typed with the heredoc location when there is exactly one segment, and
# a :dstr node otherwise.
def visit_heredoc(node)
  heredoc = HeredocBuilder.new(node)

  # For each part of the heredoc, if it's a string content node spanning
  # several lines, split it into one :str node per line. Otherwise, visit
  # the node as normal.
  node.parts.each do |part|
    if part.is_a?(TStringContent) && part.value.count("\n") > 1
      index = part.start_char

      # each_line keeps every line's own terminator. Splitting on "\n" and
      # re-appending it would drop trailing blank lines and would invent a
      # newline (and one extra character of range) for a final fragment
      # that is not newline-terminated.
      part.value.each_line do |line|
        length = line.length
        location = smap_collection_bare(srange_length(index, length))

        heredoc << s(:str, [line], location)
        index += length
      end
    else
      heredoc << visit(part)
    end
  end

  # Now that we have all of the pieces on the heredoc, we can trim it if
  # it is a heredoc that supports trimming (i.e., it has a ~ on the
  # declaration).
  heredoc.trim!

  # Generate the location for the heredoc, which goes from the declaration
  # to the ending delimiter.
  location =
    smap_heredoc(
      srange_node(node.beginning),
      srange(
        if node.parts.empty?
          node.beginning.end_char + 1
        else
          node.parts.first.start_char
        end,
        node.ending.start_char
      ),
      srange(node.ending.start_char, node.ending.end_char - 1)
    )

  # Finally, decide which kind of heredoc node to generate based on its
  # declaration and contents.
  if node.beginning.value.match?(/`\w+`\z/)
    s(:xstr, heredoc.segments, location)
  elsif heredoc.segments.length == 1
    segment = heredoc.segments.first
    s(segment.type, segment.children, location)
  else
    s(:dstr, heredoc.segments, location)
  end
end

# Visit a HshPtn node.
# Builds a :hash_pattern node, wrapped in :const_pattern when the pattern is
# prefixed by a constant.
def visit_hshptn(node)
  children =
    node.keywords.map do |(keyword, value)|
      next s(:pair, [visit(keyword), visit(value)], nil) if value

      # A key without a value binds a local variable named after the key.
      case keyword
      when DynaSymbol
        raise if keyword.parts.length > 1
        s(:match_var, [keyword.parts.first.value.to_sym], nil)
      when Label
        s(:match_var, [keyword.value.chomp(":").to_sym], nil)
      end
    end

  if node.keyword_rest.is_a?(VarField)
    children << if node.keyword_rest.value.nil?
      s(:match_rest, [], nil)
    elsif node.keyword_rest.value == :nil
      s(:match_nil_pattern, [], nil)
    else
      s(:match_rest, [visit(node.keyword_rest)], nil)
    end
  end

  inner = s(:hash_pattern, children, nil)
  if node.constant
    s(:const_pattern, [visit(node.constant), inner], nil)
  else
    inner
  end
end

# Visit an Ident node.
def visit_ident(node)
  s(
    :lvar,
    [node.value.to_sym],
    smap_variable(srange_node(node), srange_node(node))
  )
end

# Visit an IfNode node.
#
# Handles both the modifier form and the full form. Range and regexp
# predicates are rewritten into the flip-flop and match_current_line node
# types.
def visit_if(node)
  predicate =
    case node.predicate
    when RangeNode
      type =
        node.predicate.operator.value == ".." ? :iflipflop : :eflipflop
      s(type, visit(node.predicate).children, nil)
    when RegexpLiteral
      s(:match_current_line, [visit(node.predicate)], nil)
    when Unary
      if node.predicate.operator.value == "!" &&
           node.predicate.statement.is_a?(RegexpLiteral)
        # The location is passed explicitly (nil), as in the RegexpLiteral
        # branch above and every other `s` call in this file.
        s(
          :send,
          [
            s(:match_current_line, [visit(node.predicate.statement)], nil),
            :!
          ],
          nil
        )
      else
        visit(node.predicate)
      end
    else
      visit(node.predicate)
    end

  s(
    :if,
    [predicate, visit(node.statements), visit(node.consequent)],
    if node.modifier?
      smap_keyword_bare(
        srange_find_between(node.statements, node.predicate, "if"),
        srange_node(node)
      )
    else
      # Look for `then` or `;` between the predicate and the first
      # statement to use as the begin token.
      begin_start = node.predicate.end_char
      begin_end =
        if node.statements.empty?
          node.statements.end_char
        else
          node.statements.body.first.start_char
        end

      begin_token =
        if buffer.source[begin_start...begin_end].include?("then")
          srange_find(begin_start, begin_end, "then")
        elsif buffer.source[begin_start...begin_end].include?(";")
          srange_find(begin_start, begin_end, ";")
        end

      else_token =
        case node.consequent
        when Elsif
          srange_length(node.consequent.start_char, 5)
        when Else
          srange_length(node.consequent.start_char, 4)
        end

      smap_condition(
        srange_length(node.start_char, 2),
        begin_token,
        else_token,
        srange_length(node.end_char, -3),
        srange_node(node)
      )
    end
  )
end

# Visit an IfOp node.
def visit_if_op(node)
  s(
    :if,
    [visit(node.predicate), visit(node.truthy), visit(node.falsy)],
    smap_ternary(
      srange_find_between(node.predicate, node.truthy, "?"),
      srange_find_between(node.truthy, node.falsy, ":"),
      srange_node(node)
    )
  )
end

# Visit an Imaginary node.
def visit_imaginary(node)
  s(
    :complex,
    [
      # We have to do an eval here in order to get the value in case
      # it's something like 42ri. to_c will not give the right value in
      # that case. Maybe there's an API for this but I can't find it.
      # NOTE(review): this evaluates the literal's source text; presumably
      # node.value is always a lexed imaginary literal - confirm before
      # feeding this anything that did not come from the parser.
      eval(node.value)
    ],
    smap_operator(nil, srange_node(node))
  )
end

# Visit an In node.
def visit_in(node)
  case node.pattern
  when IfNode
    # A trailing `if` guard: the guarded pattern is the IfNode's statements.
    s(
      :in_pattern,
      [
        visit(node.pattern.statements),
        s(:if_guard, [visit(node.pattern.predicate)], nil),
        visit(node.statements)
      ],
      nil
    )
  when UnlessNode
    s(
      :in_pattern,
      [
        visit(node.pattern.statements),
        s(:unless_guard, [visit(node.pattern.predicate)], nil),
        visit(node.statements)
      ],
      nil
    )
  else
    begin_token =
      srange_search_between(node.pattern, node.statements, "then")

    end_char =
      if begin_token || node.statements.empty?
        node.statements.end_char - 1
      else
        node.statements.body.last.start_char
      end

    s(
      :in_pattern,
      [visit(node.pattern), nil, visit(node.statements)],
      smap_keyword(
        srange_length(node.start_char, 2),
        begin_token,
        nil,
        srange(node.start_char, end_char)
      )
    )
  end
end

# Visit an Int node.
def visit_int(node)
  # A leading sign character is reported as the operator.
  operator =
    if %w[+ -].include?(buffer.source[node.start_char])
      srange_length(node.start_char, 1)
    end

  s(:int, [node.value.to_i], smap_operator(operator, srange_node(node)))
end

# Visit an IVar node.
def visit_ivar(node)
  s(
    :ivar,
    [node.value.to_sym],
    smap_variable(srange_node(node), srange_node(node))
  )
end

# Visit a Kw node.
def visit_kw(node)
  location = smap(srange_node(node))

  case node.value
  when "__FILE__"
    s(:str, [buffer.name], location)
  when "__LINE__"
    s(
      :int,
      [node.location.start_line + buffer.first_line - 1],
      location
    )
  when "__ENCODING__"
    if ::Parser::Builders::Default.emit_encoding
      s(:__ENCODING__, [], location)
    else
      s(:const, [s(:const, [nil, :Encoding], nil), :UTF_8], location)
    end
  else
    # Any other keyword becomes a childless node named after the keyword.
    s(node.value.to_sym, [], location)
  end
end

# Visit a KwRestParam node.
def visit_kwrest_param(node)
  if node.name.nil?
    s(:kwrestarg, [], smap_variable(nil, srange_node(node)))
  else
    s(
      :kwrestarg,
      [node.name.value.to_sym],
      smap_variable(srange_node(node.name), srange_node(node))
    )
  end
end

# Visit a Label node.
def visit_label(node)
  # The trailing colon is excluded from both the symbol and its range.
  s(
    :sym,
    [node.value.chomp(":").to_sym],
    smap_collection_bare(srange(node.start_char, node.end_char - 1))
  )
end

# Visit a Lambda node.
def visit_lambda(node)
  args =
    node.params.is_a?(LambdaVar) ? node.params : node.params.contents
  args_node = visit(args)

  # With no declared parameters, a body using numbered parameters becomes
  # a :numblock whose "arguments" child is the value from num_block_type.
  type = :block
  if args.empty? && (maximum = num_block_type(node.statements))
    type = :numblock
    args_node = maximum
  end

  # The local is named `brace` so it does not shadow the `srange` helper.
  begin_token, end_token =
    if (brace = srange_search_between(node.params, node.statements, "{"))
      [brace, srange_length(node.end_char, -1)]
    else
      [
        srange_find_between(node.params, node.statements, "do"),
        srange_length(node.end_char, -3)
      ]
    end

  selector = srange_length(node.start_char, 2)

  s(
    type,
    [
      if ::Parser::Builders::Default.emit_lambda
        s(:lambda, [], smap(selector))
      else
        s(:send, [nil, :lambda], smap_send_bare(selector, selector))
      end,
      args_node,
      visit(node.statements)
    ],
    smap_collection(begin_token, end_token, srange_node(node))
  )
end

# Visit a LambdaVar node.
def visit_lambda_var(node)
  shadowargs =
    node.locals.map do |local|
      s(
        :shadowarg,
        [local.value.to_sym],
        smap_variable(srange_node(local), srange_node(local))
      )
    end

  # A zero-width node gets a location without delimiters or expression.
  location =
    if node.start_char == node.end_char
      smap_collection_bare(nil)
    else
      smap_collection(
        srange_length(node.start_char, 1),
        srange_length(node.end_char, -1),
        srange_node(node)
      )
    end

  s(:args, visit(node.params).children + shadowargs, location)
end

# Visit an MAssign node.
def visit_massign(node)
  s(
    :masgn,
    [visit(node.target), visit(node.value)],
    smap_operator(
      srange_find_between(node.target, node.value, "="),
      srange_node(node)
    )
  )
end

# Visit a MethodAddBlock node.
# Attaches a block to a call. Keyword-style calls (break/next/return) keep
# their own node type and wrap the block inside it; index and super calls
# become the block's call child; everything else goes through
# visit_command_call with the block attached.
def visit_method_add_block(node)
  block = node.block

  case node.call
  when Break, Next, ReturnNode
    type, arguments = block_children(block)
    call = visit(node.call)
    inner = s(type, [*call.children, arguments, visit(block.bodystmt)], nil)

    s(call.type, [inner], nil)
  when ARef, Super, ZSuper
    type, arguments = block_children(block)

    s(type, [visit(node.call), arguments, visit(block.bodystmt)], nil)
  else
    visit_command_call(
      CommandCall.new(
        receiver: node.call.receiver,
        operator: node.call.operator,
        message: node.call.message,
        arguments: node.call.arguments,
        block: block,
        location: node.location
      )
    )
  end
end

# Visit an MLHS node.
def visit_mlhs(node)
  # Bare identifiers on the left-hand side are emitted as :arg nodes.
  targets =
    node.parts.map do |part|
      next visit(part) unless part.is_a?(Ident)

      s(
        :arg,
        [part.value.to_sym],
        smap_variable(srange_node(part), srange_node(part))
      )
    end

  s(:mlhs, targets, smap_collection_bare(srange_node(node)))
end

# Visit an MLHSParen node.
def visit_mlhs_paren(node)
  # Same node as the contents, re-located to include the parentheses.
  inner = visit(node.contents)
  location =
    smap_collection(
      srange_length(node.start_char, 1),
      srange_length(node.end_char, -1),
      srange_node(node)
    )

  s(inner.type, inner.children, location)
end

# Visit a ModuleDeclaration node.
def visit_module(node)
  children = [visit(node.constant), visit(node.bodystmt)]
  definition =
    smap_definition(
      srange_length(node.start_char, 6),
      nil,
      srange_node(node.constant),
      srange_length(node.end_char, -3)
    )

  s(:module, children, definition.with_expression(srange_node(node)))
end

# Visit an MRHS node.
def visit_mrhs(node)
  # Treated as an array literal without brackets.
  contents = Args.new(parts: node.parts, location: node.location)

  visit_array(
    ArrayLiteral.new(
      lbracket: nil,
      contents: contents,
      location: node.location
    )
  )
end

# Visit a Next node.
def visit_next(node)
  arguments = visit_all(node.arguments.parts)
  keyword = srange_length(node.start_char, 4)

  s(:next, arguments, smap_keyword_bare(keyword, srange_node(node)))
end

# Visit a Not node.
def visit_not(node)
  if node.statement.nil?
    # `not()` negates an empty :begin node spanning the parentheses.
    open_paren = srange_find(node.start_char, nil, "(")
    close_paren = srange_find(node.start_char, nil, ")")
    empty =
      s(
        :begin,
        [],
        smap_collection(open_paren, close_paren, open_paren.join(close_paren))
      )

    return(
      s(
        :send,
        [empty, :!],
        smap_send_bare(srange_length(node.start_char, 3), srange_node(node))
      )
    )
  end

  begin_token, end_token =
    if node.parentheses?
      [
        srange_find(node.start_char + 3, node.statement.start_char, "("),
        srange_length(node.end_char, -1)
      ]
    end

  s(
    :send,
    [visit(node.statement), :!],
    smap_send(
      nil,
      srange_length(node.start_char, 3),
      begin_token,
      end_token,
      srange_node(node)
    )
  )
end

# Visit an OpAssign node.
def visit_opassign(node)
  target = visit(node.target)
  location =
    target
      .location
      .with_expression(srange_node(node))
      .with_operator(srange_node(node.operator))

  # ||= and &&= have dedicated node types; any other operator is carried as
  # a symbol child of :op_asgn.
  operator = node.operator.value
  dedicated = { "||=" => :or_asgn, "&&=" => :and_asgn }[operator]

  if dedicated
    s(dedicated, [target, visit(node.value)], location)
  else
    s(
      :op_asgn,
      [target, operator.chomp("=").to_sym, visit(node.value)],
      location
    )
  end
end

# Visit a Params node.
# Builds the :args node for a parameter list. Children are emitted in the
# order: required, optional, rest, post, keyword, keyword-rest, block.
def visit_params(node)
  children = []

  # Required parameters; destructured ones are visited as MLHSParen nodes.
  children.concat(
    node.requireds.map do |required|
      if required.is_a?(MLHSParen)
        visit(required)
      else
        s(
          :arg,
          [required.value.to_sym],
          smap_variable(srange_node(required), srange_node(required))
        )
      end
    end
  )

  # Optional parameters with their default values.
  children.concat(
    node.optionals.map do |(name, value)|
      s(
        :optarg,
        [name.value.to_sym, visit(value)],
        smap_variable(
          srange_node(name),
          srange_node(name).join(srange_node(value))
        ).with_operator(srange_find_between(name, value, "="))
      )
    end
  )

  # An ExcessedComma in the rest slot produces no child.
  children << visit(node.rest) if node.rest && !node.rest.is_a?(ExcessedComma)

  children.concat(
    node.posts.map do |post|
      s(
        :arg,
        [post.value.to_sym],
        smap_variable(srange_node(post), srange_node(post))
      )
    end
  )

  # Keyword parameters; the name range excludes the trailing colon.
  children.concat(
    node.keywords.map do |(name, value)|
      key = name.value.chomp(":").to_sym

      if value
        s(
          :kwoptarg,
          [key, visit(value)],
          smap_variable(
            srange(name.start_char, name.end_char - 1),
            srange_node(name).join(srange_node(value))
          )
        )
      else
        s(
          :kwarg,
          [key],
          smap_variable(
            srange(name.start_char, name.end_char - 1),
            srange_node(name)
          )
        )
      end
    end
  )

  case node.keyword_rest
  when nil, ArgsForward
    # Nothing to add here; argument forwarding is handled below.
  when :nil
    children << s(
      :kwnilarg,
      [],
      smap_variable(srange_length(node.end_char, -3), srange_node(node))
    )
  else
    children << visit(node.keyword_rest)
  end

  children << visit(node.block) if node.block

  if node.keyword_rest.is_a?(ArgsForward)
    forward_location = smap(srange_node(node.keyword_rest))

    # When there are no other parameters and the emit_forward_arg option is
    # disabled, the entire list is a single forward_args node.
    if children.empty? && !::Parser::Builders::Default.emit_forward_arg
      return s(:forward_args, [], forward_location)
    end

    # Otherwise a forward_arg node is inserted into the list before any
    # keyword rest or block parameters.
    position =
      node.requireds.length + node.optionals.length + node.keywords.length
    children.insert(position, s(:forward_arg, [], forward_location))
  end

  # The list's location spans from its first child to its last.
  location =
    unless children.empty?
      first = children.first.location.expression
      last = children.last.location.expression
      smap_collection_bare(first.join(last))
    end

  s(:args, children, location)
end

# Visit a Paren node.
def visit_paren(node)
  location =
    smap_collection(
      srange_length(node.start_char, 1),
      srange_length(node.end_char, -1),
      srange_node(node)
    )

  contents = node.contents
  blank = contents.nil? || (contents.is_a?(Statements) && contents.empty?)
  return s(:begin, [], location) if blank

  # A child that is already a :begin node is returned as-is.
  child = visit(contents)
  child.type == :begin ? child : s(:begin, [child], location)
end

# Visit a PinnedBegin node.
def visit_pinned_begin(node)
  # The inner :begin covers the parentheses, which start one character
  # after the node's start.
  inner =
    s(
      :begin,
      [visit(node.statement)],
      smap_collection(
        srange_length(node.start_char + 1, 1),
        srange_length(node.end_char, -1),
        srange(node.start_char + 1, node.end_char)
      )
    )

  s(
    :pin,
    [inner],
    smap_send_bare(srange_length(node.start_char, 1), srange_node(node))
  )
end

# Visit a PinnedVarRef node.
def visit_pinned_var_ref(node)
  pinned = visit(node.value)
  selector = srange_length(node.start_char, 1)

  s(:pin, [pinned], smap_send_bare(selector, srange_node(node)))
end

# Visit a Program node.
def visit_program(node)
  visit(node.statements)
end

# Visit a QSymbols node.
def visit_qsymbols(node)
  # Wrap each element in a SymbolLiteral so the list can be handled as an
  # ordinary array literal.
  symbols =
    node.elements.map do |element|
      SymbolLiteral.new(value: element, location: element.location)
    end

  visit_array(
    ArrayLiteral.new(
      lbracket: node.beginning,
      contents: Args.new(parts: symbols, location: node.location),
      location: node.location
    )
  )
end

# Visit a QWords node.
def visit_qwords(node)
  contents = Args.new(parts: node.elements, location: node.location)

  visit_array(
    ArrayLiteral.new(
      lbracket: node.beginning,
      contents: contents,
      location: node.location
    )
  )
end

# Visit a RangeNode node.
# Two dots yield an inclusive :irange; any other operator yields :erange.
def visit_range(node)
  type = node.operator.value == ".." ? :irange : :erange
  bounds = [visit(node.left), visit(node.right)]

  s(type, bounds, smap_operator(srange_node(node.operator), srange_node(node)))
end

# Visit an RAssign node.
def visit_rassign(node)
  # `=>` is a raising pattern match; any other operator is the predicate form.
  type = node.operator.value == "=>" ? :match_pattern : :match_pattern_p
  children = [visit(node.value), visit(node.pattern)]

  s(
    type,
    children,
    smap_operator(srange_node(node.operator), srange_node(node))
  )
end

# Visit a Rational node.
def visit_rational(node)
  location = smap_operator(nil, srange_node(node))

  s(:rational, [node.value.to_r], location)
end

# Visit a Redo node.
def visit_redo(node)
  location = smap_keyword_bare(srange_node(node), srange_node(node))

  s(:redo, [], location)
end

# Visit a RegexpLiteral node.
def visit_regexp_literal(node)
  ending = node.ending

  # The flags after the closing delimiter become a sorted :regopt child
  # appended after the regexp's parts.
  options =
    s(
      :regopt,
      ending.scan(/[a-z]/).sort.map(&:to_sym),
      smap(srange_length(node.end_char, -(ending.length - 1)))
    )

  s(
    :regexp,
    visit_all(node.parts).push(options),
    smap_collection(
      srange_length(node.start_char, node.beginning.length),
      srange_length(node.end_char - ending.length, 1),
      srange_node(node)
    )
  )
end

# Visit a Rescue node.
def visit_rescue(node)
  # The parser gem separates the rescue node from the rescue body, and the
  # two have different bounds, so both are calculated here.
  start_char = node.start_char

  body_end_char =
    node.statements.empty? ? start_char + 6 : node.statements.body.last.end_char

  end_char =
    if node.consequent
      # The whole rescue chain ends where its last clause ends.
      tail = node.consequent
      tail = tail.consequent while tail.consequent

      tail.statements.empty? ? start_char + 6 : tail.statements.body.last.end_char
    else
      body_end_char
    end

  # These locations are shared between several children.
  keyword = srange_length(start_char, 6)
  body_expression = srange(start_char, body_end_char)
  expression = srange(start_char, end_char)

  # The exception list is always emitted as a bracket-less array, whether
  # one class or several were given.
  raised = node.exception&.exceptions
  exceptions =
    unless raised.nil?
      parts = raised.is_a?(MRHS) ? raised.parts : [raised]

      visit_array(
        ArrayLiteral.new(
          lbracket: nil,
          contents: Args.new(parts: parts, location: raised.location),
          location: raised.location
        )
      )
    end

  resbody =
    if node.exception.nil? || node.exception.variable.nil?
      s(
        :resbody,
        [exceptions, nil, visit(node.statements)],
        smap_rescue_body(keyword, nil, nil, body_expression)
      )
    else
      s(
        :resbody,
        [exceptions, visit(node.exception.variable), visit(node.statements)],
        smap_rescue_body(
          keyword,
          srange_find(
            node.start_char + 6,
            node.exception.variable.start_char,
            "=>"
          ),
          nil,
          body_expression
        )
      )
    end

  # Further rescue clauses contribute their children; otherwise the
  # trailing slot is nil.
  children = [resbody]
  if node.consequent
    children.concat(visit(node.consequent).children)
  else
    children << nil
  end

  s(:rescue, children, smap_condition_bare(expression))
end

# Visit a RescueMod node.
def visit_rescue_mod(node)
  keyword = srange_find_between(node.statement, node.value, "rescue")
  guarded = visit(node.statement)

  fallback =
    s(
      :resbody,
      [nil, nil, visit(node.value)],
      smap_rescue_body(
        keyword,
        nil,
        nil,
        keyword.join(srange_node(node.value))
      )
    )

  s(:rescue, [guarded, fallback, nil], smap_condition_bare(srange_node(node)))
end

# Visit a RestParam node.
def visit_rest_param(node)
  name = node.name

  # Without a name there is no child and no name range.
  return s(:restarg, [], smap_variable(nil, srange_node(node))) unless name

  s(
    :restarg,
    [name.value.to_sym],
    smap_variable(srange_node(name), srange_node(node))
  )
end

# Visit a Retry node.
def visit_retry(node)
  # The keyword range and the expression range both span the whole node.
  location = smap_keyword_bare(srange_node(node), srange_node(node))
  s(:retry, [], location)
end

# Visit a ReturnNode node.
def visit_return(node)
  values = node.arguments ? visit_all(node.arguments.parts) : []
  keyword = srange_length(node.start_char, 6)

  s(:return, values, smap_keyword_bare(keyword, srange_node(node)))
end

# Visit an SClass node.
def visit_sclass(node)
  children = [visit(node.target), visit(node.bodystmt)]

  # The keyword is the first 5 characters, "<<" is searched for between the
  # keyword and the target, and the end token is the last 3 characters.
  location =
    smap_definition(
      srange_length(node.start_char, 5),
      srange_find(node.start_char + 5, node.target.start_char, "<<"),
      nil,
      srange_length(node.end_char, -3)
    ).with_expression(srange_node(node))

  s(:sclass, children, location)
end

# Visit a Statements node.
def visit_statements(node)
  # Comments, embedded documents, __END__ content and void statements do not
  # become children.
  children =
    node.body.reject do |child|
      case child
      when Comment, EmbDoc, EndContent, VoidStmt
        true
      else
        false
      end
    end

  return if children.empty?
  return visit(children.first) if children.length == 1

  expression = srange(children.first.start_char, children.last.end_char)
  s(:begin, visit_all(children), smap_collection_bare(expression))
end

# Visit a StringConcat node.
def visit_string_concat(node)
  halves = [visit(node.left), visit(node.right)]
  s(:dstr, halves, smap_collection_bare(srange_node(node)))
end

# Visit a StringDVar node.
def visit_string_dvar(node)
  visit(node.variable)
end

# Visit a StringEmbExpr node.
def visit_string_embexpr(node)
  # A falsy result from visiting the statements means there are no children.
  child = visit(node.statements)
  children = child ? [child] : []

  location =
    smap_collection(
      srange_length(node.start_char, 2),
      srange_length(node.end_char, -1),
      srange_node(node)
    )

  s(:begin, children, location)
end

# Visit a StringLiteral node.
def visit_string_literal(node)
  # Literals with a quote get begin and end tokens; those without one only
  # get an expression range.
  location =
    if node.quote
      smap_collection(
        srange_length(node.start_char, node.quote.length),
        srange_length(node.end_char, -1),
        srange_node(node)
      )
    else
      smap_collection_bare(srange_node(node))
    end

  parts = node.parts
  return s(:str, [""], location) if parts.empty?

  # A single plain-content part is visited and re-emitted with this
  # literal's location.
  if parts.length == 1 && parts.first.is_a?(TStringContent)
    child = visit(parts.first)
    return s(child.type, child.children, location)
  end

  s(:dstr, visit_all(parts), location)
end

# Visit a Super node.
def visit_super(node)
  arguments = node.arguments

  # Bare Args means the call was written without parentheses.
  if arguments.is_a?(Args)
    children = visit_all(arguments.parts)
    keyword = srange_length(node.start_char, 5)

    return s(:super, children, smap_keyword_bare(keyword, srange_node(node)))
  end

  inner = arguments.arguments
  return s(:super, [visit(inner)], nil) if inner.is_a?(ArgsForward)

  children = inner.nil? ? [] : visit_all(inner.parts)
  location =
    smap_keyword(
      srange_length(node.start_char, 5),
      srange_find(node.start_char + 5, node.end_char, "("),
      srange_length(node.end_char, -1),
      srange_node(node)
    )

  s(:super, children, location)
end

# Visit a SymbolLiteral node.
def visit_symbol_literal(node)
  # A begin token is only recorded when the source has a colon at the start
  # of the node.
  begin_token = nil
  if buffer.source[node.start_char] == ":"
    begin_token = srange_length(node.start_char, 1)
  end

  name = node.value.value.to_sym
  s(:sym, [name], smap_collection(begin_token, nil, srange_node(node)))
end

# Visit a Symbols node.
def visit_symbols(node)
  # Each element becomes a plain symbol literal when it is a single piece of
  # string content, and a dynamic symbol otherwise.
  parts =
    node.elements.map do |element|
      pieces = element.parts
      first = pieces.first

      if pieces.length == 1 && first.is_a?(TStringContent)
        SymbolLiteral.new(value: first, location: first.location)
      else
        DynaSymbol.new(parts: pieces, quote: nil, location: element.location)
      end
    end

  contents = Args.new(parts: parts, location: node.location)

  visit_array(
    ArrayLiteral.new(
      lbracket: node.beginning,
      contents: contents,
      location: node.location
    )
  )
end

# Visit a TopConstField node.
def visit_top_const_field(node)
  # The first two characters of the node are given to the :cbase child and
  # to the double-colon range.
  cbase = s(:cbase, [], smap(srange_length(node.start_char, 2)))
  name = node.constant.value.to_sym
  location =
    smap_constant(
      srange_length(node.start_char, 2),
      srange_node(node.constant),
      srange_node(node)
    )

  s(:casgn, [cbase, name], location)
end

# Visit a TopConstRef node.
def visit_top_const_ref(node)
  # Same layout as a top-level constant assignment, emitted as :const.
  cbase = s(:cbase, [], smap(srange_length(node.start_char, 2)))
  name = node.constant.value.to_sym
  location =
    smap_constant(
      srange_length(node.start_char, 2),
      srange_node(node.constant),
      srange_node(node)
    )

  s(:const, [cbase, name], location)
end

# Visit a TStringContent node.
def visit_tstring_content(node)
  # Non-ASCII characters are replaced by their dumped form, then the whole
  # value is wrapped in double quotes and undumped.
  dumped = node.value.gsub(/([^[:ascii:]])/) { |char| char.dump[1...-1] }
  value = "\"#{dumped}\"".undump

  s(:str, [value], smap_collection_bare(srange_node(node)))
end

# Visit a Unary node.
def visit_unary(node)
  # Special handling here for flipflops: a "!" applied to parentheses that
  # hold exactly one range.
  statement = node.statement
  contents = statement.contents if statement.is_a?(Paren)
  if contents.is_a?(Statements) && contents.body.length == 1
    range = contents.body.first
  end

  if range.is_a?(RangeNode) && node.operator == "!"
    type = range.operator.value == ".." ? :iflipflop : :eflipflop
    flipflop = s(type, visit(range).children, nil)

    return s(:send, [s(:begin, [flipflop], nil), :!], nil)
  end

  visit(canonical_unary(node))
end

# Visit an Undef node.
def visit_undef(node)
  s(
    :undef,
    visit_all(node.symbols),
    smap_keyword_bare(
      srange_length(node.start_char, 5),
      srange_node(node)
    )
  )
end

# Visit an UnlessNode node.
def visit_unless(node)
  # A bare regexp predicate, or a negated one, is matched against the
  # current line instead of being visited as an ordinary expression.
  predicate =
    case node.predicate
    when RegexpLiteral
      s(:match_current_line, [visit(node.predicate)], nil)
    when Unary
      # Unary#operator is compared as a plain string, the same way
      # visit_unary and canonical_unary use it.
      if node.predicate.operator == "!" &&
           node.predicate.statement.is_a?(RegexpLiteral)
        # s always takes a location argument; nil mirrors the bare regexp
        # branch above.
        s(
          :send,
          [
            s(:match_current_line, [visit(node.predicate.statement)], nil),
            :!
          ],
          nil
        )
      else
        visit(node.predicate)
      end
    else
      visit(node.predicate)
    end

  # The result is an :if node whose children are the predicate, the
  # consequent and then the statements.
  s(
    :if,
    [predicate, visit(node.consequent), visit(node.statements)],
    if node.modifier?
      smap_keyword_bare(
        srange_find_between(node.statements, node.predicate, "unless"),
        srange_node(node)
      )
    else
      smap_condition(
        srange_length(node.start_char, 6),
        srange_search_between(node.predicate, node.statements, "then"),
        nil,
        srange_length(node.end_char, -3),
        srange_node(node)
      )
    end
  )
end

# Visit an UntilNode node.
def visit_until(node)
  s(
    loop_post?(node) ? :until_post : :until,
    [visit(node.predicate), visit(node.statements)],
    if node.modifier?
      smap_keyword_bare(
        srange_find_between(node.statements, node.predicate, "until"),
        srange_node(node)
      )
    else
      # The begin token is "do" when present, otherwise ";" (nil if neither
      # is found between the predicate and the statements).
      smap_keyword(
        srange_length(node.start_char, 5),
        srange_search_between(node.predicate, node.statements, "do") ||
          srange_search_between(node.predicate, node.statements, ";"),
        srange_length(node.end_char, -3),
        srange_node(node)
      )
    end
  )
end

# Visit a VarField node.
def visit_var_field(node)
  # NOTE(review): the final else branch below suggests node.value may be
  # something without #value (possibly nil), which would raise on this
  # line first - confirm against the callers.
  name = node.value.value.to_sym

  # Inside a pattern (judging by the two nearest ancestors on the stack)
  # the field is emitted as a :match_var instead of an assignment.
  match_var =
    [stack[-3], stack[-2]].any? do |parent|
      case parent
      when AryPtn, FndPtn, HshPtn, In, RAssign
        true
      when Binary
        parent.operator == :"=>"
      else
        false
      end
    end

  if match_var
    s(
      :match_var,
      [name],
      smap_variable(srange_node(node.value), srange_node(node.value))
    )
  elsif node.value.is_a?(Const)
    s(
      :casgn,
      [nil, name],
      smap_constant(nil, srange_node(node.value), srange_node(node))
    )
  else
    location = smap_variable(srange_node(node), srange_node(node))

    case node.value
    when CVar
      s(:cvasgn, [name], location)
    when GVar
      s(:gvasgn, [name], location)
    when Ident
      s(:lvasgn, [name], location)
    when IVar
      s(:ivasgn, [name], location)
    when VarRef
      s(:lvasgn, [name], location)
    else
      s(:match_rest, [], nil)
    end
  end
end

# Visit a VarRef node.
def visit_var_ref(node)
  visit(node.value)
end

# Visit a VCall node.
def visit_vcall(node)
  # Handled as a command call that has only a message.
  visit_command_call(
    CommandCall.new(
      receiver: nil,
      operator: nil,
      message: node.value,
      arguments: nil,
      block: nil,
      location: node.location
    )
  )
end

# Visit a When node.
def visit_when(node)
  keyword = srange_length(node.start_char, 4)

  # A begin token is only recorded when the statements start with ";".
  begin_token =
    if buffer.source[node.statements.start_char] == ";"
      srange_length(node.statements.start_char, 1)
    end

  end_char =
    if node.statements.body.empty?
      node.statements.end_char
    else
      node.statements.body.last.end_char
    end

  s(
    :when,
    visit_all(node.arguments.parts) + [visit(node.statements)],
    smap_keyword(
      keyword,
      begin_token,
      nil,
      srange(keyword.begin_pos, end_char)
    )
  )
end

# Visit a WhileNode node.
def visit_while(node)
  s(
    loop_post?(node) ? :while_post : :while,
    [visit(node.predicate), visit(node.statements)],
    if node.modifier?
      smap_keyword_bare(
        srange_find_between(node.statements, node.predicate, "while"),
        srange_node(node)
      )
    else
      # The begin token is "do" when present, otherwise ";" (nil if neither
      # is found between the predicate and the statements).
      smap_keyword(
        srange_length(node.start_char, 5),
        srange_search_between(node.predicate, node.statements, "do") ||
          srange_search_between(node.predicate, node.statements, ";"),
        srange_length(node.end_char, -3),
        srange_node(node)
      )
    end
  )
end

# Visit a Word node.
def visit_word(node)
  # Handled as a string literal without quotes.
  visit_string_literal(
    StringLiteral.new(
      parts: node.parts,
      quote: nil,
      location: node.location
    )
  )
end

# Visit a Words node.
def visit_words(node)
  # Handled as an array literal whose opening bracket is the words token.
  visit_array(
    ArrayLiteral.new(
      lbracket: node.beginning,
      contents: Args.new(parts: node.elements, location: node.location),
      location: node.location
    )
  )
end

# Visit an XStringLiteral node.
def visit_xstring_literal(node)
  s(
    :xstr,
    visit_all(node.parts),
    smap_collection(
      # The opening token is 3 characters wide when the node starts with
      # "%" and 1 character wide otherwise.
      srange_length(
        node.start_char,
        buffer.source[node.start_char] == "%" ? 3 : 1
      ),
      srange_length(node.end_char, -1),
      srange_node(node)
    )
  )
end

# Visit a YieldNode node.
def visit_yield(node)
  case node.arguments
  when nil
    s(
      :yield,
      [],
      smap_keyword_bare(
        srange_length(node.start_char, 5),
        srange_node(node)
      )
    )
  when Args
    s(
      :yield,
      visit_all(node.arguments.parts),
      smap_keyword_bare(
        srange_length(node.start_char, 5),
        srange_node(node)
      )
    )
  else
    # Any other arguments object wraps its parts in #contents, and its
    # first and the node's last character become the begin and end tokens.
    s(
      :yield,
      visit_all(node.arguments.contents.parts),
      smap_keyword(
        srange_length(node.start_char, 5),
        srange_length(node.arguments.start_char, 1),
        srange_length(node.end_char, -1),
        srange_node(node)
      )
    )
  end
end

# Visit a ZSuper node.
+ def visit_zsuper(node) + s( + :zsuper, + [], + smap_keyword_bare( + srange_length(node.start_char, 5), + srange_node(node) + ) + ) + end + end + + private + + def block_children(node) + arguments = + if node.block_var + visit(node.block_var) + else + s(:args, [], smap_collection_bare(nil)) + end + + type = :block + if !node.block_var && (maximum = num_block_type(node.bodystmt)) + type = :numblock + arguments = maximum + end + + [type, arguments] + end + + # Convert a Unary node into a canonical CommandCall node. + def canonical_unary(node) + # For integers and floats with a leading + or -, parser represents them + # as just their values with the signs attached. + if %w[+ -].include?(node.operator) && + (node.statement.is_a?(Int) || node.statement.is_a?(FloatLiteral)) + return( + node.statement.class.new( + value: "#{node.operator}#{node.statement.value}", + location: node.location + ) + ) + end + + value = { "+" => "+@", "-" => "-@" }.fetch(node.operator, node.operator) + length = node.operator.length + + CommandCall.new( + receiver: node.statement, + operator: nil, + message: + Op.new( + value: value, + location: + Location.new( + start_line: node.location.start_line, + start_char: node.start_char, + start_column: node.location.start_column, + end_line: node.location.start_line, + end_char: node.start_char + length, + end_column: node.location.start_column + length + ) + ), + arguments: nil, + block: nil, + location: node.location + ) + end + + # Convert a Binary node into a canonical CommandCall node. 
def canonical_binary(node)
  operator = node.operator.to_s

  start_char = node.left.end_char
  end_char = node.right.start_char

  # The operator is looked up in the source between the two operands.
  between = buffer.source[start_char...end_char]
  index = between.index(operator)
  raise "Could not find #{operator.inspect} in #{between.inspect}" unless index

  # index is relative to the end of the left operand, so it is turned into
  # an absolute offset before the source is sliced with it.
  op_start = start_char + index
  op_end = op_start + operator.length

  # The operator's line is this node's first line plus every newline between
  # the start of the node and the operator.
  start_line =
    node.location.start_line +
      buffer.source[node.start_char...op_start].count("\n")

  # The column is the distance from the start of the operator's own line:
  # the character after the closest preceding newline, or offset 0.
  line_start = (buffer.source.rindex("\n", op_start) || -1) + 1
  start_column = op_start - line_start

  op_location =
    Location.new(
      start_line: start_line,
      start_column: start_column,
      start_char: op_start,
      end_line: start_line,
      end_column: start_column + operator.length,
      end_char: op_end
    )

  CommandCall.new(
    receiver: node.left,
    operator: nil,
    message: Op.new(value: operator, location: op_location),
    arguments:
      Args.new(parts: [node.right], location: node.right.location),
    block: nil,
    location: node.location
  )
end

# When you have a begin..end while or begin..end until, it's a special
# kind of syntax that executes the block in a loop. In this case the
# parser gem has a special node type for it.
def loop_post?(node)
  node.modifier? && node.statements.is_a?(Statements) &&
    node.statements.body.length == 1 &&
    node.statements.body.first.is_a?(Begin)
end

# We need to find if we should transform this block into a numblock
# since there could be new numbered variables like _1. Returns the highest
# number found, or nil when there is none.
def num_block_type(statements)
  variables = []
  queue = [statements]

  # Breadth-first walk over every descendant of the given node.
  while (child_node = queue.shift)
    if child_node.is_a?(VarRef) && child_node.value.is_a?(Ident) &&
         (match = /\A_(\d+)\z/.match(child_node.value.value))
      variables << match[1].to_i
    end

    # Append in place rather than rebuilding the queue on every iteration.
    queue.concat(child_node.child_nodes.compact)
  end

  variables.max
end

# This method comes almost directly from the parser gem and creates a new
# parser gem node from the given s-expression. type is expected to be a
# symbol, children is expected to be an array, and location is expected to
# be a source map.
def s(type, children, location)
  ::Parser::AST::Node.new(type, children, location: location)
end

# Builds a plain Parser::Source::Map that only holds an expression range.
def smap(expression)
  ::Parser::Source::Map.new(expression)
end

# Builds a Parser::Source::Map::Collection from the begin token, end token
# and expression ranges.
def smap_collection(begin_token, end_token, expression)
  ::Parser::Source::Map::Collection.new(begin_token, end_token, expression)
end

# Builds a collection source map that has neither a begin nor an end token.
def smap_collection_bare(expression)
  smap_collection(nil, nil, expression)
end

# Builds a Parser::Source::Map::Condition from the keyword, begin, else and
# end token ranges plus the expression range.
def smap_condition(keyword, begin_token, else_token, end_token, expression)
  ::Parser::Source::Map::Condition.new(
    keyword,
    begin_token,
    else_token,
    end_token,
    expression
  )
end

# Builds a condition source map in which only the expression range is set.
def smap_condition_bare(expression)
  smap_condition(nil, nil, nil, nil, expression)
end

# Builds a Parser::Source::Map::Constant from the double colon, name and
# expression ranges.
def smap_constant(double_colon, name, expression)
  ::Parser::Source::Map::Constant.new(double_colon, name, expression)
end

# Builds a Parser::Source::Map::Definition from the keyword, operator, name
# and end token ranges.
def smap_definition(keyword, operator, name, end_token)
  ::Parser::Source::Map::Definition.new(keyword, operator, name, end_token)
end

# Builds a Parser::Source::Map::For from the keyword, in, begin and end
# token ranges plus the expression range.
def smap_for(keyword, in_token, begin_token, end_token, expression)
  ::Parser::Source::Map::For.new(
    keyword,
    in_token,
    begin_token,
    end_token,
    expression
  )
end

# Builds a Parser::Source::Map::Heredoc from the expression, heredoc body
# and heredoc end ranges.
def smap_heredoc(expression, heredoc_body, heredoc_end)
  ::Parser::Source::Map::Heredoc.new(expression, heredoc_body, heredoc_end)
end

# Construct a source map for an index operation.
def smap_index(begin_token, end_token, expression)
  ::Parser::Source::Map::Index.new(begin_token, end_token, expression)
end

# Builds a Parser::Source::Map::Keyword from the keyword, begin token and
# end token ranges plus the expression range.
def smap_keyword(keyword, begin_token, end_token, expression)
  ::Parser::Source::Map::Keyword.new(
    keyword,
    begin_token,
    end_token,
    expression
  )
end

# Builds a keyword source map that has neither a begin nor an end token.
def smap_keyword_bare(keyword, expression)
  smap_keyword(keyword, nil, nil, expression)
end

# Builds a Parser::Source::Map::MethodDefinition from the keyword, operator,
# name, end token and assignment ranges plus the expression range.
def smap_method_definition(
  keyword,
  operator,
  name,
  end_token,
  assignment,
  expression
)
  ::Parser::Source::Map::MethodDefinition.new(
    keyword,
    operator,
    name,
    end_token,
    assignment,
    expression
  )
end

# Builds a Parser::Source::Map::Operator from the operator and expression
# ranges.
def smap_operator(operator, expression)
  ::Parser::Source::Map::Operator.new(operator, expression)
end

# Builds a Parser::Source::Map::RescueBody from the keyword, assoc and begin
# token ranges plus the expression range.
def smap_rescue_body(keyword, assoc, begin_token, expression)
  ::Parser::Source::Map::RescueBody.new(
    keyword,
    assoc,
    begin_token,
    expression
  )
end

# Builds a Parser::Source::Map::Send from the dot, selector, begin token and
# end token ranges plus the expression range.
def smap_send(dot, selector, begin_token, end_token, expression)
  ::Parser::Source::Map::Send.new(
    dot,
    selector,
    begin_token,
    end_token,
    expression
  )
end

# Builds a send source map that only has a selector and an expression.
def smap_send_bare(selector, expression)
  smap_send(nil, selector, nil, nil, expression)
end

# Builds a Parser::Source::Map::Ternary from the question mark, colon and
# expression ranges.
def smap_ternary(question, colon, expression)
  ::Parser::Source::Map::Ternary.new(question, colon, expression)
end

# Constructs a new source map for a variable.
def smap_variable(name, expression)
  ::Parser::Source::Map::Variable.new(name, expression)
end

# Builds a Parser::Source::Range over the buffer from the given start and
# end offsets.
def srange(start_char, end_char)
  ::Parser::Source::Range.new(buffer, start_char, end_char)
end

# Looks for the needle in the source between the two offsets and returns a
# range covering its first occurrence, or nil when it is not there.
def srange_search(start_char, end_char, needle)
  index = buffer.source[start_char...end_char].index(needle)

  if index
    offset = start_char + index
    srange(offset, offset + needle.length)
  end
end

# Same search as srange_search, but between the end of the start node and
# the start of the end node. Returns nil when the needle is not found.
def srange_search_between(start_node, end_node, needle)
  srange_search(start_node.end_char, end_node.start_char, needle)
end

# Same search as srange_search, but raises an error when the needle is not
# found instead of returning nil.
def srange_find(start_char, end_char, needle)
  found = srange_search(start_char, end_char, needle)
  return found if found

  slice = buffer.source[start_char...end_char].inspect
  raise "Could not find #{needle.inspect} in #{slice}"
end

# Same search as srange_find, but between the end of the start node and the
# start of the end node. Raises an error when the needle is not found.
def srange_find_between(start_node, end_node, needle)
  srange_find(start_node.end_char, end_node.start_char, needle)
end

# Builds a source range from an offset and a length. A positive length
# extends forward from the offset; otherwise the range ends at the offset.
def srange_length(start_char, length)
  return srange(start_char, start_char + length) if length > 0

  srange(start_char + length, start_char)
end

# Constructs a new source range using the given node's location.
+ def srange_node(node) + location = node.location + srange(location.start_char, location.end_char) + end + end + end +end diff --git a/lib/syntax_tree/translation/rubocop_ast.rb b/lib/syntax_tree/translation/rubocop_ast.rb new file mode 100644 index 00000000..53c6737b --- /dev/null +++ b/lib/syntax_tree/translation/rubocop_ast.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module SyntaxTree + module Translation + # This visitor is responsible for converting the syntax tree produced by + # Syntax Tree into the syntax tree produced by the rubocop/rubocop-ast gem. + class RuboCopAST < Parser + private + + # This method is effectively the same thing as the parser gem except that + # it uses the rubocop-ast specializations of the nodes. + def s(type, children, location) + ::RuboCop::AST::Builder::NODE_MAP.fetch(type, ::RuboCop::AST::Node).new( + type, + children, + location: location + ) + end + end + end +end diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index 6cb1fccf..1f028f89 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "5.3.0" + VERSION = "6.0.0" end diff --git a/lib/syntax_tree/visitor/environment.rb b/lib/syntax_tree/visitor/environment.rb deleted file mode 100644 index b07a5203..00000000 --- a/lib/syntax_tree/visitor/environment.rb +++ /dev/null @@ -1,84 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # The environment class is used to keep track of local variables and arguments - # inside a particular scope - class Environment - # This class tracks the occurrences of a local variable or argument - class Local - # [Symbol] The type of the local (e.g. 
:argument, :variable) - attr_reader :type - - # [Array[Location]] The locations of all definitions and assignments of - # this local - attr_reader :definitions - - # [Array[Location]] The locations of all usages of this local - attr_reader :usages - - # initialize: (Symbol type) -> void - def initialize(type) - @type = type - @definitions = [] - @usages = [] - end - - # add_definition: (Location location) -> void - def add_definition(location) - @definitions << location - end - - # add_usage: (Location location) -> void - def add_usage(location) - @usages << location - end - end - - # [Array[Local]] The local variables and arguments defined in this - # environment - attr_reader :locals - - # [Environment | nil] The parent environment - attr_reader :parent - - # initialize: (Environment | nil parent) -> void - def initialize(parent = nil) - @locals = {} - @parent = parent - end - - # Adding a local definition will either insert a new entry in the locals - # hash or append a new definition location to an existing local. Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_definition: (Ident | Label identifier, Symbol type) -> void - def add_local_definition(identifier, type) - name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_definition(identifier.location) - end - - # Adding a local usage will either insert a new entry in the locals - # hash or append a new usage location to an existing local. 
Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_usage: (Ident | Label identifier, Symbol type) -> void - def add_local_usage(identifier, type) - name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_usage(identifier.location) - end - - # Try to find the local given its name in this environment or any of its - # parents - # find_local: (String name) -> Local | nil - def find_local(name) - local = @locals[name] - return local unless local.nil? - - @parent&.find_local(name) - end - end -end diff --git a/lib/syntax_tree/visitor/json_visitor.rb b/lib/syntax_tree/visitor/json_visitor.rb deleted file mode 100644 index b516980c..00000000 --- a/lib/syntax_tree/visitor/json_visitor.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a hash that contains only primitives - # that can be easily serialized into JSON. - class JSONVisitor < FieldVisitor - attr_reader :target - - def initialize - @target = nil - end - - private - - def comments(node) - target[:comments] = visit_all(node.comments) - end - - def field(name, value) - target[name] = value.is_a?(Node) ? 
visit(value) : value - end - - def list(name, values) - target[name] = visit_all(values) - end - - def node(node, type) - previous = @target - @target = { type: type, location: visit_location(node.location) } - yield - @target - ensure - @target = previous - end - - def pairs(name, values) - target[name] = values.map { |(key, value)| [visit(key), visit(value)] } - end - - def text(name, value) - target[name] = value - end - - def visit_location(location) - [ - location.start_line, - location.start_char, - location.end_line, - location.end_char - ] - end - end - end -end diff --git a/lib/syntax_tree/visitor/match_visitor.rb b/lib/syntax_tree/visitor/match_visitor.rb deleted file mode 100644 index e0bdaf08..00000000 --- a/lib/syntax_tree/visitor/match_visitor.rb +++ /dev/null @@ -1,122 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a Ruby pattern matching expression - # that would match correctly against the AST. - class MatchVisitor < FieldVisitor - attr_reader :q - - def initialize(q) - @q = q - end - - def visit(node) - case node - when Node - super - when String - # pp will split up a string on newlines and concat them together using - # a "+" operator. This breaks the pattern matching expression. So - # instead we're going to check here for strings and manually put the - # entire value into the output buffer. - q.text(node.inspect) - else - node.pretty_print(q) - end - end - - private - - def comments(node) - return if node.comments.empty? 
- - q.nest(0) do - q.text("comments: [") - q.indent do - q.breakable("") - q.seplist(node.comments) { |comment| visit(comment) } - end - q.breakable("") - q.text("]") - end - end - - def field(name, value) - q.nest(0) do - q.text(name) - q.text(": ") - visit(value) - end - end - - def list(name, values) - q.group do - q.text(name) - q.text(": [") - q.indent do - q.breakable("") - q.seplist(values) { |value| visit(value) } - end - q.breakable("") - q.text("]") - end - end - - def node(node, _type) - items = [] - q.with_target(items) { yield } - - if items.empty? - q.text(node.class.name) - return - end - - q.group do - q.text(node.class.name) - q.text("[") - q.indent do - q.breakable("") - q.seplist(items) { |item| q.target << item } - end - q.breakable("") - q.text("]") - end - end - - def pairs(name, values) - q.group do - q.text(name) - q.text(": [") - q.indent do - q.breakable("") - q.seplist(values) do |(key, value)| - q.group do - q.text("[") - q.indent do - q.breakable("") - visit(key) - q.text(",") - q.breakable - visit(value || nil) - end - q.breakable("") - q.text("]") - end - end - end - q.breakable("") - q.text("]") - end - end - - def text(name, value) - q.nest(0) do - q.text(name) - q.text(": ") - value.pretty_print(q) - end - end - end - end -end diff --git a/lib/syntax_tree/visitor/pretty_print_visitor.rb b/lib/syntax_tree/visitor/pretty_print_visitor.rb deleted file mode 100644 index 674e3aac..00000000 --- a/lib/syntax_tree/visitor/pretty_print_visitor.rb +++ /dev/null @@ -1,85 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor pretty-prints the AST into an equivalent s-expression. - class PrettyPrintVisitor < FieldVisitor - attr_reader :q - - def initialize(q) - @q = q - end - - # This is here because we need to make sure the operator is cast to a - # string before we print it out. 
- def visit_binary(node) - node(node, "binary") do - field("left", node.left) - text("operator", node.operator.to_s) - field("right", node.right) - comments(node) - end - end - - # This is here to make it a little nicer to look at labels since they - # typically have their : at the end of the value. - def visit_label(node) - node(node, "label") do - q.breakable - q.text(":") - q.text(node.value[0...-1]) - comments(node) - end - end - - private - - def comments(node) - return if node.comments.empty? - - q.breakable - q.group(2, "(", ")") do - q.seplist(node.comments) { |comment| q.pp(comment) } - end - end - - def field(_name, value) - q.breakable - q.pp(value) - end - - def list(_name, values) - q.breakable - q.group(2, "(", ")") { q.seplist(values) { |value| q.pp(value) } } - end - - def node(_node, type) - q.group(2, "(", ")") do - q.text(type) - yield - end - end - - def pairs(_name, values) - q.group(2, "(", ")") do - q.seplist(values) do |(key, value)| - q.pp(key) - - if value - q.text("=") - q.group(2) do - q.breakable("") - q.pp(value) - end - end - end - end - end - - def text(_name, value) - q.breakable - q.text(value) - end - end - end -end diff --git a/lib/syntax_tree/visitor/with_environment.rb b/lib/syntax_tree/visitor/with_environment.rb deleted file mode 100644 index 59033d50..00000000 --- a/lib/syntax_tree/visitor/with_environment.rb +++ /dev/null @@ -1,140 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # WithEnvironment is a module intended to be included in classes inheriting - # from Visitor. The module overrides a few visit methods to automatically keep - # track of local variables and arguments defined in the current environment. 
- # Example usage: - # class MyVisitor < Visitor - # include WithEnvironment - # - # def visit_ident(node) - # # Check if we're visiting an identifier for an argument, a local - # variable or something else - # local = current_environment.find_local(node) - # - # if local.type == :argument - # # handle identifiers for arguments - # elsif local.type == :variable - # # handle identifiers for variables - # else - # # handle other identifiers, such as method names - # end - # end - module WithEnvironment - def current_environment - @current_environment ||= Environment.new - end - - def with_new_environment - previous_environment = @current_environment - @current_environment = Environment.new(previous_environment) - yield - ensure - @current_environment = previous_environment - end - - # Visits for nodes that create new environments, such as classes, modules - # and method definitions - def visit_class(node) - with_new_environment { super } - end - - def visit_module(node) - with_new_environment { super } - end - - # When we find a method invocation with a block, only the code that happens - # inside of the block needs a fresh environment. 
The method invocation - # itself happens in the same environment - def visit_method_add_block(node) - visit(node.call) - with_new_environment { visit(node.block) } - end - - def visit_def(node) - with_new_environment { super } - end - - # Visit for keeping track of local arguments, such as method and block - # arguments - def visit_params(node) - add_argument_definitions(node.requireds) - - node.posts.each do |param| - current_environment.add_local_definition(param, :argument) - end - - node.keywords.each do |param| - current_environment.add_local_definition(param.first, :argument) - end - - node.optionals.each do |param| - current_environment.add_local_definition(param.first, :argument) - end - - super - end - - def visit_rest_param(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - def visit_kwrest_param(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - def visit_blockarg(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - # Visit for keeping track of local variable definitions - def visit_var_field(node) - value = node.value - - if value.is_a?(SyntaxTree::Ident) - current_environment.add_local_definition(value, :variable) - end - - super - end - - alias visit_pinned_var_ref visit_var_field - - # Visits for keeping track of variable and argument usages - def visit_var_ref(node) - value = node.value - - if value.is_a?(SyntaxTree::Ident) - definition = current_environment.find_local(value.value) - - if definition - current_environment.add_local_usage(value, definition.type) - end - end - - super - end - - private - - def add_argument_definitions(list) - list.each do |param| - if param.is_a?(SyntaxTree::MLHSParen) - add_argument_definitions(param.contents.parts) - else - current_environment.add_local_definition(param, :argument) - end - end - end - end -end diff --git 
a/lib/syntax_tree/with_scope.rb b/lib/syntax_tree/with_scope.rb new file mode 100644 index 00000000..7fcef067 --- /dev/null +++ b/lib/syntax_tree/with_scope.rb @@ -0,0 +1,244 @@ +# frozen_string_literal: true + +module SyntaxTree + # WithScope is a module intended to be included in classes inheriting from + # Visitor. The module overrides a few visit methods to automatically keep + # track of local variables and arguments defined in the current scope. + # Example usage: + # + # class MyVisitor < Visitor + # include WithScope + # + # def visit_ident(node) + # # Check if we're visiting an identifier for an argument, a local + # # variable or something else + # local = current_scope.find_local(node) + # + # if local.type == :argument + # # handle identifiers for arguments + # elsif local.type == :variable + # # handle identifiers for variables + # else + # # handle other identifiers, such as method names + # end + # end + # end + # + module WithScope + # The scope class is used to keep track of local variables and arguments + # inside a particular scope. + class Scope + # This class tracks the occurrences of a local variable or argument. + class Local + # [Symbol] The type of the local (e.g. 
:argument, :variable) + attr_reader :type + + # [Array[Location]] The locations of all definitions and assignments of + # this local + attr_reader :definitions + + # [Array[Location]] The locations of all usages of this local + attr_reader :usages + + def initialize(type) + @type = type + @definitions = [] + @usages = [] + end + + def add_definition(location) + @definitions << location + end + + def add_usage(location) + @usages << location + end + end + + # [Integer] a unique identifier for this scope + attr_reader :id + + # [scope | nil] The parent scope + attr_reader :parent + + # [Hash[String, Local]] The local variables and arguments defined in this + # scope + attr_reader :locals + + def initialize(id, parent = nil) + @id = id + @parent = parent + @locals = {} + end + + # Adding a local definition will either insert a new entry in the locals + # hash or append a new definition location to an existing local. Notice + # that it's not possible to change the type of a local after it has been + # registered. + def add_local_definition(identifier, type) + name = identifier.value.delete_suffix(":") + + local = + if type == :argument + locals[name] ||= Local.new(type) + else + resolve_local(name, type) + end + + local.add_definition(identifier.location) + end + + # Adding a local usage will either insert a new entry in the locals + # hash or append a new usage location to an existing local. Notice that + # it's not possible to change the type of a local after it has been + # registered. + def add_local_usage(identifier, type) + name = identifier.value.delete_suffix(":") + resolve_local(name, type).add_usage(identifier.location) + end + + # Try to find the local given its name in this scope or any of its + # parents. 
+ def find_local(name) + locals[name] || parent&.find_local(name) + end + + private + + def resolve_local(name, type) + local = find_local(name) + + unless local + local = Local.new(type) + locals[name] = local + end + + local + end + end + + attr_reader :current_scope + + def initialize(*args, **kwargs, &block) + super + + @current_scope = Scope.new(0) + @next_scope_id = 0 + end + + # Visits for nodes that create new scopes, such as classes, modules + # and method definitions. + def visit_class(node) + with_scope { super } + end + + def visit_module(node) + with_scope { super } + end + + # When we find a method invocation with a block, only the code that happens + # inside of the block needs a fresh scope. The method invocation + # itself happens in the same scope. + def visit_method_add_block(node) + visit(node.call) + with_scope(current_scope) { visit(node.block) } + end + + def visit_def(node) + with_scope { super } + end + + # Visit for keeping track of local arguments, such as method and block + # arguments. 
+ def visit_params(node) + add_argument_definitions(node.requireds) + + node.posts.each do |param| + current_scope.add_local_definition(param, :argument) + end + + node.keywords.each do |param| + current_scope.add_local_definition(param.first, :argument) + end + + node.optionals.each do |param| + current_scope.add_local_definition(param.first, :argument) + end + + super + end + + def visit_rest_param(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + def visit_kwrest_param(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + def visit_blockarg(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + # Visit for keeping track of local variable definitions + def visit_var_field(node) + value = node.value + current_scope.add_local_definition(value, :variable) if value.is_a?(Ident) + + super + end + + # Visit for keeping track of local variable definitions + def visit_pinned_var_ref(node) + value = node.value + current_scope.add_local_usage(value, :variable) if value.is_a?(Ident) + + super + end + + # Visits for keeping track of variable and argument usages + def visit_var_ref(node) + value = node.value + + if value.is_a?(Ident) + definition = current_scope.find_local(value.value) + current_scope.add_local_usage(value, definition.type) if definition + end + + super + end + + private + + def add_argument_definitions(list) + list.each do |param| + if param.is_a?(SyntaxTree::MLHSParen) + add_argument_definitions(param.contents.parts) + else + current_scope.add_local_definition(param, :argument) + end + end + end + + def next_scope_id + @next_scope_id += 1 + end + + def with_scope(parent_scope = nil) + previous_scope = @current_scope + @current_scope = Scope.new(next_scope_id, parent_scope) + yield + ensure + @current_scope = previous_scope + end + end +end diff --git a/lib/syntax_tree/yarv.rb 
b/lib/syntax_tree/yarv.rb index 7e4da7bb..bd5c54b9 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,5 +1,23 @@ # frozen_string_literal: true +require "stringio" + +require_relative "yarv/basic_block" +require_relative "yarv/bf" +require_relative "yarv/calldata" +require_relative "yarv/compiler" +require_relative "yarv/control_flow_graph" +require_relative "yarv/data_flow_graph" +require_relative "yarv/decompiler" +require_relative "yarv/disassembler" +require_relative "yarv/instruction_sequence" +require_relative "yarv/instructions" +require_relative "yarv/legacy" +require_relative "yarv/local_table" +require_relative "yarv/sea_of_nodes" +require_relative "yarv/assembler" +require_relative "yarv/vm" + module SyntaxTree # This module provides an object representation of the YARV bytecode. module YARV diff --git a/lib/syntax_tree/yarv/basic_block.rb b/lib/syntax_tree/yarv/basic_block.rb new file mode 100644 index 00000000..6798a092 --- /dev/null +++ b/lib/syntax_tree/yarv/basic_block.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This object represents a single basic block, wherein all contained + # instructions do not branch except for the last one. + class BasicBlock + # This is the unique identifier for this basic block. + attr_reader :id + + # This is the index into the list of instructions where this block starts. + attr_reader :block_start + + # This is the set of instructions that this block contains. + attr_reader :insns + + # This is an array of basic blocks that lead into this block. + attr_reader :incoming_blocks + + # This is an array of basic blocks that this block leads into. 
+ attr_reader :outgoing_blocks + + def initialize(block_start, insns) + @id = "block_#{block_start}" + + @block_start = block_start + @insns = insns + + @incoming_blocks = [] + @outgoing_blocks = [] + end + + # Yield each instruction in this basic block along with its index from the + # original instruction sequence. + def each_with_length + return enum_for(:each_with_length) unless block_given? + + length = block_start + insns.each do |insn| + yield insn, length + length += insn.length + end + end + + # This method is used to verify that the basic block is well formed. It + # checks that the only instruction in this basic block that branches is + # the last instruction. + def verify + insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } + end + end + end +end diff --git a/lib/syntax_tree/yarv/calldata.rb b/lib/syntax_tree/yarv/calldata.rb new file mode 100644 index 00000000..fadea61b --- /dev/null +++ b/lib/syntax_tree/yarv/calldata.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. 
class CallData
  # Bit flags describing the shape of a call site. Each constant occupies a
  # distinct bit so that several can be combined into a single integer with
  # bitwise OR and queried individually with #flag?.
  CALL_ARGS_SPLAT = 1 << 0
  CALL_ARGS_BLOCKARG = 1 << 1
  CALL_FCALL = 1 << 2
  CALL_VCALL = 1 << 3
  CALL_ARGS_SIMPLE = 1 << 4
  CALL_BLOCKISEQ = 1 << 5
  CALL_KWARG = 1 << 6
  CALL_KW_SPLAT = 1 << 7
  CALL_TAILCALL = 1 << 8
  CALL_SUPER = 1 << 9
  CALL_ZSUPER = 1 << 10
  CALL_OPT_SEND = 1 << 11
  CALL_KW_SPLAT_MUT = 1 << 12

  # method - the name of the method being called (may be nil)
  # argc   - the number of arguments passed at the call site
  # flags  - an integer bitmask built from the CALL_* constants
  # kw_arg - the list of keyword argument names, or nil when there are none
  attr_reader :method, :argc, :flags, :kw_arg

  # Build a new call data object. When no flags are given the call site is
  # treated as a simple call (CALL_ARGS_SIMPLE).
  def initialize(
    method,
    argc = 0,
    flags = CallData::CALL_ARGS_SIMPLE,
    kw_arg = nil
  )
    @method = method
    @argc = argc
    @flags = flags
    @kw_arg = kw_arg
  end

  # Returns true if any bit of the given mask is set on this call site.
  def flag?(mask)
    (flags & mask) > 0
  end

  # Serialize this object into a hash with the keys :mid, :flag and
  # :orig_argc. The :kw_arg key is only present when keyword argument names
  # were given. The result can be passed back to CallData.from.
  def to_h
    result = { mid: method, flag: flags, orig_argc: argc }
    result[:kw_arg] = kw_arg if kw_arg
    result
  end

  # Returns a human-readable description of this call site, for example:
  #
  #     <calldata!mid:foo, argc:1, FCALL|ARGS_SIMPLE>
  #
  # The method name is omitted when it is nil, the keyword list is omitted
  # when there are no keyword arguments, and the flag list is omitted when no
  # known flag is set.
  def inspect
    names = []
    names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT)
    names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG)
    names << :FCALL if flag?(CALL_FCALL)
    names << :VCALL if flag?(CALL_VCALL)
    names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE)
    names << :BLOCKISEQ if flag?(CALL_BLOCKISEQ)
    names << :KWARG if flag?(CALL_KWARG)
    names << :KW_SPLAT if flag?(CALL_KW_SPLAT)
    names << :TAILCALL if flag?(CALL_TAILCALL)
    names << :SUPER if flag?(CALL_SUPER)
    names << :ZSUPER if flag?(CALL_ZSUPER)
    names << :OPT_SEND if flag?(CALL_OPT_SEND)
    names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT)

    parts = []
    parts << "mid:#{method}" if method
    parts << "argc:#{argc}"
    parts << "kw:[#{kw_arg.join(", ")}]" if kw_arg
    parts << names.join("|") if names.any?

    # Previously the collected parts were discarded and an empty string was
    # returned; wrap them in the calldata marker so the description is
    # actually rendered.
    "<calldata!#{parts.join(", ")}>"
  end

  # Rebuild a CallData object from the hash form produced by #to_h.
  def self.from(serialized)
    new(
      serialized[:mid],
      serialized[:orig_argc],
      serialized[:flag],
      serialized[:kw_arg]
    )
  end
end

# A convenience method for creating a CallData object.
+ def self.calldata( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + CallData.new(method, argc, flags, kw_arg) + end + end +end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index c1b4d6dd..bd20bc19 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -8,7 +8,7 @@ module YARV # # You use this as with any other visitor. First you parse code into a tree, # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. + # will return a SyntaxTree::YARV::Compiler::InstructionSequence object. # With that object you can call #to_a on it, which will return a serialized # form of the instruction sequence as an array. This array _should_ mirror # the array given by RubyVM::InstructionSequence#to_a. @@ -124,76 +124,122 @@ def self.compile(node) rescue CompilationError end - def visit_array(node) - node.contents ? visit_all(node.contents.parts) : [] - end + visit_methods do + def visit_array(node) + node.contents ? visit_all(node.contents.parts) : [] + end - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. - raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. 
+ raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end end - end - def visit_float(node) - node.value.to_f - end + def visit_float(node) + node.value.to_f + end - alias visit_hash visit_bare_assoc_hash + alias visit_hash visit_bare_assoc_hash - def visit_imaginary(node) - node.value.to_c - end + def visit_imaginary(node) + node.value.to_c + end - def visit_int(node) - case (value = node.value) - when /^0b/ - value[2..].to_i(2) - when /^0o/ - value[2..].to_i(8) - when /^0d/ - value[2..].to_i - when /^0x/ - value[2..].to_i(16) - else - value.to_i + def visit_int(node) + case (value = node.value) + when /^0b/ + value[2..].to_i(2) + when /^0o/ + value[2..].to_i(8) + when /^0d/ + value[2..].to_i + when /^0x/ + value[2..].to_i(16) + else + value.to_i + end end - end - def visit_label(node) - node.value.chomp(":").to_sym - end + def visit_label(node) + node.value.chomp(":").to_sym + end - def visit_mrhs(node) - visit_all(node.parts) - end + def visit_mrhs(node) + visit_all(node.parts) + end - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end - def visit_qwords(node) - visit_all(node.elements) - end + def visit_qwords(node) + visit_all(node.elements) + end - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? 
left..right : left...right + end - def visit_rational(node) - node.value.to_r - end + def visit_rational(node) + node.value.to_r + end - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new( + node.parts.first.value, + visit_regexp_literal_flags(node) + ) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. 
+ raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) end end @@ -219,47 +265,6 @@ def visit_regexp_literal_flags(node) end end - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_var_ref(node) - raise CompilationError unless node.value.is_a?(Kw) - - case node.value.value - when "nil" - nil - when "true" - true - when "false" - false - else - raise CompilationError - end - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - def visit_unsupported(_node) raise CompilationError end @@ -1050,11 +1055,16 @@ def visit_if_op(node) visit_if( IfNode.new( predicate: node.predicate, - statements: node.truthy, + statements: + Statements.new(body: [node.truthy], location: Location.default), consequent: Else.new( keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, + statements: + Statements.new( + body: [node.falsy], + location: Location.default + ), location: Location.default ), location: Location.default diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb new file mode 100644 index 00000000..2829bb21 --- /dev/null +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -0,0 +1,257 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class represents a control flow graph of a YARV instruction sequence. + # It constructs a graph of basic blocks that hold subsets of the list of + # instructions from the instruction sequence. 
+ # + # You can use this class by calling the ::compile method and passing it a + # YARV instruction sequence. It will return a control flow graph object. + # + # iseq = RubyVM::InstructionSequence.compile("1 + 2") + # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + # + class ControlFlowGraph + # This class is responsible for creating a control flow graph from the + # given instruction sequence. + class Compiler + # This is the instruction sequence that is being compiled. + attr_reader :iseq + + # This is a hash of indices in the YARV instruction sequence that point + # to their corresponding instruction. + attr_reader :insns + + # This is a hash of labels that point to their corresponding index into + # the YARV instruction sequence. Note that this is not the same as the + # index into the list of instructions on the instruction sequence + # object. Instead, this is the index into the C array, so it includes + # operands. + attr_reader :labels + + def initialize(iseq) + @iseq = iseq + + @insns = {} + @labels = {} + + length = 0 + iseq.insns.each do |insn| + case insn + when Instruction + @insns[length] = insn + length += insn.length + when InstructionSequence::Label + @labels[insn] = length + end + end + end + + # This method is used to compile the instruction sequence into a control + # flow graph. It returns an instance of ControlFlowGraph. 
+ def compile + blocks = build_basic_blocks + + connect_basic_blocks(blocks) + prune_basic_blocks(blocks) + + ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify) + end + + private + + # Finds the indices of the instructions that start a basic block because + # they're either: + # + # * the start of an instruction sequence + # * the target of a branch + # * fallen through to from a branch + # + def find_basic_block_starts + block_starts = Set.new([0]) + + insns.each do |index, insn| + branch_targets = insn.branch_targets + + if branch_targets.any? + branch_targets.each do |branch_target| + block_starts.add(labels[branch_target]) + end + + block_starts.add(index + insn.length) if insn.falls_through? + end + end + + block_starts.to_a.sort + end + + # Builds up a set of basic blocks by iterating over the starts of each + # block. They are keyed by the index of their first instruction. + def build_basic_blocks + block_starts = find_basic_block_starts + + length = 0 + blocks = + iseq + .insns + .grep(Instruction) + .slice_after do |insn| + length += insn.length + block_starts.include?(length) + end + + block_starts + .zip(blocks) + .to_h do |block_start, insns| + # It's possible that we have not detected a block start but still + # have branching instructions inside of a basic block. This can + # happen if you have an unconditional jump which is followed by + # instructions that are unreachable. As of Ruby 3.2, this is + # possible with something as simple as "1 => a". In this case we + # can discard all instructions that follow branching instructions. + block_insns = + insns.slice_after { |insn| insn.branch_targets.any? }.first + + [block_start, BasicBlock.new(block_start, block_insns)] + end + end + + # Connect the blocks by letting them know which blocks are incoming and + # outgoing from each block. 
+ def connect_basic_blocks(blocks) + blocks.each do |block_start, block| + insn = block.insns.last + + insn.branch_targets.each do |branch_target| + block.outgoing_blocks << blocks.fetch(labels[branch_target]) + end + + if (insn.branch_targets.empty? && !insn.leaves?) || + insn.falls_through? + fall_through_start = block_start + block.insns.sum(&:length) + block.outgoing_blocks << blocks.fetch(fall_through_start) + end + + block.outgoing_blocks.each do |outgoing_block| + outgoing_block.incoming_blocks << block + end + end + end + + # If there are blocks that are unreachable, we can remove them from the + # graph entirely at this point. + def prune_basic_blocks(blocks) + visited = Set.new + queue = [blocks.fetch(0)] + + until queue.empty? + current_block = queue.shift + next if visited.include?(current_block) + + visited << current_block + queue.concat(current_block.outgoing_blocks) + end + + blocks.select! { |_, block| visited.include?(block) } + end + end + + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. + attr_reader :blocks + + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks + end + + def disasm + fmt = Disassembler.new(iseq) + fmt.puts("== cfg: #{iseq.inspect}") + + blocks.each do |block| + fmt.puts(block.id) + fmt.with_prefix(" ") do |prefix| + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id) + fmt.puts("#{prefix}== from: #{from.join(", ")}") + end + + fmt.format_insns!(block.insns, block.block_start) + + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? 
+ fmt.puts("#{prefix}== to: #{to.join(", ")}") + end + end + + fmt.string + end + + def to_dfg + DataFlowGraph.compile(self) + end + + def to_son + to_dfg.to_son + end + + def to_mermaid + Mermaid.flowchart do |flowchart| + disasm = Disassembler::Squished.new + + blocks.each do |block| + flowchart.subgraph(block.id) do + previous = nil + + block.each_with_length do |insn, length| + node = + flowchart.node( + "node_#{length}", + "%04d %s" % [length, insn.disasm(disasm)] + ) + + flowchart.link(previous, node) if previous + previous = node + end + end + end + + blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + + from = flowchart.fetch("node_#{offset}") + to = flowchart.fetch("node_#{outgoing.block_start}") + flowchart.link(from, to) + end + end + end + end + + # This method is used to verify that the control flow graph is well + # formed. It does this by checking that each basic block is itself well + # formed. + def verify + blocks.each(&:verify) + end + + def self.compile(iseq) + Compiler.new(iseq).compile + end + end + end +end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb new file mode 100644 index 00000000..aedee9ba --- /dev/null +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -0,0 +1,338 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Constructs a data-flow-graph of a YARV instruction sequence, via a + # control-flow-graph. Data flow is discovered locally and then globally. The + # graph only considers data flow through the stack - local variables and + # objects are considered fully escaped in this analysis. + # + # You can use this class by calling the ::compile method and passing it a + # control flow graph. It will return a data flow graph object. 
+ # + # iseq = RubyVM::InstructionSequence.compile("1 + 2") + # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + # dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + # + class DataFlowGraph + # This object represents the flow of data between instructions. + class DataFlow + attr_reader :in + attr_reader :out + + def initialize + @in = [] + @out = [] + end + end + + # This represents an object that goes on the stack that is passed between + # basic blocks. + class BlockArgument + attr_reader :name + + def initialize(name) + @name = name + end + + def local? + false + end + + def to_str + name.to_s + end + end + + # This represents an object that goes on the stack that is passed between + # instructions within a basic block. + class LocalArgument + attr_reader :name, :length + + def initialize(length) + @length = length + end + + def local? + true + end + + def to_str + length.to_s + end + end + + attr_reader :cfg, :insn_flows, :block_flows + + def initialize(cfg, insn_flows, block_flows) + @cfg = cfg + @insn_flows = insn_flows + @block_flows = block_flows + end + + def blocks + cfg.blocks + end + + def disasm + fmt = Disassembler.new(cfg.iseq) + fmt.puts("== dfg: #{cfg.iseq.inspect}") + + blocks.each do |block| + fmt.puts(block.id) + fmt.with_prefix(" ") do |prefix| + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id) + fmt.puts("#{prefix}== from: #{from.join(", ")}") + end + + block_flow = block_flows.fetch(block.id) + unless block_flow.in.empty? + fmt.puts("#{prefix}== in: #{block_flow.in.join(", ")}") + end + + fmt.format_insns!(block.insns, block.block_start) do |_, length| + insn_flow = insn_flows[length] + next if insn_flow.in.empty? && insn_flow.out.empty? + + fmt.print(" # ") + unless insn_flow.in.empty? + fmt.print("in: #{insn_flow.in.join(", ")}") + fmt.print("; ") unless insn_flow.out.empty? + end + + unless insn_flow.out.empty? 
+ fmt.print("out: #{insn_flow.out.join(", ")}") + end + end + + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.puts("#{prefix}== to: #{to.join(", ")}") + + unless block_flow.out.empty? + fmt.puts("#{prefix}== out: #{block_flow.out.join(", ")}") + end + end + end + + fmt.string + end + + def to_son + SeaOfNodes.compile(self) + end + + def to_mermaid + Mermaid.flowchart do |flowchart| + disasm = Disassembler::Squished.new + + blocks.each do |block| + block_flow = block_flows.fetch(block.id) + graph_name = + if block_flow.in.any? + "#{block.id} #{block_flows[block.id].in.join(", ")}" + else + block.id + end + + flowchart.subgraph(graph_name) do + previous = nil + + block.each_with_length do |insn, length| + node = + flowchart.node( + "node_#{length}", + "%04d %s" % [length, insn.disasm(disasm)], + shape: :rounded + ) + + flowchart.link(previous, node, color: :red) if previous + insn_flows[length].in.each do |input| + if input.is_a?(LocalArgument) + from = flowchart.fetch("node_#{input.length}") + flowchart.link(from, node, color: :green) + end + end + + previous = node + end + end + end + + blocks.each do |block| + block.outgoing_blocks.each do |outgoing| + offset = + block.block_start + block.insns.sum(&:length) - + block.insns.last.length + + from = flowchart.fetch("node_#{offset}") + to = flowchart.fetch("node_#{outgoing.block_start}") + flowchart.link(from, to, color: :red) + end + end + end + end + + # Verify that we constructed the data flow graph correctly. + def verify + # Check that the first block has no arguments. + raise unless block_flows.fetch(blocks.first.id).in.empty? + + # Check all control flow edges between blocks pass the right number of + # arguments. + blocks.each do |block| + block_flow = block_flows.fetch(block.id) + + if block.outgoing_blocks.empty? + # With no outgoing blocks, there should be no output arguments. + raise unless block_flow.out.empty? + else + # Check with outgoing blocks... 
+ block.outgoing_blocks.each do |outgoing_block| + outgoing_flow = block_flows.fetch(outgoing_block.id) + + # The block should have as many output arguments as the + # outgoing block has input arguments. + raise unless block_flow.out.size == outgoing_flow.in.size + end + end + end + end + + def self.compile(cfg) + Compiler.new(cfg).compile + end + + # This class is responsible for creating a data flow graph from the given + # control flow graph. + class Compiler + # This is the control flow graph that is being compiled. + attr_reader :cfg + + # This data structure will hold the data flow between instructions + # within individual basic blocks. + attr_reader :insn_flows + + # This data structure will hold the data flow between basic blocks. + attr_reader :block_flows + + def initialize(cfg) + @cfg = cfg + @insn_flows = cfg.insns.to_h { |length, _| [length, DataFlow.new] } + @block_flows = cfg.blocks.to_h { |block| [block.id, DataFlow.new] } + end + + def compile + find_internal_flow + find_external_flow + DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify) + end + + private + + # Find the data flow within each basic block. Using an abstract stack, + # connect from consumers of data to the producers of that data. + def find_internal_flow + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + stack = [] + + # Go through each instruction in the block. + block.each_with_length do |insn, length| + insn_flow = insn_flows[length] + + # How many values will be missing from the local stack to run this + # instruction? This will be used to determine if the values that + # are being used by this instruction are coming from previous + # instructions or from previous basic blocks. + missing = insn.pops - stack.size + + # For every value the instruction pops off the stack. + insn.pops.times do + # Was the value it pops off from another basic block? + if stack.empty? 
+ # If the stack is empty, then there aren't enough values being + # pushed from previous instructions to fulfill the needs of + # this instruction. In that case the values must be coming + # from previous basic blocks. + missing -= 1 + argument = BlockArgument.new(:"in_#{missing}") + + insn_flow.in.unshift(argument) + block_flow.in.unshift(argument) + else + # Since there are values in the stack, we can connect this + # consumer to the producer of the value. + insn_flow.in.unshift(stack.pop) + end + end + + # Record on our abstract stack that this instruction pushed + # this value onto the stack. + insn.pushes.times { stack << LocalArgument.new(length) } + end + + # Values that are left on the stack after going through all + # instructions are arguments to the basic block that we jump to. + stack.reverse_each.with_index do |producer, index| + block_flow.out << producer + + argument = BlockArgument.new(:"out_#{index}") + insn_flows[producer.length].out << argument + end + end + + # Go backwards and connect from producers to consumers. + cfg.insns.each_key do |length| + # For every instruction that produced a value used in this + # instruction... + insn_flows[length].in.each do |producer| + # If it's actually another instruction and not a basic block + # argument... + if producer.is_a?(LocalArgument) + # Record in the producing instruction that it produces a value + # used by this construction. + insn_flows[producer.length].out << LocalArgument.new(length) + end + end + end + end + + # Find the data that flows between basic blocks. + def find_external_flow + stack = [*cfg.blocks] + + until stack.empty? + block = stack.pop + block_flow = block_flows.fetch(block.id) + + block.incoming_blocks.each do |incoming_block| + incoming_flow = block_flows.fetch(incoming_block.id) + + # Does a predecessor block have fewer outputs than the successor + # has inputs? 
+ if incoming_flow.out.size < block_flow.in.size + # If so then add arguments to pass data through from the + # incoming block's incoming blocks. + (block_flow.in.size - incoming_flow.out.size).times do |index| + name = BlockArgument.new(:"pass_#{index}") + + incoming_flow.in.unshift(name) + incoming_flow.out.unshift(name) + end + + # Since we modified the incoming block, add it back to the stack + # so it'll be considered as an outgoing block again, and + # propogate the external data flow back up the control flow + # graph. + stack << incoming_block + end + end + end + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb index 753ba80a..4ea99e3a 100644 --- a/lib/syntax_tree/yarv/decompiler.rb +++ b/lib/syntax_tree/yarv/decompiler.rb @@ -151,7 +151,7 @@ def decompile(iseq) elsif argc == 1 && method.end_with?("=") receiver, argument = clause.pop(2) clause << Assign( - CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + Field(receiver, Period("."), Ident(method[0..-2])), argument ) else diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index d303bcb7..dac220fd 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -3,16 +3,52 @@ module SyntaxTree module YARV class Disassembler + # This class is another object that handles disassembling a YARV + # instruction sequence but it renders it without any of the extra spacing + # or alignment. + class Squished + def calldata(value) + value.inspect + end + + def enqueue(iseq) + end + + def event(name) + end + + def inline_storage(cache) + "" + end + + def instruction(name, operands = []) + operands.empty? ? name : "#{name} #{operands.join(", ")}" + end + + def label(value) + "%04d" % value.name["label_".length..] 
+ end + + def local(index, **) + index.inspect + end + + def object(value) + value.inspect + end + end + attr_reader :output, :queue + attr_reader :current_prefix attr_accessor :current_iseq - def initialize + def initialize(current_iseq = nil) @output = StringIO.new @queue = [] @current_prefix = "" - @current_iseq = nil + @current_iseq = current_iseq end ######################################################################## @@ -20,30 +56,7 @@ def initialize ######################################################################## def calldata(value) - flag_names = [] - flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) - if value.flag?(CallData::CALL_ARGS_BLOCKARG) - flag_names << :ARGS_BLOCKARG - end - flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) - flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) - flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) - flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) - flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) - flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) - flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) - flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) - flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{value.method}" if value.method - parts << "argc:#{value.argc}" - parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg - parts << flag_names.join("|") if flag_names.any? - - "" + value.inspect end def enqueue(iseq) @@ -97,7 +110,7 @@ def object(value) end ######################################################################## - # Main entrypoint + # Entrypoints ######################################################################## def format! @@ -105,63 +118,13 @@ def format! 
output << "\n" if output.pos > 0 format_iseq(@current_iseq) end - - output.string end - private - - def format_iseq(iseq) - output << "#{current_prefix}== disasm: " - output << "#:1 " - - location = Location.fixed(line: iseq.line, char: 0, column: 0) - output << "(#{location.start_line},#{location.start_column})-" - output << "(#{location.end_line},#{location.end_column})" - output << "> " - - if iseq.catch_table.any? - output << "(catch: TRUE)\n" - output << "#{current_prefix}== catch table\n" - - with_prefix("#{current_prefix}| ") do - iseq.catch_table.each do |entry| - case entry - when InstructionSequence::CatchBreak - output << "#{current_prefix}catch type: break\n" - format_iseq(entry.iseq) - when InstructionSequence::CatchNext - output << "#{current_prefix}catch type: next\n" - when InstructionSequence::CatchRedo - output << "#{current_prefix}catch type: redo\n" - when InstructionSequence::CatchRescue - output << "#{current_prefix}catch type: rescue\n" - format_iseq(entry.iseq) - end - end - end - - output << "#{current_prefix}|#{"-" * 72}\n" - else - output << "(catch: FALSE)\n" - end - - if (local_table = iseq.local_table) && !local_table.empty? - output << "#{current_prefix}local table (size: #{local_table.size})\n" - - locals = - local_table.locals.each_with_index.map do |local, index| - "[%2d] %s@%d" % [local_table.offset(index), local.name, index] - end - - output << "#{current_prefix}#{locals.join(" ")}\n" - end - - length = 0 + def format_insns!(insns, length = 0) events = [] lines = [] - iseq.insns.each do |insn| + insns.each do |insn| case insn when Integer lines << insn @@ -191,22 +154,83 @@ def format_iseq(iseq) events.clear end + # A hook here to allow for custom formatting of instructions after + # the main body has been processed. + yield insn, length if block_given? 
+ output << "\n" length += insn.length end end end + def print(string) + output.print(string) + end + + def puts(string) + output.puts(string) + end + + def string + output.string + end + def with_prefix(value) previous = @current_prefix begin @current_prefix = value - yield + yield value ensure @current_prefix = previous end end + + private + + def format_iseq(iseq) + output << "#{current_prefix}== disasm: #{iseq.inspect} " + + if iseq.catch_table.any? + output << "(catch: TRUE)\n" + output << "#{current_prefix}== catch table\n" + + with_prefix("#{current_prefix}| ") do + iseq.catch_table.each do |entry| + case entry + when InstructionSequence::CatchBreak + output << "#{current_prefix}catch type: break\n" + format_iseq(entry.iseq) + when InstructionSequence::CatchNext + output << "#{current_prefix}catch type: next\n" + when InstructionSequence::CatchRedo + output << "#{current_prefix}catch type: redo\n" + when InstructionSequence::CatchRescue + output << "#{current_prefix}catch type: rescue\n" + format_iseq(entry.iseq) + end + end + end + + output << "#{current_prefix}|#{"-" * 72}\n" + else + output << "(catch: FALSE)\n" + end + + if (local_table = iseq.local_table) && !local_table.empty? + output << "#{current_prefix}local table (size: #{local_table.size})\n" + + locals = + local_table.locals.each_with_index.map do |local, index| + "[%2d] %s@%d" % [local_table.offset(index), local.name, index] + end + + output << "#{current_prefix}#{locals.join(" ")}\n" + end + + format_insns!(iseq.insns) + end end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 6aa7279e..45b543e6 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -7,6 +7,28 @@ module YARV # list of instructions along with the metadata pertaining to them. It also # functions as a builder for the instruction sequence. 
class InstructionSequence + # This provides a handle to the rb_iseq_load function, which allows you + # to pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + def self.iseq_load(iseq) + require "fiddle" + + @iseq_load_function ||= + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + Fiddle.dlunwrap(@iseq_load_function.call(Fiddle.dlwrap(iseq), 0, nil)) + rescue LoadError + raise "Could not load the Fiddle library" + rescue NameError + raise "Unable to find rb_iseq_load" + rescue Fiddle::DLError + raise "Unable to perform a dynamic load" + end + # When the list of instructions is first being created, it's stored as a # linked list. This is to make it easier to perform peephole optimizations # and other transformations like instruction specialization. @@ -60,19 +82,6 @@ def push(instruction) MAGIC = "YARVInstructionSequence/SimpleDataFormat" - # This provides a handle to the rb_iseq_load function, which allows you to - # pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - ISEQ_LOAD = - begin - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - rescue NameError, Fiddle::DLError - end - # This object is used to track the size of the stack at any given time. It # is effectively a mini symbolic interpreter. It's necessary because when # instruction sequences get serialized they include a :stack_max field on @@ -221,8 +230,7 @@ def length end def eval - raise "Unsupported platform" if ISEQ_LOAD.nil? 
- Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval + InstructionSequence.iseq_load(to_a).eval end def to_a @@ -269,10 +277,27 @@ def to_a ] end + def to_cfg + ControlFlowGraph.compile(self) + end + + def to_dfg + to_cfg.to_dfg + end + + def to_son + to_dfg.to_son + end + def disasm - disassembler = Disassembler.new - disassembler.enqueue(self) - disassembler.format! + fmt = Disassembler.new + fmt.enqueue(self) + fmt.format! + fmt.string + end + + def inspect + "#:1 (#{line},0)-(#{line},0)>" end # This method converts our linked list of instructions into a final array diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index bba06f8d..38c80fde 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2,65 +2,55 @@ module SyntaxTree module YARV - # This is an operand to various YARV instructions that represents the - # information about a specific call site. - class CallData - CALL_ARGS_SPLAT = 1 << 0 - CALL_ARGS_BLOCKARG = 1 << 1 - CALL_FCALL = 1 << 2 - CALL_VCALL = 1 << 3 - CALL_ARGS_SIMPLE = 1 << 4 - CALL_BLOCKISEQ = 1 << 5 - CALL_KWARG = 1 << 6 - CALL_KW_SPLAT = 1 << 7 - CALL_TAILCALL = 1 << 8 - CALL_SUPER = 1 << 9 - CALL_ZSUPER = 1 << 10 - CALL_OPT_SEND = 1 << 11 - CALL_KW_SPLAT_MUT = 1 << 12 - - attr_reader :method, :argc, :flags, :kw_arg - - def initialize( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - @method = method - @argc = argc - @flags = flags - @kw_arg = kw_arg + # This is a base class for all YARV instructions. It provides a few + # convenience methods for working with instructions. + class Instruction + # This method creates an instruction that represents the canonical + # (non-specialized) form of this instruction. If this instruction is not + # a specialized instruction, then this method returns `self`. 
+ def canonical + self end - def flag?(mask) - (flags & mask) > 0 + # This returns the size of the instruction in terms of the number of slots + # it occupies in the instruction sequence. Effectively this is 1 plus the + # number of operands. + def length + 1 end - def to_h - result = { mid: method, flag: flags, orig_argc: argc } - result[:kw_arg] = kw_arg if kw_arg - result + # This returns the number of values that are pushed onto the stack. + def pushes + 0 end - def self.from(serialized) - new( - serialized[:mid], - serialized[:orig_argc], - serialized[:flag], - serialized[:kw_arg] - ) + # This returns the number of values that are popped off the stack. + def pops + 0 + end + + # This returns an array of labels. + def branch_targets + [] + end + + # Whether or not this instruction leaves the current frame. + def leaves? + false end - end - # A convenience method for creating a CallData object. - def self.calldata( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - CallData.new(method, argc, flags, kw_arg) + # Whether or not this instruction falls through to the next instruction if + # its branching fails. + def falls_through? + false + end + + # Does the instruction have side effects? Control-flow counts as a + # side-effect, as do some special-case instructions like Leave. By default + # every instruction is marked as having side effects. + def side_effects? 
+ true + end end # ### Summary @@ -76,7 +66,7 @@ def self.calldata( # x[0] # ~~~ # - class AdjustStack + class AdjustStack < Instruction attr_reader :number def initialize(number) @@ -107,14 +97,6 @@ def pops number end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.pop(number) end @@ -138,7 +120,7 @@ def call(vm) # "#{5}" # ~~~ # - class AnyToString + class AnyToString < Instruction def disasm(fmt) fmt.instruction("anytostring") end @@ -155,10 +137,6 @@ def ==(other) other.is_a?(AnyToString) end - def length - 1 - end - def pops 2 end @@ -167,10 +145,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) original, value = vm.pop(2) @@ -198,7 +172,7 @@ def call(vm) # puts x # ~~~ # - class BranchIf + class BranchIf < Instruction attr_reader :label def initialize(label) @@ -229,16 +203,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) if vm.pop + def falls_through? + true end end @@ -259,7 +233,7 @@ def call(vm) # end # ~~~ # - class BranchNil + class BranchNil < Instruction attr_reader :label def initialize(label) @@ -290,16 +264,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop.nil? end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) if vm.pop.nil? + def falls_through? + true end end @@ -319,7 +293,7 @@ def call(vm) # end # ~~~ # - class BranchUnless + class BranchUnless < Instruction attr_reader :label def initialize(label) @@ -350,16 +324,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) unless vm.pop end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) unless vm.pop + def falls_through? 
+ true end end @@ -382,7 +356,7 @@ def call(vm) # evaluate(value: 3) # ~~~ # - class CheckKeyword + class CheckKeyword < Instruction attr_reader :keyword_bits_index, :keyword_index def initialize(keyword_bits_index, keyword_index) @@ -419,18 +393,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) end @@ -448,7 +414,7 @@ def call(vm) # foo in Foo # ~~~ # - class CheckMatch + class CheckMatch < Instruction VM_CHECKMATCH_TYPE_WHEN = 1 VM_CHECKMATCH_TYPE_CASE = 2 VM_CHECKMATCH_TYPE_RESCUE = 3 @@ -489,10 +455,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) target, pattern = vm.pop(2) @@ -536,7 +498,7 @@ def check?(pattern, target) # foo in [bar] # ~~~ # - class CheckType + class CheckType < Instruction TYPE_OBJECT = 0x01 TYPE_CLASS = 0x02 TYPE_MODULE = 0x03 @@ -643,10 +605,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) object = vm.pop result = @@ -713,7 +671,7 @@ def call(vm) # [1, *2] # ~~~ # - class ConcatArray + class ConcatArray < Instruction def disasm(fmt) fmt.instruction("concatarray") end @@ -730,10 +688,6 @@ def ==(other) other.is_a?(ConcatArray) end - def length - 1 - end - def pops 2 end @@ -742,10 +696,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push([*left, *right]) @@ -767,7 +717,7 @@ def call(vm) # "#{5}" # ~~~ # - class ConcatStrings + class ConcatStrings < Instruction attr_reader :number def initialize(number) @@ -802,10 +752,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).join) end @@ -826,7 +772,7 @@ def call(vm) # end # ~~~ # - class DefineClass + class DefineClass < Instruction TYPE_CLASS = 0 TYPE_SINGLETON_CLASS = 1 TYPE_MODULE = 2 @@ -874,10 +820,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object, superclass = vm.pop(2) @@ -914,7 +856,7 @@ def call(vm) # defined?(x) # ~~~ # - class 
Defined + class Defined < Instruction TYPE_NIL = 1 TYPE_IVAR = 2 TYPE_LVAR = 3 @@ -1011,10 +953,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object = vm.pop @@ -1069,7 +1007,7 @@ def call(vm) # def value = "value" # ~~~ # - class DefineMethod + class DefineMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1102,18 +1040,6 @@ def length 3 end - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) name = method_name nesting = vm.frame.nesting @@ -1150,7 +1076,7 @@ def call(vm) # def self.value = "value" # ~~~ # - class DefineSMethod + class DefineSMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1187,14 +1113,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) name = method_name nesting = vm.frame.nesting @@ -1227,7 +1145,7 @@ def call(vm) # $global = 5 # ~~~ # - class Dup + class Dup < Instruction def disasm(fmt) fmt.instruction("dup") end @@ -1244,10 +1162,6 @@ def ==(other) other.is_a?(Dup) end - def length - 1 - end - def pops 1 end @@ -1256,13 +1170,13 @@ def pushes 2 end - def canonical - self - end - def call(vm) vm.push(vm.stack.last.dup) end + + def side_effects? 
+ false + end end # ### Summary @@ -1275,7 +1189,7 @@ def call(vm) # [true] # ~~~ # - class DupArray + class DupArray < Instruction attr_reader :object def initialize(object) @@ -1302,18 +1216,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -1329,7 +1235,7 @@ def call(vm) # { a: 1 } # ~~~ # - class DupHash + class DupHash < Instruction attr_reader :object def initialize(object) @@ -1356,18 +1262,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -1383,7 +1281,7 @@ def call(vm) # Object::X ||= true # ~~~ # - class DupN + class DupN < Instruction attr_reader :number def initialize(number) @@ -1410,18 +1308,10 @@ def length 2 end - def pops - 0 - end - def pushes number end - def canonical - self - end - def call(vm) values = vm.pop(number) vm.push(*values) @@ -1441,7 +1331,7 @@ def call(vm) # x, = [true, false, nil] # ~~~ # - class ExpandArray + class ExpandArray < Instruction attr_reader :number, :flags def initialize(number, flags) @@ -1478,10 +1368,6 @@ def pushes number end - def canonical - self - end - def call(vm) object = vm.pop object = @@ -1539,7 +1425,7 @@ def call(vm) # end # ~~~ # - class GetBlockParam + class GetBlockParam < Instruction attr_reader :index, :level def initialize(index, level) @@ -1570,18 +1456,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1602,7 +1480,7 @@ def call(vm) # end # ~~~ # - class GetBlockParamProxy + class GetBlockParamProxy < Instruction attr_reader :index, :level def initialize(index, level) @@ -1636,18 +1514,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1665,7 +1535,7 @@ def call(vm) # @@class_variable # ~~~ # - class GetClassVariable + class 
GetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1697,18 +1567,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) @@ -1728,7 +1590,7 @@ def call(vm) # Constant # ~~~ # - class GetConstant + class GetConstant < Instruction attr_reader :name def initialize(name) @@ -1763,10 +1625,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) const_base, allow_nil = vm.pop(2) @@ -1798,7 +1656,7 @@ def call(vm) # $$ # ~~~ # - class GetGlobal + class GetGlobal < Instruction attr_reader :name def initialize(name) @@ -1825,18 +1683,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) # Evaluating the name of the global variable because there isn't a # reflection API for global variables. @@ -1861,7 +1711,7 @@ def call(vm) # @instance_variable # ~~~ # - class GetInstanceVariable + class GetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1893,18 +1743,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) method = Object.instance_method(:instance_variable_get) vm.push(method.bind(vm.frame._self).call(name)) @@ -1925,7 +1767,7 @@ def call(vm) # tap { tap { value } } # ~~~ # - class GetLocal + class GetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -1955,18 +1797,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1985,7 +1819,7 @@ def call(vm) # value # ~~~ # - class GetLocalWC0 + class GetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -2012,10 +1846,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -2042,7 +1872,7 @@ def call(vm) # self.then { value } # ~~~ # - class GetLocalWC1 + class GetLocalWC1 < 
Instruction attr_reader :index def initialize(index) @@ -2069,10 +1899,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -2096,7 +1922,7 @@ def call(vm) # 1 if (a == 1) .. (b == 2) # ~~~ # - class GetSpecial + class GetSpecial < Instruction SVAR_LASTLINE = 0 # $_ SVAR_BACKREF = 1 # $~ SVAR_FLIPFLOP_START = 2 # flipflop @@ -2128,18 +1954,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) case key when SVAR_LASTLINE @@ -2163,7 +1981,7 @@ def call(vm) # :"#{"foo"}" # ~~~ # - class Intern + class Intern < Instruction def disasm(fmt) fmt.instruction("intern") end @@ -2180,10 +1998,6 @@ def ==(other) other.is_a?(Intern) end - def length - 1 - end - def pops 1 end @@ -2192,10 +2006,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_sym) end @@ -2215,7 +2025,7 @@ def call(vm) # end # ~~~ # - class InvokeBlock + class InvokeBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -2250,10 +2060,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) end @@ -2273,7 +2079,7 @@ def call(vm) # end # ~~~ # - class InvokeSuper + class InvokeSuper < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -2302,10 +2108,6 @@ def ==(other) other.block_iseq == block_iseq end - def length - 1 - end - def pops argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 
1 : 0) argb + calldata.argc + 1 @@ -2315,10 +2117,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -2358,7 +2156,7 @@ def call(vm) # end # ~~~ # - class Jump + class Jump < Instruction attr_reader :label def initialize(label) @@ -2385,21 +2183,13 @@ def length 2 end - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.jump(label) end + + def branch_targets + [label] + end end # ### Summary @@ -2412,7 +2202,7 @@ def call(vm) # ;; # ~~~ # - class Leave + class Leave < Instruction def disasm(fmt) fmt.instruction("leave") end @@ -2429,10 +2219,6 @@ def ==(other) other.is_a?(Leave) end - def length - 1 - end - def pops 1 end @@ -2443,13 +2229,13 @@ def pushes 0 end - def canonical - self - end - def call(vm) vm.leave end + + def leaves? + true + end end # ### Summary @@ -2464,7 +2250,7 @@ def call(vm) # ["string"] # ~~~ # - class NewArray + class NewArray < Instruction attr_reader :number def initialize(number) @@ -2499,10 +2285,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2520,7 +2302,7 @@ def call(vm) # ["string", **{ foo: "bar" }] # ~~~ # - class NewArrayKwSplat + class NewArrayKwSplat < Instruction attr_reader :number def initialize(number) @@ -2555,10 +2337,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2578,7 +2356,7 @@ def call(vm) # end # ~~~ # - class NewHash + class NewHash < Instruction attr_reader :number def initialize(number) @@ -2613,10 +2391,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).each_slice(2).to_h) end @@ -2637,7 +2411,7 @@ def call(vm) # p (x..y), (x...y) # ~~~ # - class NewRange + class NewRange < Instruction attr_reader :exclude_end def initialize(exclude_end) @@ -2672,10 +2446,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Range.new(*vm.pop(2), exclude_end == 1)) end @@ -2692,7 
+2462,7 @@ def call(vm) # raise rescue true # ~~~ # - class Nop + class Nop < Instruction def disasm(fmt) fmt.instruction("nop") end @@ -2705,27 +2475,15 @@ def deconstruct_keys(_keys) {} end - def ==(other) - other.is_a?(Nop) - end - - def length - 1 - end - - def pops - 0 - end - - def pushes - 0 + def ==(other) + other.is_a?(Nop) end - def canonical - self + def call(vm) end - def call(vm) + def side_effects? + false end end @@ -2743,7 +2501,7 @@ def call(vm) # "#{5}" # ~~~ # - class ObjToString + class ObjToString < Instruction attr_reader :calldata def initialize(calldata) @@ -2778,10 +2536,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_s) end @@ -2800,7 +2554,7 @@ def call(vm) # END { puts "END" } # ~~~ # - class Once + class Once < Instruction attr_reader :iseq, :cache def initialize(iseq, cache) @@ -2829,18 +2583,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) return if @executed vm.push(vm.run_block_frame(iseq, vm.frame)) @@ -2861,7 +2607,7 @@ def call(vm) # 2 & 3 # ~~~ # - class OptAnd + class OptAnd < Instruction attr_reader :calldata def initialize(calldata) @@ -2917,7 +2663,7 @@ def call(vm) # 7[2] # ~~~ # - class OptAref + class OptAref < Instruction attr_reader :calldata def initialize(calldata) @@ -2974,7 +2720,7 @@ def call(vm) # { 'test' => true }['test'] # ~~~ # - class OptArefWith + class OptArefWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3014,10 +2760,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop[object]) end @@ -3036,7 +2778,7 @@ def call(vm) # {}[:key] = value # ~~~ # - class OptAset + class OptAset < Instruction attr_reader :calldata def initialize(calldata) @@ -3092,7 +2834,7 @@ def call(vm) # {}["key"] = value # ~~~ # - class OptAsetWith + class OptAsetWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3132,10 +2874,6 @@ def pushes 1 
end - def canonical - self - end - def call(vm) hash, value = vm.pop(2) vm.push(hash[object] = value) @@ -3165,7 +2903,7 @@ def call(vm) # end # ~~~ # - class OptCaseDispatch + class OptCaseDispatch < Instruction attr_reader :case_dispatch_hash, :else_label def initialize(case_dispatch_hash, else_label) @@ -3206,16 +2944,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) end - def canonical - self + def branch_targets + case_dispatch_hash.values.push(else_label) end - def call(vm) - vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) + def falls_through? + true end end @@ -3232,7 +2970,7 @@ def call(vm) # 2 / 3 # ~~~ # - class OptDiv + class OptDiv < Instruction attr_reader :calldata def initialize(calldata) @@ -3288,7 +3026,7 @@ def call(vm) # "".empty? # ~~~ # - class OptEmptyP + class OptEmptyP < Instruction attr_reader :calldata def initialize(calldata) @@ -3345,7 +3083,7 @@ def call(vm) # 2 == 2 # ~~~ # - class OptEq + class OptEq < Instruction attr_reader :calldata def initialize(calldata) @@ -3402,7 +3140,7 @@ def call(vm) # 4 >= 3 # ~~~ # - class OptGE + class OptGE < Instruction attr_reader :calldata def initialize(calldata) @@ -3458,7 +3196,7 @@ def call(vm) # ::Object # ~~~ # - class OptGetConstantPath + class OptGetConstantPath < Instruction attr_reader :names def initialize(names) @@ -3486,18 +3224,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) current = vm.frame._self current = current.class unless current.is_a?(Class) @@ -3523,7 +3253,7 @@ def call(vm) # 4 > 3 # ~~~ # - class OptGT + class OptGT < Instruction attr_reader :calldata def initialize(calldata) @@ -3580,7 +3310,7 @@ def call(vm) # 3 <= 4 # ~~~ # - class OptLE + class OptLE < Instruction attr_reader :calldata def initialize(calldata) @@ -3637,7 +3367,7 @@ def call(vm) # "".length # ~~~ # - class OptLength + class OptLength < Instruction attr_reader :calldata def 
initialize(calldata) @@ -3694,7 +3424,7 @@ def call(vm) # 3 < 4 # ~~~ # - class OptLT + class OptLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3751,7 +3481,7 @@ def call(vm) # "" << 2 # ~~~ # - class OptLTLT + class OptLTLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3809,7 +3539,7 @@ def call(vm) # 3 - 2 # ~~~ # - class OptMinus + class OptMinus < Instruction attr_reader :calldata def initialize(calldata) @@ -3866,7 +3596,7 @@ def call(vm) # 4 % 2 # ~~~ # - class OptMod + class OptMod < Instruction attr_reader :calldata def initialize(calldata) @@ -3923,7 +3653,7 @@ def call(vm) # 3 * 2 # ~~~ # - class OptMult + class OptMult < Instruction attr_reader :calldata def initialize(calldata) @@ -3982,7 +3712,7 @@ def call(vm) # 2 != 2 # ~~~ # - class OptNEq + class OptNEq < Instruction attr_reader :eq_calldata, :neq_calldata def initialize(eq_calldata, neq_calldata) @@ -4022,10 +3752,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) receiver, argument = vm.pop(2) vm.push(receiver != argument) @@ -4044,7 +3770,7 @@ def call(vm) # [a, b, c].max # ~~~ # - class OptNewArrayMax + class OptNewArrayMax < Instruction attr_reader :number def initialize(number) @@ -4079,10 +3805,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).max) end @@ -4100,7 +3822,7 @@ def call(vm) # [a, b, c].min # ~~~ # - class OptNewArrayMin + class OptNewArrayMin < Instruction attr_reader :number def initialize(number) @@ -4135,10 +3857,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).min) end @@ -4157,7 +3875,7 @@ def call(vm) # "".nil? 
# ~~~ # - class OptNilP + class OptNilP < Instruction attr_reader :calldata def initialize(calldata) @@ -4212,7 +3930,7 @@ def call(vm) # !true # ~~~ # - class OptNot + class OptNot < Instruction attr_reader :calldata def initialize(calldata) @@ -4269,7 +3987,7 @@ def call(vm) # 2 | 3 # ~~~ # - class OptOr + class OptOr < Instruction attr_reader :calldata def initialize(calldata) @@ -4326,7 +4044,7 @@ def call(vm) # 2 + 3 # ~~~ # - class OptPlus + class OptPlus < Instruction attr_reader :calldata def initialize(calldata) @@ -4382,7 +4100,7 @@ def call(vm) # /a/ =~ "a" # ~~~ # - class OptRegExpMatch2 + class OptRegExpMatch2 < Instruction attr_reader :calldata def initialize(calldata) @@ -4438,7 +4156,7 @@ def call(vm) # puts "Hello, world!" # ~~~ # - class OptSendWithoutBlock + class OptSendWithoutBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -4495,7 +4213,7 @@ def call(vm) # "".size # ~~~ # - class OptSize + class OptSize < Instruction attr_reader :calldata def initialize(calldata) @@ -4551,7 +4269,7 @@ def call(vm) # "hello".freeze # ~~~ # - class OptStrFreeze + class OptStrFreeze < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4583,18 +4301,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.freeze) end @@ -4612,7 +4322,7 @@ def call(vm) # -"string" # ~~~ # - class OptStrUMinus + class OptStrUMinus < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4644,18 +4354,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(-object) end @@ -4674,7 +4376,7 @@ def call(vm) # "".succ # ~~~ # - class OptSucc + class OptSucc < Instruction attr_reader :calldata def initialize(calldata) @@ -4728,7 +4430,7 @@ def call(vm) # a ||= 2 # ~~~ # - class Pop + class Pop < Instruction def disasm(fmt) fmt.instruction("pop") end @@ -4745,25 +4447,17 @@ def ==(other) 
other.is_a?(Pop) end - def length - 1 - end - def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.pop end + + def side_effects? + false + end end # ### Summary @@ -4776,7 +4470,7 @@ def call(vm) # nil # ~~~ # - class PutNil + class PutNil < Instruction def disasm(fmt) fmt.instruction("putnil") end @@ -4793,14 +4487,6 @@ def ==(other) other.is_a?(PutNil) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4812,6 +4498,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4824,7 +4514,7 @@ def call(vm) # 5 # ~~~ # - class PutObject + class PutObject < Instruction attr_reader :object def initialize(object) @@ -4851,21 +4541,17 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object) end + + def side_effects? + false + end end # ### Summary @@ -4880,7 +4566,7 @@ def call(vm) # 0 # ~~~ # - class PutObjectInt2Fix0 + class PutObjectInt2Fix0 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_0_") end @@ -4897,14 +4583,6 @@ def ==(other) other.is_a?(PutObjectInt2Fix0) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4916,6 +4594,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4930,7 +4612,7 @@ def call(vm) # 1 # ~~~ # - class PutObjectInt2Fix1 + class PutObjectInt2Fix1 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_1_") end @@ -4947,14 +4629,6 @@ def ==(other) other.is_a?(PutObjectInt2Fix1) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4966,6 +4640,10 @@ def canonical def call(vm) canonical.call(vm) end + + def side_effects? + false + end end # ### Summary @@ -4978,7 +4656,7 @@ def call(vm) # puts "Hello, world!" 
# ~~~ # - class PutSelf + class PutSelf < Instruction def disasm(fmt) fmt.instruction("putself") end @@ -4995,25 +4673,17 @@ def ==(other) other.is_a?(PutSelf) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame._self) end + + def side_effects? + false + end end # ### Summary @@ -5028,7 +4698,7 @@ def call(vm) # alias foo bar # ~~~ # - class PutSpecialObject + class PutSpecialObject < Instruction OBJECT_VMCORE = 1 OBJECT_CBASE = 2 OBJECT_CONST_BASE = 3 @@ -5059,18 +4729,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) case object when OBJECT_VMCORE @@ -5095,7 +4757,7 @@ def call(vm) # "foo" # ~~~ # - class PutString + class PutString < Instruction attr_reader :object def initialize(object) @@ -5122,18 +4784,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -5152,7 +4806,7 @@ def call(vm) # "hello".tap { |i| p i } # ~~~ # - class Send + class Send < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -5194,10 +4848,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -5240,7 +4890,7 @@ def call(vm) # end # ~~~ # - class SetBlockParam + class SetBlockParam < Instruction attr_reader :index, :level def initialize(index, level) @@ -5275,14 +4925,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.local_set(index, level, vm.pop) end @@ -5301,7 +4943,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -5337,14 +4979,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) @@ -5363,7 +4997,7 @@ def call(vm) # Constant = 1 
# ~~~ # - class SetConstant + class SetConstant < Instruction attr_reader :name def initialize(name) @@ -5394,14 +5028,6 @@ def pops 2 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) value, parent = vm.pop(2) parent.const_set(name, value) @@ -5419,7 +5045,7 @@ def call(vm) # $global = 5 # ~~~ # - class SetGlobal + class SetGlobal < Instruction attr_reader :name def initialize(name) @@ -5450,14 +5076,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) # Evaluating the name of the global variable because there isn't a # reflection API for global variables. @@ -5481,7 +5099,7 @@ def call(vm) # @instance_variable = 1 # ~~~ # - class SetInstanceVariable + class SetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -5517,14 +5135,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) method = Object.instance_method(:instance_variable_set) method.bind(vm.frame._self).call(name, vm.pop) @@ -5545,7 +5155,7 @@ def call(vm) # tap { tap { value = 10 } } # ~~~ # - class SetLocal + class SetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -5579,14 +5189,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.local_set(index, level, vm.pop) end @@ -5605,7 +5207,7 @@ def call(vm) # value = 5 # ~~~ # - class SetLocalWC0 + class SetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -5636,10 +5238,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 0) end @@ -5662,7 +5260,7 @@ def call(vm) # self.then { value = 10 } # ~~~ # - class SetLocalWC1 + class SetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -5693,10 +5291,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 1) end @@ -5717,7 +5311,7 @@ def call(vm) # {}[:key] = 'val' # ~~~ # - class SetN + class SetN < Instruction attr_reader :number def 
initialize(number) @@ -5752,10 +5346,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.stack[-number - 1] = vm.stack.last end @@ -5773,7 +5363,7 @@ def call(vm) # baz if (foo == 1) .. (bar == 1) # ~~~ # - class SetSpecial + class SetSpecial < Instruction attr_reader :key def initialize(key) @@ -5804,14 +5394,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) case key when GetSpecial::SVAR_LASTLINE @@ -5836,7 +5418,7 @@ def call(vm) # x = *(5) # ~~~ # - class SplatArray + class SplatArray < Instruction attr_reader :flag def initialize(flag) @@ -5871,10 +5453,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) value = vm.pop @@ -5914,7 +5492,7 @@ def call(vm) # !!defined?([[]]) # ~~~ # - class Swap + class Swap < Instruction def disasm(fmt) fmt.instruction("swap") end @@ -5931,10 +5509,6 @@ def ==(other) other.is_a?(Swap) end - def length - 1 - end - def pops 2 end @@ -5943,10 +5517,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push(right, left) @@ -5965,7 +5535,7 @@ def call(vm) # [1, 2, 3].map { break 2 } # ~~~ # - class Throw + class Throw < Instruction RUBY_TAG_NONE = 0x0 RUBY_TAG_RETURN = 0x1 RUBY_TAG_BREAK = 0x2 @@ -6013,10 +5583,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) state = type & VM_THROW_STATE_MASK value = vm.pop @@ -6072,7 +5638,7 @@ def error_backtrace(vm) # end # ~~~ # - class TopN + class TopN < Instruction attr_reader :number def initialize(number) @@ -6099,18 +5665,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.stack[-number - 1]) end @@ -6127,7 +5685,7 @@ def call(vm) # /foo #{bar}/ # ~~~ # - class ToRegExp + class ToRegExp < Instruction attr_reader :options, :length def initialize(options, length) @@ -6160,10 +5718,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Regexp.new(vm.pop(length).join, options)) end diff 
--git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index ab9b00df..e20729d9 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -19,7 +19,7 @@ module Legacy # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name def initialize(name) @@ -46,10 +46,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -79,7 +75,7 @@ def call(vm) # Constant # ~~~ # - class OptGetInlineCache + class OptGetInlineCache < Instruction attr_reader :label, :cache def initialize(label, cache) @@ -111,21 +107,21 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(nil) end + + def branch_targets + [label] + end + + def falls_through? + true + end end # ### Summary @@ -143,7 +139,7 @@ def call(vm) # Constant # ~~~ # - class OptSetInlineCache + class OptSetInlineCache < Instruction attr_reader :cache def initialize(cache) @@ -178,10 +174,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) end end @@ -200,7 +192,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name def initialize(name) @@ -231,10 +223,6 @@ def pops 1 end - def pushes - 0 - end - def canonical YARV::SetClassVariable.new(name, nil) end diff --git a/lib/syntax_tree/yarv/sea_of_nodes.rb b/lib/syntax_tree/yarv/sea_of_nodes.rb new file mode 100644 index 00000000..33ef14f7 --- /dev/null +++ b/lib/syntax_tree/yarv/sea_of_nodes.rb @@ -0,0 +1,534 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # A sea of nodes is an intermediate representation used by a compiler to + # represent both control and data flow in the same graph. The way we use it + # allows us to have the vertices of the graph represent either an + # instruction in the instruction sequence or a synthesized node that we add + # to the graph. 
The edges of the graph represent either control flow or data + # flow. + class SeaOfNodes + # This object represents a node in the graph that holds a YARV + # instruction. + class InsnNode + attr_reader :inputs, :outputs, :insn, :offset + + def initialize(insn, offset) + @inputs = [] + @outputs = [] + + @insn = insn + @offset = offset + end + + def id + offset + end + + def label + "%04d %s" % [offset, insn.disasm(Disassembler::Squished.new)] + end + end + + # Phi nodes are used to represent the merging of data flow from multiple + # incoming blocks. + class PhiNode + attr_reader :inputs, :outputs, :id + + def initialize(id) + @inputs = [] + @outputs = [] + @id = id + end + + def label + "#{id} φ" + end + end + + # Merge nodes are present in any block that has multiple incoming blocks. + # It provides a place for Phi nodes to attach their results. + class MergeNode + attr_reader :inputs, :outputs, :id + + def initialize(id) + @inputs = [] + @outputs = [] + @id = id + end + + def label + "#{id} ψ" + end + end + + # The edge of a graph represents either control flow or data flow. + class Edge + TYPES = %i[data control info].freeze + + attr_reader :from + attr_reader :to + attr_reader :type + attr_reader :label + + def initialize(from, to, type, label) + raise unless TYPES.include?(type) + + @from = from + @to = to + @type = type + @label = label + end + end + + # A subgraph represents the local data and control flow of a single basic + # block. + class SubGraph + attr_reader :first_fixed, :last_fixed, :inputs, :outputs + + def initialize(first_fixed, last_fixed, inputs, outputs) + @first_fixed = first_fixed + @last_fixed = last_fixed + @inputs = inputs + @outputs = outputs + end + end + + # The compiler is responsible for taking a data flow graph and turning it + # into a sea of nodes. 
+ class Compiler + attr_reader :dfg, :nodes + + def initialize(dfg) + @dfg = dfg + @nodes = [] + + # We need to put a unique ID on the synthetic nodes in the graph, so + # we keep a counter that we increment any time we create a new + # synthetic node. + @id_counter = 999 + end + + def compile + local_graphs = {} + dfg.blocks.each do |block| + local_graphs[block.id] = create_local_graph(block) + end + + connect_local_graphs_control(local_graphs) + connect_local_graphs_data(local_graphs) + cleanup_phi_nodes + cleanup_insn_nodes + + SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify) + end + + private + + # Counter for synthetic nodes. + def id_counter + @id_counter += 1 + end + + # Create a sub-graph for a single basic block - block block argument + # inputs and outputs will be left dangling, to be connected later. + def create_local_graph(block) + block_flow = dfg.block_flows.fetch(block.id) + + # A map of instructions to nodes. + insn_nodes = {} + + # Create a node for each instruction in the block. + block.each_with_length do |insn, offset| + node = InsnNode.new(insn, offset) + insn_nodes[offset] = node + nodes << node + end + + # The first and last node in the sub-graph, and the last fixed node. + previous_fixed = nil + first_fixed = nil + last_fixed = nil + + # The merge node for the phi nodes to attach to. + merge_node = nil + + # If there is more than one predecessor and we have basic block + # arguments coming in, then we need a merge node for the phi nodes to + # attach to. + if block.incoming_blocks.size > 1 && !block_flow.in.empty? + merge_node = MergeNode.new(id_counter) + nodes << merge_node + + previous_fixed = merge_node + first_fixed = merge_node + last_fixed = merge_node + end + + # Connect local control flow (only nodes with side effects.) + block.each_with_length do |insn, length| + if insn.side_effects? 
+ insn_node = insn_nodes[length] + connect previous_fixed, insn_node, :control if previous_fixed + previous_fixed = insn_node + first_fixed ||= insn_node + last_fixed = insn_node + end + end + + # Connect basic block arguments. + inputs = {} + outputs = {} + block_flow.in.each do |arg| + # Each basic block argument gets a phi node. Even if there's only + # one predecessor! We'll tidy this up later. + phi = PhiNode.new(id_counter) + connect(phi, merge_node, :info) if merge_node + nodes << phi + inputs[arg] = phi + + block.each_with_length do |_, consumer_offset| + consumer_flow = dfg.insn_flows[consumer_offset] + consumer_flow.in.each_with_index do |producer, input_index| + if producer == arg + connect(phi, insn_nodes[consumer_offset], :data, input_index) + end + end + end + + block_flow.out.each { |out| outputs[out] = phi if out == arg } + end + + # Connect local dataflow from consumers back to producers. + block.each_with_length do |_, consumer_offset| + consumer_flow = dfg.insn_flows.fetch(consumer_offset) + consumer_flow.in.each_with_index do |producer, input_index| + if producer.local? + connect( + insn_nodes[producer.length], + insn_nodes[consumer_offset], + :data, + input_index + ) + end + end + end + + # Connect dataflow from producers that leaves the block. + block.each_with_length do |_, producer_pc| + dfg + .insn_flows + .fetch(producer_pc) + .out + .each do |consumer| + unless consumer.local? + # This is an argument to the successor block - not to an + # instruction here. + outputs[consumer.name] = insn_nodes[producer_pc] + end + end + end + + # A graph with only side-effect free instructions will currently have + # no fixed nodes! In that case just use the first instruction's node + # for both first and last. But it's a bug that it'll appear in the + # control flow path! 
+ SubGraph.new( + first_fixed || insn_nodes[block.block_start], + last_fixed || insn_nodes[block.block_start], + inputs, + outputs + ) + end + + # Connect control flow that flows between basic blocks. + def connect_local_graphs_control(local_graphs) + dfg.blocks.each do |predecessor| + predecessor_last = local_graphs[predecessor.id].last_fixed + predecessor.outgoing_blocks.each_with_index do |successor, index| + label = + if index > 0 && + index == (predecessor.outgoing_blocks.length - 1) + # If there are multiple outgoing blocks from this block, then + # the last one is a fallthrough. Otherwise it's a branch. + :fallthrough + else + :"branch#{index}" + end + + connect( + predecessor_last, + local_graphs[successor.id].first_fixed, + :control, + label + ) + end + end + end + + # Connect data flow that flows between basic blocks. + def connect_local_graphs_data(local_graphs) + dfg.blocks.each do |predecessor| + arg_outs = local_graphs[predecessor.id].outputs.values + arg_outs.each_with_index do |arg_out, arg_n| + predecessor.outgoing_blocks.each do |successor| + successor_graph = local_graphs[successor.id] + arg_in = successor_graph.inputs.values[arg_n] + + # We're connecting to a phi node, so we may need a special + # label. + raise unless arg_in.is_a?(PhiNode) + + label = + case arg_out + when InsnNode + # Instructions that go into a phi node are labelled by the + # offset of last instruction in the block that executed + # them. This way you know which value to use for the phi, + # based on the last instruction you executed. + dfg.blocks.find do |block| + block_start = block.block_start + block_end = + block_start + block.insns.sum(&:length) - + block.insns.last.length + + if (block_start..block_end).cover?(arg_out.offset) + break block_end + end + end + when PhiNode + # Phi nodes to phi nodes are not labelled. + else + raise + end + + connect(arg_out, arg_in, :data, label) + end + end + end + end + + # We don't always build things in an optimal way. 
Go back and fix up + # some mess we left. Ideally we wouldn't create these problems in the + # first place. + def cleanup_phi_nodes + nodes.dup.each do |node| # dup because we're mutating + next unless node.is_a?(PhiNode) + + if node.inputs.size == 1 + # Remove phi nodes with a single input. + connect_over(node) + remove(node) + elsif node.inputs.map(&:from).uniq.size == 1 + # Remove phi nodes where all inputs are the same. + producer_edge = node.inputs.first + consumer_edge = node.outputs.find { |e| !e.to.is_a?(MergeNode) } + connect( + producer_edge.from, + consumer_edge.to, + :data, + consumer_edge.label + ) + remove(node) + end + end + end + + # Eliminate as many unnecessary nodes as we can. + def cleanup_insn_nodes + nodes.dup.each do |node| + next unless node.is_a?(InsnNode) + + case node.insn + when AdjustStack + # If there are any inputs to the adjust stack that are immediately + # discarded, we can remove them from the input list. + number = node.insn.number + + node.inputs.dup.each do |input_edge| + next if input_edge.type != :data + + from = input_edge.from + next unless from.is_a?(InsnNode) + + if from.inputs.empty? && from.outputs.size == 1 + number -= 1 + remove(input_edge.from) + elsif from.insn.is_a?(Dup) + number -= 1 + connect_over(from) + remove(from) + + new_edge = node.inputs.last + new_edge.from.outputs.delete(new_edge) + node.inputs.delete(new_edge) + end + end + + if number == 0 + connect_over(node) + remove(node) + else + next_node = + if number == 1 + InsnNode.new(Pop.new, node.offset) + else + InsnNode.new(AdjustStack.new(number), node.offset) + end + + next_node.inputs.concat(node.inputs) + next_node.outputs.concat(node.outputs) + + # Dynamically finding the index of the node in the nodes array + # because we're mutating the array as we go. + nodes[nodes.index(node)] = next_node + end + when Jump + # When you have a jump instruction that only has one input and one + # output, you can just connect over top of it and remove it. 
+ if node.inputs.size == 1 && node.outputs.size == 1 + connect_over(node) + remove(node) + end + when Pop + from = node.inputs.find { |edge| edge.type == :data }.from + next unless from.is_a?(InsnNode) + + removed = + if from.inputs.empty? && from.outputs.size == 1 + remove(from) + true + elsif from.insn.is_a?(Dup) + connect_over(from) + remove(from) + + new_edge = node.inputs.last + new_edge.from.outputs.delete(new_edge) + node.inputs.delete(new_edge) + true + else + false + end + + if removed + connect_over(node) + remove(node) + end + end + end + end + + # Connect one node to another. + def connect(from, to, type, label = nil) + raise if from == to + raise if !to.is_a?(PhiNode) && type == :data && label.nil? + + edge = Edge.new(from, to, type, label) + from.outputs << edge + to.inputs << edge + end + + # Connect all of the inputs to all of the outputs of a node. + def connect_over(node) + node.inputs.each do |producer_edge| + node.outputs.each do |consumer_edge| + connect( + producer_edge.from, + consumer_edge.to, + producer_edge.type, + producer_edge.label + ) + end + end + end + + # Remove a node from the graph. + def remove(node) + node.inputs.each do |producer_edge| + producer_edge.from.outputs.reject! { |edge| edge.to == node } + end + + node.outputs.each do |consumer_edge| + consumer_edge.to.inputs.reject! { |edge| edge.from == node } + end + + nodes.delete(node) + end + end + + attr_reader :dfg, :nodes, :local_graphs + + def initialize(dfg, nodes, local_graphs) + @dfg = dfg + @nodes = nodes + @local_graphs = local_graphs + end + + def to_mermaid + Mermaid.flowchart do |flowchart| + nodes.each do |node| + flowchart.node("node_#{node.id}", node.label, shape: :rounded) + end + + nodes.each do |producer| + producer.outputs.each do |consumer_edge| + label = + if !consumer_edge.label + # No label. + elsif consumer_edge.to.is_a?(PhiNode) + # Edges into phi nodes are labelled by the offset of the + # instruction going into the merge. 
+ "%04d" % consumer_edge.label + else + consumer_edge.label.to_s + end + + flowchart.link( + flowchart.fetch("node_#{producer.id}"), + flowchart.fetch("node_#{consumer_edge.to.id}"), + label, + type: consumer_edge.type == :info ? :dotted : :directed, + color: { data: :green, control: :red }[consumer_edge.type] + ) + end + end + end + end + + def verify + # Verify edge labels. + nodes.each do |node| + # Not talking about phi nodes right now. + next if node.is_a?(PhiNode) + + if node.is_a?(InsnNode) && node.insn.branch_targets.any? && + !node.insn.is_a?(Leave) + # A branching node must have at least one branch edge and + # potentially a fallthrough edge coming out. + + labels = node.outputs.map(&:label).sort + raise if labels[0] != :branch0 + raise if labels[1] != :fallthrough && labels.size > 2 + else + labels = node.inputs.filter { |e| e.type == :data }.map(&:label) + next if labels.empty? + + # No nil labels + raise if labels.any?(&:nil?) + + # Labels should start at zero. + raise unless labels.min.zero? + + # Labels should be contiguous. 
+ raise unless labels.sort == (labels.min..labels.max).to_a + end + end + end + + def self.compile(dfg) + Compiler.new(dfg).compile + end + end + end +end diff --git a/spec/mspec b/spec/mspec deleted file mode 160000 index 4877d58d..00000000 --- a/spec/mspec +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4877d58dff577641bc1ecd1bf3d3c3daa93b423f diff --git a/spec/ruby b/spec/ruby deleted file mode 160000 index 71873ae4..00000000 --- a/spec/ruby +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 71873ae4421f5b551a5af0f3427e901414736835 diff --git a/tasks/sorbet.rake b/tasks/sorbet.rake new file mode 100644 index 00000000..e4152664 --- /dev/null +++ b/tasks/sorbet.rake @@ -0,0 +1,277 @@ +# frozen_string_literal: true + +module SyntaxTree + class RBI + include DSL + + attr_reader :body, :line + + def initialize + @body = [] + @line = 1 + end + + def generate + require "syntax_tree/reflection" + + body << Comment("# typed: strict", false, location) + @line += 2 + + generate_parent + Reflection.nodes.sort.each { |(_, node)| generate_node(node) } + + Formatter.format(nil, Program(Statements(body))) + end + + private + + def generate_comments(comment) + comment + .lines(chomp: true) + .map { |line| Comment("# #{line}", false, location).tap { @line += 1 } } + end + + def generate_parent + attribute = Reflection.nodes[:Program].attributes[:location] + class_location = location + + node_body = generate_comments(attribute.comment) + node_body << sig_block { sig_returns { sig_type_for(attribute.type) } } + @line += 1 + + node_body << Command( + Ident("attr_reader"), + Args([SymbolLiteral(Ident("location"))]), + nil, + location + ) + @line += 1 + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Node")), + nil, + BodyStmt(Statements(node_body), nil, nil, nil, nil), + class_location + ) + @line += 2 + end + + def generate_node(node) + body.concat(generate_comments(node.comment)) + class_location = location + @line += 2 + + body << ClassDeclaration( + 
ConstPathRef(VarRef(Const("SyntaxTree")), Const(node.name.to_s)), + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Node")), + BodyStmt(Statements(generate_node_body(node)), nil, nil, nil, nil), + class_location + ) + + @line += 2 + end + + def generate_node_body(node) + node_body = [] + node.attributes.sort.each do |(name, attribute)| + next if name == :location + + node_body.concat(generate_comments(attribute.comment)) + node_body << sig_block { sig_returns { sig_type_for(attribute.type) } } + @line += 1 + + node_body << Command( + Ident("attr_reader"), + Args([SymbolLiteral(Ident(attribute.name.to_s))]), + nil, + location + ) + @line += 2 + end + + node_body.concat(generate_initialize(node)) + + node_body << sig_block do + CallNode( + sig_params do + BareAssocHash( + [Assoc(Label("visitor:"), sig_type_for(BasicVisitor))] + ) + end, + Period("."), + Ident("returns"), + ArgParen( + Args( + [CallNode(VarRef(Const("T")), Period("."), Ident("untyped"), nil)] + ) + ) + ) + end + @line += 1 + + node_body << generate_def_node( + "accept", + Paren( + LParen("("), + Params.new(requireds: [Ident("visitor")], location: location) + ) + ) + @line += 2 + + node_body << generate_child_nodes + @line += 1 + + node_body << generate_def_node("child_nodes", nil) + @line += 1 + + node_body + end + + def generate_initialize(node) + parameters = + SyntaxTree.const_get(node.name).instance_method(:initialize).parameters + + assocs = + parameters.map do |(_, name)| + Assoc(Label("#{name}:"), sig_type_for(node.attributes[name].type)) + end + + node_body = [] + node_body << sig_block do + CallNode( + sig_params { BareAssocHash(assocs) }, + Period("."), + Ident("void"), + nil + ) + end + @line += 1 + + params = Params.new(location: location) + parameters.each do |(type, name)| + case type + when :req + params.requireds << Ident(name.to_s) + when :keyreq + params.keywords << [Label("#{name}:"), nil] + when :key + params.keywords << [ + Label("#{name}:"), + CallNode( + VarRef(Const("T")), + 
Period("."), + Ident("unsafe"), + ArgParen(Args([VarRef(Kw("nil"))])) + ) + ] + else + raise + end + end + + node_body << generate_def_node("initialize", Paren(LParen("("), params)) + @line += 2 + + node_body + end + + def generate_child_nodes + type = + Reflection::Type::ArrayType.new( + Reflection::Type::UnionType.new([NilClass, Node]) + ) + + sig_block { sig_returns { sig_type_for(type) } } + end + + def generate_def_node(name, params) + DefNode( + nil, + nil, + Ident(name), + params, + BodyStmt(Statements([VoidStmt()]), nil, nil, nil, nil), + location + ) + end + + def sig_block + MethodAddBlock( + CallNode(nil, nil, Ident("sig"), nil), + BlockNode( + LBrace("{"), + nil, + BodyStmt(Statements([yield]), nil, nil, nil, nil) + ), + location + ) + end + + def sig_params + CallNode(nil, nil, Ident("params"), ArgParen(Args([yield]))) + end + + def sig_returns + CallNode(nil, nil, Ident("returns"), ArgParen(Args([yield]))) + end + + def sig_type_for(type) + case type + when Reflection::Type::ArrayType + ARef( + ConstPathRef(VarRef(Const("T")), Const("Array")), + sig_type_for(type.type) + ) + when Reflection::Type::TupleType + ArrayLiteral(LBracket("["), Args(type.types.map { sig_type_for(_1) })) + when Reflection::Type::UnionType + if type.types.include?(NilClass) + selected = type.types.reject { _1 == NilClass } + subtype = + if selected.size == 1 + selected.first + else + Reflection::Type::UnionType.new(selected) + end + + CallNode( + VarRef(Const("T")), + Period("."), + Ident("nilable"), + ArgParen(Args([sig_type_for(subtype)])) + ) + else + CallNode( + VarRef(Const("T")), + Period("."), + Ident("any"), + ArgParen(Args(type.types.map { sig_type_for(_1) })) + ) + end + when Symbol + ConstRef(Const("Symbol")) + else + *parents, constant = type.name.split("::").map { Const(_1) } + + if parents.empty? 
+ ConstRef(constant) + else + [*parents[1..], constant].inject( + VarRef(parents.first) + ) { |accum, const| ConstPathRef(accum, const) } + end + end + end + + def location + Location.fixed(line: line, char: 0, column: 0) + end + end +end + +namespace :sorbet do + desc "Generate RBI files for Sorbet" + task :rbi do + puts SyntaxTree::RBI.new.generate + end +end diff --git a/tasks/whitequark.rake b/tasks/whitequark.rake new file mode 100644 index 00000000..4f7ee650 --- /dev/null +++ b/tasks/whitequark.rake @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +# This file's purpose is to extract the examples from the whitequark/parser +# gem and generate a test file that we can use to ensure that our parser +# generates equivalent syntax trees when translating. To do this, it runs the +# parser's test suite but overrides the `assert_parses` method to collect the +# examples into a hash. Then, it writes out the hash to a file that we can use +# to generate our own tests. +# +# To run the test suite, it's important to note that we have to mirror both any +# APIs provided to the test suite (for example the ParseHelper module below). +# This is obviously relatively brittle, but it's effective for now. + +require "ast" + +module ParseHelper + # This object is going to collect all of the examples from the parser gem into + # a hash that we can use to generate our own tests. + COLLECTED = Hash.new { |hash, key| hash[key] = [] } + + include AST::Sexp + ALL_VERSIONS = %w[3.1 3.2] + + private + + def assert_context(*) + end + + def assert_diagnoses(*) + end + + def assert_diagnoses_many(*) + end + + def refute_diagnoses(*) + end + + def with_versions(*) + end + + def assert_parses(_ast, code, _source_maps = "", versions = ALL_VERSIONS) + # We're going to skip any examples that are for older Ruby versions + # that we do not support. + return if (versions & %w[3.1 3.2]).empty? 
+ + entry = caller.find { _1.include?("test_parser.rb") } + _, lineno, name = *entry.match(/(\d+):in `(.+)'/) + + COLLECTED["#{name}:#{lineno}"] << code + end +end + +namespace :extract do + desc "Extract the whitequark/parser tests" + task :whitequark do + directory = File.expand_path("../tmp/parser", __dir__) + unless File.directory?(directory) + sh "git clone --depth 1 https://github.com/whitequark/parser #{directory}" + end + + mkdir_p "#{directory}/extract" + touch "#{directory}/extract/helper.rb" + touch "#{directory}/extract/parse_helper.rb" + touch "#{directory}/extract/extracted.txt" + $:.unshift "#{directory}/extract" + + require "parser/current" + require "minitest/autorun" + require_relative "#{directory}/test/test_parser" + + Minitest.after_run do + filepath = File.expand_path("../test/translation/parser.txt", __dir__) + + File.open(filepath, "w") do |file| + ParseHelper::COLLECTED.sort.each do |(key, codes)| + if codes.length == 1 + file.puts("!!! #{key}\n#{codes.first}") + else + codes.each_with_index do |code, index| + file.puts("!!! #{key}:#{index}\n#{code}") + end + end + end + end + end + end +end diff --git a/test/fixtures/break.rb b/test/fixtures/break.rb index a77c6b35..a608a6b2 100644 --- a/test/fixtures/break.rb +++ b/test/fixtures/break.rb @@ -27,3 +27,9 @@ ) % break foo.bar :baz do |qux| qux end +- +break( + foo.bar :baz do |qux| + qux + end +) diff --git a/test/fixtures/call.rb b/test/fixtures/call.rb index c41ee4ac..d35c6036 100644 --- a/test/fixtures/call.rb +++ b/test/fixtures/call.rb @@ -60,3 +60,8 @@ % a b do end.c d +% +self. 
+=begin +=end + to_s diff --git a/test/fixtures/def.rb b/test/fixtures/def.rb index a827adfe..0cc49e0a 100644 --- a/test/fixtures/def.rb +++ b/test/fixtures/def.rb @@ -23,3 +23,9 @@ def foo() # comment def foo( # comment ) end +% +def +=begin +=end +a +end diff --git a/test/fixtures/def_endless.rb b/test/fixtures/def_endless.rb index 4595fba9..8d1f9d33 100644 --- a/test/fixtures/def_endless.rb +++ b/test/fixtures/def_endless.rb @@ -22,3 +22,13 @@ def self.foo = bar baz end def foo? = true +% +def a() +=begin +=end +=1 +- +def a() = +=begin +=end + 1 diff --git a/test/fixtures/next.rb b/test/fixtures/next.rb index be667951..79a8c62e 100644 --- a/test/fixtures/next.rb +++ b/test/fixtures/next.rb @@ -65,3 +65,10 @@ next([1, 2]) - next 1, 2 +% +next fun foo do end +- +next( + fun foo do + end +) diff --git a/test/fixtures/symbols.rb b/test/fixtures/symbols.rb index 5e2673f3..12f0a22f 100644 --- a/test/fixtures/symbols.rb +++ b/test/fixtures/symbols.rb @@ -19,3 +19,8 @@ %I[foo] # comment % %I{foo[]} +% +:\ +=begin +=end +symbol diff --git a/test/formatting_test.rb b/test/formatting_test.rb index 37ca29e1..5e5f9e9f 100644 --- a/test/formatting_test.rb +++ b/test/formatting_test.rb @@ -7,6 +7,7 @@ class FormattingTest < Minitest::Test Fixtures.each_fixture do |fixture| define_method(:"test_formatted_#{fixture.name}") do assert_equal(fixture.formatted, SyntaxTree.format(fixture.source)) + assert_syntax_tree(SyntaxTree.parse(fixture.source)) end end diff --git a/test/interface_test.rb b/test/interface_test.rb deleted file mode 100644 index 5086680e..00000000 --- a/test/interface_test.rb +++ /dev/null @@ -1,72 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class InterfaceTest < Minitest::Test - ObjectSpace.each_object(Node.singleton_class) do |klass| - next if klass == Node - - define_method(:"test_instantiate_#{klass.name}") do - assert_syntax_tree(instantiate(klass)) - end - end - - Fixtures.each_fixture do |fixture| - 
define_method(:"test_#{fixture.name}") do - assert_syntax_tree(SyntaxTree.parse(fixture.source)) - end - end - - private - - # This method is supposed to instantiate a new instance of the given class. - # The class is always a descendant from SyntaxTree::Node, so we can make - # certain assumptions about the way the initialize method is set up. If it - # needs to be special-cased, it's done so at the end of this method. - def instantiate(klass) - params = {} - - # Set up all of the keyword parameters for the class. - klass - .instance_method(:initialize) - .parameters - .each { |(type, name)| params[name] = nil if type.start_with?("key") } - - # Set up any default values that have to be arrays. - %i[ - assocs - comments - elements - keywords - locals - optionals - parts - posts - requireds - symbols - values - ].each { |key| params[key] = [] if params.key?(key) } - - # Set up a default location for the node. - params[:location] = Location.fixed(line: 0, char: 0, column: 0) - - case klass.name - when "SyntaxTree::Binary" - klass.new(**params, operator: :+) - when "SyntaxTree::Kw" - klass.new(**params, value: "kw") - when "SyntaxTree::Label" - klass.new(**params, value: "label:") - when "SyntaxTree::Op" - klass.new(**params, value: "+") - when "SyntaxTree::RegexpLiteral" - klass.new(**params, ending: "/") - when "SyntaxTree::Statements" - klass.new(nil, **params, body: []) - else - klass.new(**params) - end - end - end -end diff --git a/test/node_test.rb b/test/node_test.rb index 7254c086..19fbeed2 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -60,7 +60,7 @@ def test_arg_paren_heredoc ARGUMENT SOURCE - at = location(lines: 1..3, chars: 6..28) + at = location(lines: 1..3, chars: 6..37) assert_node(ArgParen, source, at: at, &:arguments) end @@ -131,7 +131,7 @@ def test_aryptn end SOURCE - at = location(lines: 2..2, chars: 18..47) + at = location(lines: 2..2, chars: 18..48) assert_node(AryPtn, source, at: at) { |node| node.consequent.pattern } end @@ -533,7 
+533,7 @@ def test_heredoc HEREDOC SOURCE - at = location(lines: 1..3, chars: 0..22) + at = location(lines: 1..3, chars: 0..30) assert_node(Heredoc, source, at: at) end @@ -544,7 +544,7 @@ def test_heredoc_beg HEREDOC SOURCE - at = location(chars: 0..11) + at = location(chars: 0..10) assert_node(HeredocBeg, source, at: at, &:beginning) end @@ -555,7 +555,7 @@ def test_heredoc_end HEREDOC SOURCE - at = location(lines: 3..3, chars: 22..31, columns: 0..9) + at = location(lines: 3..3, chars: 22..30, columns: 0..8) assert_node(HeredocEnd, source, at: at, &:ending) end @@ -950,7 +950,7 @@ def test_var_field guard_version("3.1.0") do def test_pinned_var_ref source = "foo in ^bar" - at = location(chars: 8..11) + at = location(chars: 7..11) assert_node(PinnedVarRef, source, at: at, &:pattern) end @@ -1008,7 +1008,7 @@ def test_xstring_heredoc HEREDOC SOURCE - at = location(lines: 1..3, chars: 0..18) + at = location(lines: 1..3, chars: 0..26) assert_node(Heredoc, source, at: at) end diff --git a/test/ractor_test.rb b/test/ractor_test.rb index bcdb2a51..7e0201ca 100644 --- a/test/ractor_test.rb +++ b/test/ractor_test.rb @@ -33,7 +33,7 @@ def test_formatting private def filepaths - Dir.glob(File.expand_path("../lib/syntax_tree/{node,parser}.rb", __dir__)) + Dir.glob(File.expand_path("../lib/syntax_tree/plugin/*.rb", __dir__)) end # Ractors still warn about usage, so I'm disabling that warning here just to diff --git a/test/ruby-syntax-fixtures b/test/ruby-syntax-fixtures deleted file mode 160000 index 5b333f5a..00000000 --- a/test/ruby-syntax-fixtures +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5b333f5a34d6fb08f88acc93b69c7d19b3fee8e7 diff --git a/test/ruby_syntax_fixtures_test.rb b/test/ruby_syntax_fixtures_test.rb deleted file mode 100644 index 0cf89310..00000000 --- a/test/ruby_syntax_fixtures_test.rb +++ /dev/null @@ -1,15 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class RubySyntaxFixturesTest < Minitest::Test - Dir[ 
- File.expand_path("ruby-syntax-fixtures/**/*.rb", __dir__) - ].each do |file| - define_method "test_ruby_syntax_fixtures_#{file}" do - refute_nil(SyntaxTree.parse(SyntaxTree.read(file))) - end - end - end -end diff --git a/test/syntax_tree_test.rb b/test/syntax_tree_test.rb index 05242d94..27aa6851 100644 --- a/test/syntax_tree_test.rb +++ b/test/syntax_tree_test.rb @@ -22,13 +22,18 @@ def method # comment SOURCE bodystmt = SyntaxTree.parse(source).statements.body.first.bodystmt - assert_equal(20, bodystmt.location.start_char) + assert_equal(20, bodystmt.start_char) end def test_parse_error assert_raises(Parser::ParseError) { SyntaxTree.parse("<>") } end + def test_marshalable + node = SyntaxTree.parse("1 + 2") + assert_operator(node, :===, Marshal.load(Marshal.dump(node))) + end + def test_maxwidth_format assert_equal("foo +\n bar\n", SyntaxTree.format("foo + bar", 5)) end diff --git a/test/test_helper.rb b/test/test_helper.rb index 77627e26..2c8f6466 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -11,6 +11,39 @@ require "syntax_tree" require "syntax_tree/cli" +# Here we are going to establish type verification whenever a new node is +# created. We do this through the reflection module, which in turn parses the +# source code of the node classes. +require "syntax_tree/reflection" +SyntaxTree::Reflection.nodes.each do |name, node| + next if name == :Statements + + clazz = SyntaxTree.const_get(name) + parameters = clazz.instance_method(:initialize).parameters + + # First, verify that all of the parameters listed in the list of attributes. + # If there are any parameters that aren't listed in the attributes, then + # something went wrong with the parsing in the reflection module. + raise unless (parameters.map(&:last) - node.attributes.keys).empty? + + # Now we're going to use an alias chain to redefine the initialize method to + # include type checking. 
+ clazz.alias_method(:initialize_without_verify, :initialize) + clazz.define_method(:initialize) do |**kwargs| + kwargs.each do |kwarg, value| + attribute = node.attributes.fetch(kwarg) + + unless attribute.type === value + raise TypeError, + "invalid type for #{name}##{kwarg}, expected " \ + "#{attribute.type.inspect}, got #{value.inspect}" + end + end + + initialize_without_verify(**kwargs) + end +end + require "json" require "tempfile" require "pp" @@ -61,7 +94,7 @@ def assert_syntax_tree(node) assert_includes(pretty, type) # Assert that we can get back a new tree by using the mutation visitor. - assert_operator node, :===, node.accept(Visitor::MutationVisitor.new) + assert_operator node, :===, node.accept(MutationVisitor.new) # Serialize the node to JSON, parse it back out, and assert that we have # found the expected type. diff --git a/test/translation/parser.txt b/test/translation/parser.txt new file mode 100644 index 00000000..5e9e8d31 --- /dev/null +++ b/test/translation/parser.txt @@ -0,0 +1,1824 @@ +!!! assert_parses_args:2249:0 +def f (foo: 1, bar: 2, **baz, &b); end +!!! assert_parses_args:2249:1 +def f (foo: 1, &b); end +!!! assert_parses_args:2249:2 +def f **baz, &b; end +!!! assert_parses_args:2249:3 +def f *, **; end +!!! assert_parses_args:2249:4 +def f a, o=1, *r, &b; end +!!! assert_parses_args:2249:5 +def f a, o=1, *r, p, &b; end +!!! assert_parses_args:2249:6 +def f a, o=1, &b; end +!!! assert_parses_args:2249:7 +def f a, o=1, p, &b; end +!!! assert_parses_args:2249:8 +def f a, *r, &b; end +!!! assert_parses_args:2249:9 +def f a, *r, p, &b; end +!!! assert_parses_args:2249:10 +def f a, &b; end +!!! assert_parses_args:2249:11 +def f o=1, *r, &b; end +!!! assert_parses_args:2249:12 +def f o=1, *r, p, &b; end +!!! assert_parses_args:2249:13 +def f o=1, &b; end +!!! assert_parses_args:2249:14 +def f o=1, p, &b; end +!!! assert_parses_args:2249:15 +def f *r, &b; end +!!! assert_parses_args:2249:16 +def f *r, p, &b; end +!!! 
assert_parses_args:2249:17 +def f &b; end +!!! assert_parses_args:2249:18 +def f ; end +!!! assert_parses_args:2249:19 +def f (((a))); end +!!! assert_parses_args:2249:20 +def f ((a, a1)); end +!!! assert_parses_args:2249:21 +def f ((a, *r)); end +!!! assert_parses_args:2249:22 +def f ((a, *r, p)); end +!!! assert_parses_args:2249:23 +def f ((a, *)); end +!!! assert_parses_args:2249:24 +def f ((a, *, p)); end +!!! assert_parses_args:2249:25 +def f ((*r)); end +!!! assert_parses_args:2249:26 +def f ((*r, p)); end +!!! assert_parses_args:2249:27 +def f ((*)); end +!!! assert_parses_args:2249:28 +def f ((*, p)); end +!!! assert_parses_args:2249:29 +def f foo: +; end +!!! assert_parses_args:2249:30 +def f foo: -1 +; end +!!! assert_parses_blockargs:2506:0 +f{ |a| } +!!! assert_parses_blockargs:2506:1 +f{ |a, b,| } +!!! assert_parses_blockargs:2506:2 +f{ |a| } +!!! assert_parses_blockargs:2506:3 +f{ |foo:| } +!!! assert_parses_blockargs:2506:4 +f{ } +!!! assert_parses_blockargs:2506:5 +f{ | | } +!!! assert_parses_blockargs:2506:6 +f{ |;a| } +!!! assert_parses_blockargs:2506:7 +f{ |; +a +| } +!!! assert_parses_blockargs:2506:8 +f{ || } +!!! assert_parses_blockargs:2506:9 +f{ |a| } +!!! assert_parses_blockargs:2506:10 +f{ |a, c| } +!!! assert_parses_blockargs:2506:11 +f{ |a,| } +!!! assert_parses_blockargs:2506:12 +f{ |a, &b| } +!!! assert_parses_blockargs:2506:13 +f{ |a, *s, &b| } +!!! assert_parses_blockargs:2506:14 +f{ |a, *, &b| } +!!! assert_parses_blockargs:2506:15 +f{ |a, *s| } +!!! assert_parses_blockargs:2506:16 +f{ |a, *| } +!!! assert_parses_blockargs:2506:17 +f{ |*s, &b| } +!!! assert_parses_blockargs:2506:18 +f{ |*, &b| } +!!! assert_parses_blockargs:2506:19 +f{ |*s| } +!!! assert_parses_blockargs:2506:20 +f{ |*| } +!!! assert_parses_blockargs:2506:21 +f{ |&b| } +!!! assert_parses_blockargs:2506:22 +f{ |a, o=1, o1=2, *r, &b| } +!!! assert_parses_blockargs:2506:23 +f{ |a, o=1, *r, p, &b| } +!!! assert_parses_blockargs:2506:24 +f{ |a, o=1, &b| } +!!! 
assert_parses_blockargs:2506:25 +f{ |a, o=1, p, &b| } +!!! assert_parses_blockargs:2506:26 +f{ |a, *r, p, &b| } +!!! assert_parses_blockargs:2506:27 +f{ |o=1, *r, &b| } +!!! assert_parses_blockargs:2506:28 +f{ |o=1, *r, p, &b| } +!!! assert_parses_blockargs:2506:29 +f{ |o=1, &b| } +!!! assert_parses_blockargs:2506:30 +f{ |o=1, p, &b| } +!!! assert_parses_blockargs:2506:31 +f{ |*r, p, &b| } +!!! assert_parses_blockargs:2506:32 +f{ |foo: 1, bar: 2, **baz, &b| } +!!! assert_parses_blockargs:2506:33 +f{ |foo: 1, &b| } +!!! assert_parses_blockargs:2506:34 +f{ |**baz, &b| } +!!! assert_parses_pattern_match:8503:0 +case foo; in self then true; end +!!! assert_parses_pattern_match:8503:1 +case foo; in 1..2 then true; end +!!! assert_parses_pattern_match:8503:2 +case foo; in 1.. then true; end +!!! assert_parses_pattern_match:8503:3 +case foo; in ..2 then true; end +!!! assert_parses_pattern_match:8503:4 +case foo; in 1...2 then true; end +!!! assert_parses_pattern_match:8503:5 +case foo; in 1... then true; end +!!! assert_parses_pattern_match:8503:6 +case foo; in ...2 then true; end +!!! assert_parses_pattern_match:8503:7 +case foo; in [*x, 1 => a, *y] then true; end +!!! assert_parses_pattern_match:8503:8 +case foo; in String(*, 1, *) then true; end +!!! assert_parses_pattern_match:8503:9 +case foo; in Array[*, 1, *] then true; end +!!! assert_parses_pattern_match:8503:10 +case foo; in *, 42, * then true; end +!!! assert_parses_pattern_match:8503:11 +case foo; in x, then nil; end +!!! assert_parses_pattern_match:8503:12 +case foo; in *x then nil; end +!!! assert_parses_pattern_match:8503:13 +case foo; in * then nil; end +!!! assert_parses_pattern_match:8503:14 +case foo; in x, y then nil; end +!!! assert_parses_pattern_match:8503:15 +case foo; in x, y, then nil; end +!!! assert_parses_pattern_match:8503:16 +case foo; in x, *y, z then nil; end +!!! assert_parses_pattern_match:8503:17 +case foo; in *x, y, z then nil; end +!!! 
assert_parses_pattern_match:8503:18 +case foo; in 1, "a", [], {} then nil; end +!!! assert_parses_pattern_match:8503:19 +case foo; in ->{ 42 } then true; end +!!! assert_parses_pattern_match:8503:20 +case foo; in A(1, 2) then true; end +!!! assert_parses_pattern_match:8503:21 +case foo; in A(x:) then true; end +!!! assert_parses_pattern_match:8503:22 +case foo; in A() then true; end +!!! assert_parses_pattern_match:8503:23 +case foo; in A[1, 2] then true; end +!!! assert_parses_pattern_match:8503:24 +case foo; in A[x:] then true; end +!!! assert_parses_pattern_match:8503:25 +case foo; in A[] then true; end +!!! assert_parses_pattern_match:8503:26 +case foo; in x then x; end +!!! assert_parses_pattern_match:8503:27 +case foo; in {} then true; end +!!! assert_parses_pattern_match:8503:28 +case foo; in a: 1 then true; end +!!! assert_parses_pattern_match:8503:29 +case foo; in { a: 1 } then true; end +!!! assert_parses_pattern_match:8503:30 +case foo; in { a: 1, } then true; end +!!! assert_parses_pattern_match:8503:31 +case foo; in a: then true; end +!!! assert_parses_pattern_match:8503:32 +case foo; in **a then true; end +!!! assert_parses_pattern_match:8503:33 +case foo; in ** then true; end +!!! assert_parses_pattern_match:8503:34 +case foo; in a: 1, b: 2 then true; end +!!! assert_parses_pattern_match:8503:35 +case foo; in a:, b: then true; end +!!! assert_parses_pattern_match:8503:36 +case foo; in a: 1, _a:, ** then true; end +!!! assert_parses_pattern_match:8503:37 +case foo; + in {a: 1 + } + false + ; end +!!! assert_parses_pattern_match:8503:38 +case foo; + in {a: + 2} + false + ; end +!!! assert_parses_pattern_match:8503:39 +case foo; + in {Foo: 42 + } + false + ; end +!!! assert_parses_pattern_match:8503:40 +case foo; + in a: {b:}, c: + p c + ; end +!!! assert_parses_pattern_match:8503:41 +case foo; + in {a: + } + true + ; end +!!! assert_parses_pattern_match:8503:42 +case foo; in A then true; end +!!! 
assert_parses_pattern_match:8503:43 +case foo; in A::B then true; end +!!! assert_parses_pattern_match:8503:44 +case foo; in ::A then true; end +!!! assert_parses_pattern_match:8503:45 +case foo; in [x] then nil; end +!!! assert_parses_pattern_match:8503:46 +case foo; in [x,] then nil; end +!!! assert_parses_pattern_match:8503:47 +case foo; in [x, y] then true; end +!!! assert_parses_pattern_match:8503:48 +case foo; in [x, y,] then true; end +!!! assert_parses_pattern_match:8503:49 +case foo; in [x, y, *] then true; end +!!! assert_parses_pattern_match:8503:50 +case foo; in [x, y, *z] then true; end +!!! assert_parses_pattern_match:8503:51 +case foo; in [x, *y, z] then true; end +!!! assert_parses_pattern_match:8503:52 +case foo; in [x, *, y] then true; end +!!! assert_parses_pattern_match:8503:53 +case foo; in [*x, y] then true; end +!!! assert_parses_pattern_match:8503:54 +case foo; in [*, x] then true; end +!!! assert_parses_pattern_match:8503:55 +case foo; in (1) then true; end +!!! assert_parses_pattern_match:8503:56 +case foo; in x if true; nil; end +!!! assert_parses_pattern_match:8503:57 +case foo; in x unless true; nil; end +!!! assert_parses_pattern_match:8503:58 +case foo; in 1; end +!!! assert_parses_pattern_match:8503:59 +case foo; in ^foo then nil; end +!!! assert_parses_pattern_match:8503:60 +case foo; in "a": then true; end +!!! assert_parses_pattern_match:8503:61 +case foo; in "#{ 'a' }": then true; end +!!! assert_parses_pattern_match:8503:62 +case foo; in "#{ %q{a} }": then true; end +!!! assert_parses_pattern_match:8503:63 +case foo; in "#{ %Q{a} }": then true; end +!!! assert_parses_pattern_match:8503:64 +case foo; in "a": 1 then true; end +!!! assert_parses_pattern_match:8503:65 +case foo; in "#{ 'a' }": 1 then true; end +!!! assert_parses_pattern_match:8503:66 +case foo; in "#{ %q{a} }": 1 then true; end +!!! assert_parses_pattern_match:8503:67 +case foo; in "#{ %Q{a} }": 1 then true; end +!!! 
assert_parses_pattern_match:8503:68 +case foo; in ^(42) then nil; end +!!! assert_parses_pattern_match:8503:69 +case foo; in { foo: ^(42) } then nil; end +!!! assert_parses_pattern_match:8503:70 +case foo; in ^(0+0) then nil; end +!!! assert_parses_pattern_match:8503:71 +case foo; in ^@a; end +!!! assert_parses_pattern_match:8503:72 +case foo; in ^@@TestPatternMatching; end +!!! assert_parses_pattern_match:8503:73 +case foo; in ^$TestPatternMatching; end +!!! assert_parses_pattern_match:8503:74 +case foo; in ^(1 +); end +!!! assert_parses_pattern_match:8503:75 +case foo; in 1 | 2 then true; end +!!! assert_parses_pattern_match:8503:76 +case foo; in 1 => a then true; end +!!! assert_parses_pattern_match:8503:77 +case foo; in **nil then true; end +!!! block in test_endless_comparison_method:10392:0 +def ===(other) = do_something +!!! block in test_endless_comparison_method:10392:1 +def ==(other) = do_something +!!! block in test_endless_comparison_method:10392:2 +def !=(other) = do_something +!!! block in test_endless_comparison_method:10392:3 +def <=(other) = do_something +!!! block in test_endless_comparison_method:10392:4 +def >=(other) = do_something +!!! block in test_endless_comparison_method:10392:5 +def !=(other) = do_something +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:0 +'a\ +b' +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:1 +<<-'HERE' +a\ +b +HERE +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:2 +%q{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:3 +"a\ +b" +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:4 +<<-"HERE" +a\ +b +HERE +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:5 +%{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:6 +%Q{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:7 +%w{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:8 +%W{a\ +b} +!!! 
block in test_parser_slash_slash_n_escaping_in_literals:7327:9 +%i{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:10 +%I{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:11 +:'a\ +b' +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:12 +%s{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:13 +:"a\ +b" +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:14 +/a\ +b/ +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:15 +%r{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:16 +%x{a\ +b} +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:17 +`a\ +b` +!!! block in test_parser_slash_slash_n_escaping_in_literals:7327:18 +<<-`HERE` +a\ +b +HERE +!!! block in test_ruby_bug_11873_a:6017:0 +a b{c d}, :e do end +!!! block in test_ruby_bug_11873_a:6017:1 +a b{c d}, 1 do end +!!! block in test_ruby_bug_11873_a:6017:2 +a b{c d}, 1.0 do end +!!! block in test_ruby_bug_11873_a:6017:3 +a b{c d}, 1.0r do end +!!! block in test_ruby_bug_11873_a:6017:4 +a b{c d}, 1.0i do end +!!! block in test_ruby_bug_11873_a:6022:0 +a b{c(d)}, :e do end +!!! block in test_ruby_bug_11873_a:6022:1 +a b{c(d)}, 1 do end +!!! block in test_ruby_bug_11873_a:6022:2 +a b{c(d)}, 1.0 do end +!!! block in test_ruby_bug_11873_a:6022:3 +a b{c(d)}, 1.0r do end +!!! block in test_ruby_bug_11873_a:6022:4 +a b{c(d)}, 1.0i do end +!!! block in test_ruby_bug_11873_a:6036:0 +a b(c d), :e do end +!!! block in test_ruby_bug_11873_a:6036:1 +a b(c d), 1 do end +!!! block in test_ruby_bug_11873_a:6036:2 +a b(c d), 1.0 do end +!!! block in test_ruby_bug_11873_a:6036:3 +a b(c d), 1.0r do end +!!! block in test_ruby_bug_11873_a:6036:4 +a b(c d), 1.0i do end +!!! block in test_ruby_bug_11873_a:6041:0 +a b(c(d)), :e do end +!!! block in test_ruby_bug_11873_a:6041:1 +a b(c(d)), 1 do end +!!! block in test_ruby_bug_11873_a:6041:2 +a b(c(d)), 1.0 do end +!!! 
block in test_ruby_bug_11873_a:6041:3 +a b(c(d)), 1.0r do end +!!! block in test_ruby_bug_11873_a:6041:4 +a b(c(d)), 1.0i do end +!!! test___ENCODING__:1037 +__ENCODING__ +!!! test___ENCODING___legacy_:1046 +__ENCODING__ +!!! test_alias:2020 +alias :foo bar +!!! test_alias_gvar:2032 +alias $a $b +!!! test_alias_gvar:2037 +alias $a $+ +!!! test_ambiuous_quoted_label_in_ternary_operator:7204 +a ? b & '': nil +!!! test_and:4447 +foo and bar +!!! test_and:4453 +foo && bar +!!! test_and_asgn:1748 +foo.a &&= 1 +!!! test_and_asgn:1758 +foo[0, 1] &&= 2 +!!! test_and_or_masgn:4475 +foo && (a, b = bar) +!!! test_and_or_masgn:4484 +foo || (a, b = bar) +!!! test_anonymous_blockarg:10861 +def foo(&); bar(&); end +!!! test_arg:2055 +def f(foo); end +!!! test_arg:2066 +def f(foo, bar); end +!!! test_arg_duplicate_ignored:2958 +def foo(_, _); end +!!! test_arg_duplicate_ignored:2972 +def foo(_a, _a); end +!!! test_arg_label:3012 +def foo() a:b end +!!! test_arg_label:3019 +def foo + a:b end +!!! test_arg_label:3026 +f { || a:b } +!!! test_arg_scope:2238 +lambda{|;a|a} +!!! test_args_args_assocs:4077 +fun(foo, :foo => 1) +!!! test_args_args_assocs:4083 +fun(foo, :foo => 1, &baz) +!!! test_args_args_assocs_comma:4092 +foo[bar, :baz => 1,] +!!! test_args_args_comma:3941 +foo[bar,] +!!! test_args_args_star:3908 +fun(foo, *bar) +!!! test_args_args_star:3913 +fun(foo, *bar, &baz) +!!! test_args_assocs:4001 +fun(:foo => 1) +!!! test_args_assocs:4006 +fun(:foo => 1, &baz) +!!! test_args_assocs:4012 +self[:bar => 1] +!!! test_args_assocs:4021 +self.[]= foo, :a => 1 +!!! test_args_assocs:4031 +yield(:foo => 42) +!!! test_args_assocs:4039 +super(:foo => 42) +!!! test_args_assocs_comma:4068 +foo[:baz => 1,] +!!! test_args_assocs_legacy:3951 +fun(:foo => 1) +!!! test_args_assocs_legacy:3956 +fun(:foo => 1, &baz) +!!! test_args_assocs_legacy:3962 +self[:bar => 1] +!!! test_args_assocs_legacy:3971 +self.[]= foo, :a => 1 +!!! test_args_assocs_legacy:3981 +yield(:foo => 42) +!!! 
test_args_assocs_legacy:3989 +super(:foo => 42) +!!! test_args_block_pass:3934 +fun(&bar) +!!! test_args_cmd:3901 +fun(f bar) +!!! test_args_star:3921 +fun(*bar) +!!! test_args_star:3926 +fun(*bar, &baz) +!!! test_array_assocs:629 +[ 1 => 2 ] +!!! test_array_assocs:637 +[ 1, 2 => 3 ] +!!! test_array_plain:589 +[1, 2] +!!! test_array_splat:598 +[1, *foo, 2] +!!! test_array_splat:611 +[1, *foo] +!!! test_array_splat:622 +[*foo] +!!! test_array_symbols:695 +%i[foo bar] +!!! test_array_symbols_empty:732 +%i[] +!!! test_array_symbols_empty:740 +%I() +!!! test_array_symbols_interp:706 +%I[foo #{bar}] +!!! test_array_symbols_interp:721 +%I[foo#{bar}] +!!! test_array_words:647 +%w[foo bar] +!!! test_array_words_empty:682 +%w[] +!!! test_array_words_empty:689 +%W() +!!! test_array_words_interp:657 +%W[foo #{bar}] +!!! test_array_words_interp:671 +%W[foo #{bar}foo#@baz] +!!! test_asgn_cmd:1126 +foo = m foo +!!! test_asgn_cmd:1130 +foo = bar = m foo +!!! test_asgn_mrhs:1449 +foo = bar, 1 +!!! test_asgn_mrhs:1456 +foo = *bar +!!! test_asgn_mrhs:1461 +foo = baz, *bar +!!! test_back_ref:995 +$+ +!!! test_bang:3434 +!foo +!!! test_bang_cmd:3448 +!m foo +!!! test_begin_cmdarg:5526 +p begin 1.times do 1 end end +!!! test_beginless_erange_after_newline:935 +foo +...100 +!!! test_beginless_irange_after_newline:923 +foo +..100 +!!! test_beginless_range:903 +..100 +!!! test_beginless_range:912 +...100 +!!! test_blockarg:2187 +def f(&block); end +!!! test_break:5037 +break(foo) +!!! test_break:5051 +break foo +!!! test_break:5057 +break() +!!! test_break:5064 +break +!!! test_break_block:5072 +break fun foo do end +!!! test_bug_435:7067 +"#{-> foo {}}" +!!! test_bug_447:7046 +m [] do end +!!! test_bug_447:7055 +m [], 1 do end +!!! test_bug_452:7080 +td (1_500).toString(); td.num do; end +!!! test_bug_466:7096 +foo "#{(1+1).to_i}" do; end +!!! test_bug_473:7113 +m "#{[]}" +!!! test_bug_480:7124 +m "#{}#{()}" +!!! test_bug_481:7136 +m def x(); end; 1.tap do end +!!! 
test_bug_ascii_8bit_in_literal:5880 +# coding:utf-8 + "\xD0\xBF\xD1\x80\xD0\xBE\xD0\xB2\xD0\xB5\xD1\x80\xD0\xBA\xD0\xB0" +!!! test_bug_cmd_string_lookahead:5752 +desc "foo" do end +!!! test_bug_cmdarg:5549 +assert dogs +!!! test_bug_cmdarg:5554 +assert do: true +!!! test_bug_cmdarg:5562 +f x: -> do meth do end end +!!! test_bug_def_no_paren_eql_begin:5799 +def foo +=begin +=end +end +!!! test_bug_do_block_in_call_args:5762 +bar def foo; self.each do end end +!!! test_bug_do_block_in_cmdarg:5777 +tap (proc do end) +!!! test_bug_do_block_in_hash_brace:6569 +p :foo, {a: proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6587 +p :foo, {:a => proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6605 +p :foo, {"a": proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6623 +p :foo, {proc do end => proc do end, b: proc do end} +!!! test_bug_do_block_in_hash_brace:6643 +p :foo, {** proc do end, b: proc do end} +!!! test_bug_heredoc_do:5835 +f <<-TABLE do +TABLE +end +!!! test_bug_interp_single:5789 +"#{1}" +!!! test_bug_interp_single:5793 +%W"#{1}" +!!! test_bug_lambda_leakage:6550 +->(scope) {}; scope +!!! test_bug_regex_verification:6563 +/#)/x +!!! test_bug_rescue_empty_else:5813 +begin; rescue LoadError; else; end +!!! test_bug_while_not_parens_do:5805 +while not (true) do end +!!! test_case_cond:4844 +case; when foo; 'foo'; end +!!! test_case_cond_else:4857 +case; when foo; 'foo'; else 'bar'; end +!!! test_case_expr:4816 +case foo; when 'bar'; bar; end +!!! test_case_expr_else:4830 +case foo; when 'bar'; bar; else baz; end +!!! test_casgn_scoped:1192 +Bar::Foo = 10 +!!! test_casgn_toplevel:1181 +::Foo = 10 +!!! test_casgn_unscoped:1203 +Foo = 10 +!!! test_character:248 +?a +!!! test_class:1827 +class Foo; end +!!! test_class:1837 +class Foo end +!!! test_class_definition_in_while_cond:6870 +while class Foo; tap do end; end; break; end +!!! 
test_class_definition_in_while_cond:6882 +while class Foo a = tap do end; end; break; end +!!! test_class_definition_in_while_cond:6895 +while class << self; tap do end; end; break; end +!!! test_class_definition_in_while_cond:6907 +while class << self; a = tap do end; end; break; end +!!! test_class_super:1848 +class Foo < Bar; end +!!! test_class_super_label:1860 +class Foo < a:b; end +!!! test_comments_before_leading_dot__27:7750 +a # +# +.foo +!!! test_comments_before_leading_dot__27:7757 +a # + # +.foo +!!! test_comments_before_leading_dot__27:7764 +a # +# +&.foo +!!! test_comments_before_leading_dot__27:7771 +a # + # +&.foo +!!! test_complex:156 +42i +!!! test_complex:162 +42ri +!!! test_complex:168 +42.1i +!!! test_complex:174 +42.1ri +!!! test_cond_begin:4686 +if (bar); foo; end +!!! test_cond_begin_masgn:4695 +if (bar; a, b = foo); end +!!! test_cond_eflipflop:4758 +if foo...bar; end +!!! test_cond_eflipflop:4772 +!(foo...bar) +!!! test_cond_iflipflop:4735 +if foo..bar; end +!!! test_cond_iflipflop:4749 +!(foo..bar) +!!! test_cond_match_current_line:4781 +if /wat/; end +!!! test_cond_match_current_line:4801 +!/wat/ +!!! test_const_op_asgn:1536 +A += 1 +!!! test_const_op_asgn:1542 +::A += 1 +!!! test_const_op_asgn:1550 +B::A += 1 +!!! test_const_op_asgn:1558 +def x; self::A ||= 1; end +!!! test_const_op_asgn:1567 +def x; ::A ||= 1; end +!!! test_const_scoped:1020 +Bar::Foo +!!! test_const_toplevel:1011 +::Foo +!!! test_const_unscoped:1029 +Foo +!!! test_control_meta_escape_chars_in_regexp__since_31:10686 +/\c\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10692 +/\c\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10698 +/\C-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10704 +/\C-\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10710 +/\M-\xFF/ +!!! test_control_meta_escape_chars_in_regexp__since_31:10716 +/\M-\C-\xFF/ +!!! 
test_control_meta_escape_chars_in_regexp__since_31:10722 +/\M-\c\xFF/ +!!! test_cpath:1807 +module ::Foo; end +!!! test_cpath:1813 +module Bar::Foo; end +!!! test_cvar:973 +@@foo +!!! test_cvasgn:1106 +@@var = 10 +!!! test_dedenting_heredoc:297 +p <<~E +E +!!! test_dedenting_heredoc:304 +p <<~E + E +!!! test_dedenting_heredoc:311 +p <<~E + x +E +!!! test_dedenting_heredoc:318 +p <<~E + ư +E +!!! test_dedenting_heredoc:325 +p <<~E + x + y +E +!!! test_dedenting_heredoc:334 +p <<~E + x + y +E +!!! test_dedenting_heredoc:343 +p <<~E + x + y +E +!!! test_dedenting_heredoc:352 +p <<~E + x + y +E +!!! test_dedenting_heredoc:361 +p <<~E + x + y +E +!!! test_dedenting_heredoc:370 +p <<~E + x + +y +E +!!! test_dedenting_heredoc:380 +p <<~E + x + + y +E +!!! test_dedenting_heredoc:390 +p <<~E + x + \ y +E +!!! test_dedenting_heredoc:399 +p <<~E + x + \ y +E +!!! test_dedenting_heredoc:408 +p <<~"E" + x + #{foo} +E +!!! test_dedenting_heredoc:419 +p <<~`E` + x + #{foo} +E +!!! test_dedenting_heredoc:430 +p <<~"E" + x + #{" y"} +E +!!! test_dedenting_interpolating_heredoc_fake_line_continuation:459 +<<~'FOO' + baz\\ + qux +FOO +!!! test_dedenting_non_interpolating_heredoc_line_continuation:451 +<<~'FOO' + baz\ + qux +FOO +!!! test_def:1899 +def foo; end +!!! test_def:1907 +def String; end +!!! test_def:1911 +def String=; end +!!! test_def:1915 +def until; end +!!! test_def:1919 +def BEGIN; end +!!! test_def:1923 +def END; end +!!! test_defined:1058 +defined? foo +!!! test_defined:1064 +defined?(foo) +!!! test_defined:1072 +defined? @foo +!!! test_defs:1929 +def self.foo; end +!!! test_defs:1937 +def self::foo; end +!!! test_defs:1945 +def (foo).foo; end +!!! test_defs:1949 +def String.foo; end +!!! test_defs:1954 +def String::foo; end +!!! test_empty_stmt:60 +!!! test_endless_method:9786 +def foo() = 42 +!!! test_endless_method:9798 +def inc(x) = x + 1 +!!! test_endless_method:9811 +def obj.foo() = 42 +!!! test_endless_method:9823 +def obj.inc(x) = x + 1 +!!! 
test_endless_method_command_syntax:9880 +def foo = puts "Hello" +!!! test_endless_method_command_syntax:9892 +def foo() = puts "Hello" +!!! test_endless_method_command_syntax:9904 +def foo(x) = puts x +!!! test_endless_method_command_syntax:9917 +def obj.foo = puts "Hello" +!!! test_endless_method_command_syntax:9931 +def obj.foo() = puts "Hello" +!!! test_endless_method_command_syntax:9945 +def rescued(x) = raise "to be caught" rescue "instance #{x}" +!!! test_endless_method_command_syntax:9964 +def self.rescued(x) = raise "to be caught" rescue "class #{x}" +!!! test_endless_method_command_syntax:9985 +def obj.foo(x) = puts x +!!! test_endless_method_forwarded_args_legacy:9840 +def foo(...) = bar(...) +!!! test_endless_method_with_rescue_mod:9855 +def m() = 1 rescue 2 +!!! test_endless_method_with_rescue_mod:9866 +def self.m() = 1 rescue 2 +!!! test_endless_method_without_args:10404 +def foo = 42 +!!! test_endless_method_without_args:10412 +def foo = 42 rescue nil +!!! test_endless_method_without_args:10423 +def self.foo = 42 +!!! test_endless_method_without_args:10432 +def self.foo = 42 rescue nil +!!! test_ensure:5261 +begin; meth; ensure; bar; end +!!! test_ensure_empty:5274 +begin ensure end +!!! test_false:96 +false +!!! test_float:129 +1.33 +!!! test_float:134 +-1.33 +!!! test_for:5002 +for a in foo do p a; end +!!! test_for:5014 +for a in foo; p a; end +!!! test_for_mlhs:5023 +for a, b in foo; p a, b; end +!!! test_forward_arg:7899 +def foo(...); bar(...); end +!!! test_forward_arg_with_open_args:10745 +def foo ... +end +!!! test_forward_arg_with_open_args:10752 +def foo a, b = 1, ... +end +!!! test_forward_arg_with_open_args:10770 +def foo(a, ...) bar(...) end +!!! test_forward_arg_with_open_args:10781 +def foo a, ... + bar(...) +end +!!! test_forward_arg_with_open_args:10792 +def foo b = 1, ... + bar(...) +end +!!! test_forward_arg_with_open_args:10804 +def foo ...; bar(...); end +!!! 
test_forward_arg_with_open_args:10814 +def foo a, ...; bar(...); end +!!! test_forward_arg_with_open_args:10825 +def foo b = 1, ...; bar(...); end +!!! test_forward_arg_with_open_args:10837 +(def foo ... + bar(...) +end) +!!! test_forward_arg_with_open_args:10848 +(def foo ...; bar(...); end) +!!! test_forward_args_legacy:7863 +def foo(...); bar(...); end +!!! test_forward_args_legacy:7875 +def foo(...); super(...); end +!!! test_forward_args_legacy:7887 +def foo(...); end +!!! test_forwarded_argument_with_kwrestarg:10962 +def foo(argument, **); bar(argument, **); end +!!! test_forwarded_argument_with_restarg:10923 +def foo(argument, *); bar(argument, *); end +!!! test_forwarded_kwrestarg:10943 +def foo(**); bar(**); end +!!! test_forwarded_restarg:10905 +def foo(*); bar(*); end +!!! test_gvar:980 +$foo +!!! test_gvasgn:1116 +$var = 10 +!!! test_hash_empty:750 +{ } +!!! test_hash_hashrocket:759 +{ 1 => 2 } +!!! test_hash_hashrocket:768 +{ 1 => 2, :foo => "bar" } +!!! test_hash_kwsplat:821 +{ foo: 2, **bar } +!!! test_hash_label:776 +{ foo: 2 } +!!! test_hash_label_end:789 +{ 'foo': 2 } +!!! test_hash_label_end:802 +{ 'foo': 2, 'bar': {}} +!!! test_hash_label_end:810 +f(a ? "a":1) +!!! test_hash_pair_value_omission:10040 +{a:, b:} +!!! test_hash_pair_value_omission:10054 +{puts:} +!!! test_hash_pair_value_omission:10065 +{BAR:} +!!! test_heredoc:263 +<(**nil) {} +!!! test_kwoptarg:2124 +def f(foo: 1); end +!!! test_kwrestarg_named:2135 +def f(**foo); end +!!! test_kwrestarg_unnamed:2146 +def f(**); end +!!! test_lbrace_arg_after_command_args:7235 +let (:a) { m do; end } +!!! test_lparenarg_after_lvar__since_25:6679 +meth (-1.3).abs +!!! test_lparenarg_after_lvar__since_25:6688 +foo (-1.3).abs +!!! test_lvar:959 +foo +!!! test_lvar_injecting_match:3778 +/(?bar)/ =~ 'bar'; match +!!! test_lvasgn:1084 +var = 10; var +!!! test_masgn:1247 +foo, bar = 1, 2 +!!! test_masgn:1258 +(foo, bar) = 1, 2 +!!! test_masgn:1268 +foo, bar, baz = 1, 2 +!!! 
test_masgn_attr:1390 +self.a, self[1, 2] = foo +!!! test_masgn_attr:1403 +self::a, foo = foo +!!! test_masgn_attr:1411 +self.A, foo = foo +!!! test_masgn_cmd:1439 +foo, bar = m foo +!!! test_masgn_const:1421 +self::A, foo = foo +!!! test_masgn_const:1429 +::A, foo = foo +!!! test_masgn_nested:1365 +a, (b, c) = foo +!!! test_masgn_nested:1379 +((b, )) = foo +!!! test_masgn_splat:1279 +@foo, @@bar = *foo +!!! test_masgn_splat:1288 +a, b = *foo, bar +!!! test_masgn_splat:1296 +a, *b = bar +!!! test_masgn_splat:1302 +a, *b, c = bar +!!! test_masgn_splat:1313 +a, * = bar +!!! test_masgn_splat:1319 +a, *, c = bar +!!! test_masgn_splat:1330 +*b = bar +!!! test_masgn_splat:1336 +*b, c = bar +!!! test_masgn_splat:1346 +* = bar +!!! test_masgn_splat:1352 +*, c, d = bar +!!! test_method_definition_in_while_cond:6816 +while def foo; tap do end; end; break; end +!!! test_method_definition_in_while_cond:6828 +while def self.foo; tap do end; end; break; end +!!! test_method_definition_in_while_cond:6841 +while def foo a = tap do end; end; break; end +!!! test_method_definition_in_while_cond:6854 +while def self.foo a = tap do end; end; break; end +!!! test_module:1789 +module Foo; end +!!! test_multiple_pattern_matches:11086 +{a: 0} => a: +{a: 0} => a: +!!! test_multiple_pattern_matches:11102 +{a: 0} in a: +{a: 0} in a: +!!! test_newline_in_hash_argument:11035 +obj.set foo: +1 +!!! test_newline_in_hash_argument:11046 +obj.set "foo": +1 +!!! test_newline_in_hash_argument:11057 +case foo +in a: +0 +true +in "b": +0 +true +end +!!! test_next:5131 +next(foo) +!!! test_next:5145 +next foo +!!! test_next:5151 +next() +!!! test_next:5158 +next +!!! test_next_block:5166 +next fun foo do end +!!! test_nil:66 +nil +!!! test_nil_expression:73 +() +!!! test_nil_expression:80 +begin end +!!! test_non_lvar_injecting_match:3793 +/#{1}(?bar)/ =~ 'bar' +!!! test_not:3462 +not foo +!!! test_not:3468 +not(foo) +!!! test_not:3474 +not() +!!! test_not_cmd:3488 +not m foo +!!! 
test_not_masgn__24:4672 +!(a, b = foo) +!!! test_nth_ref:1002 +$10 +!!! test_numbered_args_after_27:7358 +m { _1 + _9 } +!!! test_numbered_args_after_27:7373 +m do _1 + _9 end +!!! test_numbered_args_after_27:7390 +-> { _1 + _9} +!!! test_numbered_args_after_27:7405 +-> do _1 + _9 end +!!! test_numparam_outside_block:7512 +class A; _1; end +!!! test_numparam_outside_block:7520 +module A; _1; end +!!! test_numparam_outside_block:7528 +class << foo; _1; end +!!! test_numparam_outside_block:7536 +def self.m; _1; end +!!! test_numparam_outside_block:7545 +_1 +!!! test_op_asgn:1606 +foo.a += 1 +!!! test_op_asgn:1616 +foo::a += 1 +!!! test_op_asgn:1622 +foo.A += 1 +!!! test_op_asgn_cmd:1630 +foo.a += m foo +!!! test_op_asgn_cmd:1636 +foo::a += m foo +!!! test_op_asgn_cmd:1642 +foo.A += m foo +!!! test_op_asgn_cmd:1654 +foo::A += m foo +!!! test_op_asgn_index:1664 +foo[0, 1] += 2 +!!! test_op_asgn_index_cmd:1678 +foo[0, 1] += m foo +!!! test_optarg:2074 +def f foo = 1; end +!!! test_optarg:2084 +def f(foo=1, bar=2); end +!!! test_or:4461 +foo or bar +!!! test_or:4467 +foo || bar +!!! test_or_asgn:1724 +foo.a ||= 1 +!!! test_or_asgn:1734 +foo[0, 1] ||= 2 +!!! test_parser_bug_272:6528 +a @b do |c|;end +!!! test_parser_bug_490:7151 +def m; class << self; class C; end; end; end +!!! test_parser_bug_490:7162 +def m; class << self; module M; end; end; end +!!! test_parser_bug_490:7173 +def m; class << self; A = nil; end; end +!!! test_parser_bug_507:7265 +m = -> *args do end +!!! test_parser_bug_518:7277 +class A < B +end +!!! test_parser_bug_525:7287 +m1 :k => m2 do; m3() do end; end +!!! test_parser_bug_604:7737 +m a + b do end +!!! test_parser_bug_640:443 +<<~FOO + baz\ + qux +FOO +!!! test_parser_bug_645:9774 +-> (arg={}) {} +!!! test_parser_bug_830:10630 +/\(/ +!!! test_parser_drops_truncated_parts_of_squiggly_heredoc:10446 +<<~HERE + #{} +HERE +!!! 
test_pattern_matching__FILE__LINE_literals:9473 + case [__FILE__, __LINE__ + 1, __ENCODING__] + in [__FILE__, __LINE__, __ENCODING__] + end +!!! test_pattern_matching_blank_else:9390 +case 1; in 2; 3; else; end +!!! test_pattern_matching_else:9376 +case 1; in 2; 3; else; 4; end +!!! test_pattern_matching_single_line:9540 +1 => [a]; a +!!! test_pattern_matching_single_line:9552 +1 in [a]; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9566 +[1, 2] => a, b; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9581 +{a: 1} => a:; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9596 +[1, 2] in a, b; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9611 +{a: 1} in a:; a +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9626 +{key: :value} in key: value; value +!!! test_pattern_matching_single_line_allowed_omission_of_parentheses:9643 +{key: :value} => key: value; value +!!! test_postexe:5486 +END { 1 } +!!! test_preexe:5467 +BEGIN { 1 } +!!! test_procarg0:2803 +m { |foo| } +!!! test_procarg0:2812 +m { |(foo, bar)| } +!!! test_range_endless:869 +1.. +!!! test_range_endless:877 +1... +!!! test_range_exclusive:861 +1...2 +!!! test_range_inclusive:853 +1..2 +!!! test_rational:142 +42r +!!! test_rational:148 +42.1r +!!! test_redo:5178 +redo +!!! test_regex_interp:551 +/foo#{bar}baz/ +!!! test_regex_plain:541 +/source/im +!!! test_resbody_list:5398 +begin; meth; rescue Exception; bar; end +!!! test_resbody_list_mrhs:5411 +begin; meth; rescue Exception, foo; bar; end +!!! test_resbody_list_var:5444 +begin; meth; rescue foo => ex; bar; end +!!! test_resbody_var:5426 +begin; meth; rescue => ex; bar; end +!!! test_resbody_var:5434 +begin; meth; rescue => @ex; bar; end +!!! test_rescue:5188 +begin; meth; rescue; foo; end +!!! test_rescue_else:5203 +begin; meth; rescue; foo; else; bar; end +!!! 
test_rescue_else_ensure:5302 +begin; meth; rescue; baz; else foo; ensure; bar end +!!! test_rescue_ensure:5286 +begin; meth; rescue; baz; ensure; bar; end +!!! test_rescue_in_lambda_block:6928 +-> do rescue; end +!!! test_rescue_mod:5319 +meth rescue bar +!!! test_rescue_mod_asgn:5331 +foo = meth rescue bar +!!! test_rescue_mod_masgn:5345 +foo, bar = meth rescue [1, 2] +!!! test_rescue_mod_op_assign:5365 +foo += meth rescue bar +!!! test_rescue_without_begin_end:5381 +meth do; foo; rescue; bar; end +!!! test_restarg_named:2094 +def f(*foo); end +!!! test_restarg_unnamed:2104 +def f(*); end +!!! test_retry:5457 +retry +!!! test_return:5084 +return(foo) +!!! test_return:5098 +return foo +!!! test_return:5104 +return() +!!! test_return:5111 +return +!!! test_return_block:5119 +return fun foo do end +!!! test_ruby_bug_10279:5905 +{a: if true then 42 end} +!!! test_ruby_bug_10653:5915 +true ? 1.tap do |n| p n end : 0 +!!! test_ruby_bug_10653:5945 +false ? raise {} : tap {} +!!! test_ruby_bug_10653:5958 +false ? raise do end : tap do end +!!! test_ruby_bug_11107:5973 +p ->() do a() do end end +!!! test_ruby_bug_11380:5985 +p -> { :hello }, a: 1 do end +!!! test_ruby_bug_11873:6353 +a b{c d}, "x" do end +!!! test_ruby_bug_11873:6367 +a b(c d), "x" do end +!!! test_ruby_bug_11873:6380 +a b{c(d)}, "x" do end +!!! test_ruby_bug_11873:6394 +a b(c(d)), "x" do end +!!! test_ruby_bug_11873:6407 +a b{c d}, /x/ do end +!!! test_ruby_bug_11873:6421 +a b(c d), /x/ do end +!!! test_ruby_bug_11873:6434 +a b{c(d)}, /x/ do end +!!! test_ruby_bug_11873:6448 +a b(c(d)), /x/ do end +!!! test_ruby_bug_11873:6461 +a b{c d}, /x/m do end +!!! test_ruby_bug_11873:6475 +a b(c d), /x/m do end +!!! test_ruby_bug_11873:6488 +a b{c(d)}, /x/m do end +!!! test_ruby_bug_11873:6502 +a b(c(d)), /x/m do end +!!! test_ruby_bug_11873_b:6050 +p p{p(p);p p}, tap do end +!!! test_ruby_bug_11989:6069 +p <<~"E" + x\n y +E +!!! test_ruby_bug_11990:6078 +p <<~E " y" + x +E +!!! 
test_ruby_bug_12073:6089 +a = 1; a b: 1 +!!! test_ruby_bug_12073:6102 +def foo raise; raise A::B, ''; end +!!! test_ruby_bug_12402:6116 +foo = raise(bar) rescue nil +!!! test_ruby_bug_12402:6127 +foo += raise(bar) rescue nil +!!! test_ruby_bug_12402:6139 +foo[0] += raise(bar) rescue nil +!!! test_ruby_bug_12402:6153 +foo.m += raise(bar) rescue nil +!!! test_ruby_bug_12402:6166 +foo::m += raise(bar) rescue nil +!!! test_ruby_bug_12402:6179 +foo.C += raise(bar) rescue nil +!!! test_ruby_bug_12402:6192 +foo::C ||= raise(bar) rescue nil +!!! test_ruby_bug_12402:6205 +foo = raise bar rescue nil +!!! test_ruby_bug_12402:6216 +foo += raise bar rescue nil +!!! test_ruby_bug_12402:6228 +foo[0] += raise bar rescue nil +!!! test_ruby_bug_12402:6242 +foo.m += raise bar rescue nil +!!! test_ruby_bug_12402:6255 +foo::m += raise bar rescue nil +!!! test_ruby_bug_12402:6268 +foo.C += raise bar rescue nil +!!! test_ruby_bug_12402:6281 +foo::C ||= raise bar rescue nil +!!! test_ruby_bug_12669:6296 +a = b = raise :x +!!! test_ruby_bug_12669:6305 +a += b = raise :x +!!! test_ruby_bug_12669:6314 +a = b += raise :x +!!! test_ruby_bug_12669:6323 +a += b += raise :x +!!! test_ruby_bug_12686:6334 +f (g rescue nil) +!!! test_ruby_bug_13547:7018 +meth[] {} +!!! test_ruby_bug_14690:7250 +let () { m(a) do; end } +!!! test_ruby_bug_15789:7622 +m ->(a = ->{_1}) {a} +!!! test_ruby_bug_15789:7636 +m ->(a: ->{_1}) {a} +!!! test_ruby_bug_9669:5889 +def a b: +return +end +!!! test_ruby_bug_9669:5895 +o = { +a: +1 +} +!!! test_sclass:1884 +class << foo; nil; end +!!! test_self:952 +self +!!! test_send_attr_asgn:3528 +foo.a = 1 +!!! test_send_attr_asgn:3536 +foo::a = 1 +!!! test_send_attr_asgn:3544 +foo.A = 1 +!!! test_send_attr_asgn:3552 +foo::A = 1 +!!! test_send_attr_asgn_conditional:3751 +a&.b = 1 +!!! test_send_binary_op:3308 +foo + 1 +!!! test_send_binary_op:3314 +foo - 1 +!!! test_send_binary_op:3318 +foo * 1 +!!! test_send_binary_op:3322 +foo / 1 +!!! test_send_binary_op:3326 +foo % 1 +!!! 
test_send_binary_op:3330 +foo ** 1 +!!! test_send_binary_op:3334 +foo | 1 +!!! test_send_binary_op:3338 +foo ^ 1 +!!! test_send_binary_op:3342 +foo & 1 +!!! test_send_binary_op:3346 +foo <=> 1 +!!! test_send_binary_op:3350 +foo < 1 +!!! test_send_binary_op:3354 +foo <= 1 +!!! test_send_binary_op:3358 +foo > 1 +!!! test_send_binary_op:3362 +foo >= 1 +!!! test_send_binary_op:3366 +foo == 1 +!!! test_send_binary_op:3376 +foo != 1 +!!! test_send_binary_op:3382 +foo === 1 +!!! test_send_binary_op:3386 +foo =~ 1 +!!! test_send_binary_op:3396 +foo !~ 1 +!!! test_send_binary_op:3402 +foo << 1 +!!! test_send_binary_op:3406 +foo >> 1 +!!! test_send_block_chain_cmd:3201 +meth 1 do end.fun bar +!!! test_send_block_chain_cmd:3212 +meth 1 do end.fun(bar) +!!! test_send_block_chain_cmd:3225 +meth 1 do end::fun bar +!!! test_send_block_chain_cmd:3236 +meth 1 do end::fun(bar) +!!! test_send_block_chain_cmd:3249 +meth 1 do end.fun bar do end +!!! test_send_block_chain_cmd:3261 +meth 1 do end.fun(bar) {} +!!! test_send_block_chain_cmd:3273 +meth 1 do end.fun {} +!!! test_send_block_conditional:3759 +foo&.bar {} +!!! test_send_call:3721 +foo.(1) +!!! test_send_call:3731 +foo::(1) +!!! test_send_conditional:3743 +a&.b +!!! test_send_index:3562 +foo[1, 2] +!!! test_send_index_asgn:3591 +foo[1, 2] = 3 +!!! test_send_index_asgn_legacy:3603 +foo[1, 2] = 3 +!!! test_send_index_cmd:3584 +foo[m bar] +!!! test_send_index_legacy:3573 +foo[1, 2] +!!! test_send_lambda:3615 +->{ } +!!! test_send_lambda:3625 +-> * { } +!!! test_send_lambda:3636 +-> do end +!!! test_send_lambda_args:3648 +->(a) { } +!!! test_send_lambda_args:3662 +-> (a) { } +!!! test_send_lambda_args_noparen:3686 +-> a: 1 { } +!!! test_send_lambda_args_noparen:3695 +-> a: { } +!!! test_send_lambda_args_shadow:3673 +->(a; foo, bar) { } +!!! test_send_lambda_legacy:3707 +->{ } +!!! test_send_op_asgn_conditional:3770 +a&.b &&= 1 +!!! test_send_plain:3105 +foo.fun +!!! test_send_plain:3112 +foo::fun +!!! 
test_send_plain:3119 +foo::Fun() +!!! test_send_plain_cmd:3128 +foo.fun bar +!!! test_send_plain_cmd:3135 +foo::fun bar +!!! test_send_plain_cmd:3142 +foo::Fun bar +!!! test_send_self:3044 +fun +!!! test_send_self:3050 +fun! +!!! test_send_self:3056 +fun(1) +!!! test_send_self_block:3066 +fun { } +!!! test_send_self_block:3070 +fun() { } +!!! test_send_self_block:3074 +fun(1) { } +!!! test_send_self_block:3078 +fun do end +!!! test_send_unary_op:3412 +-foo +!!! test_send_unary_op:3418 ++foo +!!! test_send_unary_op:3422 +~foo +!!! test_slash_newline_in_heredocs:7186 +<<~E + 1 \ + 2 + 3 +E +!!! test_slash_newline_in_heredocs:7194 +<<-E + 1 \ + 2 + 3 +E +!!! test_space_args_arg:4132 +fun (1) +!!! test_space_args_arg_block:4146 +fun (1) {} +!!! test_space_args_arg_block:4160 +foo.fun (1) {} +!!! test_space_args_arg_block:4176 +foo::fun (1) {} +!!! test_space_args_arg_call:4198 +fun (1).to_i +!!! test_space_args_arg_newline:4138 +fun (1 +) +!!! test_space_args_block:4430 +fun () {} +!!! test_space_args_cmd:4125 +fun (f bar) +!!! test_string___FILE__:241 +__FILE__ +!!! test_string_concat:226 +"foo#@a" "bar" +!!! test_string_dvar:215 +"#@a #@@a #$a" +!!! test_string_interp:200 +"foo#{bar}baz" +!!! test_string_plain:184 +'foobar' +!!! test_string_plain:191 +%q(foobar) +!!! test_super:3807 +super(foo) +!!! test_super:3815 +super foo +!!! test_super:3821 +super() +!!! test_super_block:3839 +super foo, bar do end +!!! test_super_block:3845 +super do end +!!! test_symbol_interp:484 +:"foo#{bar}baz" +!!! test_symbol_plain:469 +:foo +!!! test_symbol_plain:475 +:'foo' +!!! test_ternary:4605 +foo ? 1 : 2 +!!! test_ternary_ambiguous_symbol:4614 +t=1;(foo)?t:T +!!! test_trailing_forward_arg:8022 +def foo(a, b, ...); bar(a, 42, ...); end +!!! test_true:89 +true +!!! test_unary_num_pow_precedence:3505 ++2.0 ** 10 +!!! test_unary_num_pow_precedence:3512 +-2 ** 10 +!!! test_unary_num_pow_precedence:3519 +-2.0 ** 10 +!!! test_undef:2003 +undef foo, :bar, :"foo#{1}" +!!! 
test_unless:4529 +unless foo then bar; end +!!! test_unless:4537 +unless foo; bar; end +!!! test_unless_else:4573 +unless foo then bar; else baz; end +!!! test_unless_else:4582 +unless foo; bar; else baz; end +!!! test_unless_mod:4546 +bar unless foo +!!! test_until:4948 +until foo do meth end +!!! test_until:4955 +until foo; meth end +!!! test_until_mod:4963 +meth until foo +!!! test_until_post:4978 +begin meth end until foo +!!! test_var_and_asgn:1714 +a &&= 1 +!!! test_var_op_asgn:1498 +a += 1 +!!! test_var_op_asgn:1504 +@a |= 1 +!!! test_var_op_asgn:1510 +@@var |= 10 +!!! test_var_op_asgn:1514 +def a; @@var |= 10; end +!!! test_var_op_asgn_cmd:1521 +foo += m foo +!!! test_var_or_asgn:1706 +a ||= 1 +!!! test_when_multi:4895 +case foo; when 'bar', 'baz'; bar; end +!!! test_when_splat:4904 +case foo; when 1, *baz; bar; when *foo; end +!!! test_when_then:4883 +case foo; when 'bar' then bar; end +!!! test_while:4924 +while foo do meth end +!!! test_while:4932 +while foo; meth end +!!! test_while_mod:4941 +meth while foo +!!! test_while_post:4970 +begin meth end while foo +!!! test_xstring_interp:524 +`foo#{bar}baz` +!!! test_xstring_plain:515 +`foobar` +!!! test_yield:3855 +yield(foo) +!!! test_yield:3863 +yield foo +!!! test_yield:3869 +yield() +!!! test_yield:3877 +yield +!!! test_zsuper:3831 +super diff --git a/test/translation/parser_test.rb b/test/translation/parser_test.rb new file mode 100644 index 00000000..ad87d8c6 --- /dev/null +++ b/test/translation/parser_test.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +require_relative "../test_helper" +require "parser/current" + +Parser::Builders::Default.modernize + +module SyntaxTree + module Translation + class ParserTest < Minitest::Test + known_failures = [ + # I think this may be a bug in the parser gem's precedence calculation. + # Unary plus appears to be parsed as part of the number literal in + # CRuby, but parser is parsing it as a separate operator. 
+ "test_unary_num_pow_precedence:3505", + + # Not much to be done about this. Basically, regular expressions with + # named capture groups that use the =~ operator inject local variables + # into the current scope. In the parser gem, it detects this and changes + # future references to that name to be a local variable instead of a + # potential method call. CRuby does not do this. + "test_lvar_injecting_match:3778", + + # This is failing because CRuby is not marking values captured in hash + # patterns as local variables, while the parser gem is. + "test_pattern_matching_hash:8971", + + # This is not actually allowed in the CRuby parser but the parser gem + # thinks it is allowed. + "test_pattern_matching_hash_with_string_keys:9016", + "test_pattern_matching_hash_with_string_keys:9027", + "test_pattern_matching_hash_with_string_keys:9038", + "test_pattern_matching_hash_with_string_keys:9060", + "test_pattern_matching_hash_with_string_keys:9071", + "test_pattern_matching_hash_with_string_keys:9082", + + # This happens with pattern matching where you're matching a literal + # value inside parentheses, which doesn't really do anything. Ripper + # doesn't capture that this value is inside a parentheses, so it's hard + # to translate properly. + "test_pattern_matching_expr_in_paren:9206", + + # These are also failing because of CRuby not marking values captured in + # hash patterns as local variables. + "test_pattern_matching_single_line_allowed_omission_of_parentheses:*", + + # I'm not even sure what this is testing, because the code is invalid in + # CRuby. 
+ "test_control_meta_escape_chars_in_regexp__since_31:*", + ] + + todo_failures = [ + "test_dedenting_heredoc:334", + "test_dedenting_heredoc:390", + "test_dedenting_heredoc:399", + "test_slash_newline_in_heredocs:7194", + "test_parser_slash_slash_n_escaping_in_literals:*", + "test_cond_match_current_line:4801", + "test_forwarded_restarg:*", + "test_forwarded_kwrestarg:*", + "test_forwarded_argument_with_restarg:*", + "test_forwarded_argument_with_kwrestarg:*" + ] + + current_version = RUBY_VERSION.split(".")[0..1].join(".") + + if current_version <= "2.7" + # I'm not sure why this is failing on 2.7.0, but we'll turn it off for + # now until we have more time to investigate. + todo_failures.push( + "test_pattern_matching_hash:*", + "test_pattern_matching_single_line:9552" + ) + end + + if current_version <= "3.0" + # In < 3.0, there are some changes to the way the parser gem handles + # forwarded args. We should eventually support this, but for now we're + # going to mark them as todo. + todo_failures.push( + "test_forward_arg:*", + "test_forward_args_legacy:*", + "test_endless_method_forwarded_args_legacy:*", + "test_trailing_forward_arg:*", + "test_forward_arg_with_open_args:10770", + ) + end + + if current_version == "3.1" + # This test actually fails on 3.1.0, even though it's marked as being + # since 3.1. So we're going to skip this test on 3.1, but leave it in + # for other versions. + known_failures.push( + "test_multiple_pattern_matches:11086", + "test_multiple_pattern_matches:11102" + ) + end + + if current_version < "3.2" || RUBY_ENGINE == "truffleruby" + known_failures.push( + "test_if_while_after_class__since_32:11004", + "test_if_while_after_class__since_32:11014", + "test_newline_in_hash_argument:11057" + ) + end + + all_failures = known_failures + todo_failures + + File + .foreach(File.expand_path("parser.txt", __dir__), chomp: true) + .slice_before { |line| line.start_with?("!!!") } + .each do |(prefix, *lines)| + name = prefix[4..] 
+ next if all_failures.any? { |pattern| File.fnmatch?(pattern, name) } + + define_method(name) { assert_parses("#{lines.join("\n")}\n") } + end + + private + + def assert_parses(source) + parser = ::Parser::CurrentRuby.default_parser + parser.diagnostics.consumer = ->(*) {} + + buffer = ::Parser::Source::Buffer.new("(string)", 1) + buffer.source = source + + expected = + begin + parser.parse(buffer) + rescue ::Parser::SyntaxError + # We can get a syntax error if we're parsing a fixture that was + # designed for a later Ruby version but we're running an earlier + # Ruby version. In this case we can just return early from the test. + end + + return if expected.nil? + node = SyntaxTree.parse(source) + assert_equal expected, SyntaxTree::Translation.to_parser(node, buffer) + end + end + end +end + +if ENV["PARSER_LOCATION"] + # Modify the source map == check so that it doesn't check against the node + # itself so we don't get into a recursive loop. + Parser::Source::Map.prepend( + Module.new do + def ==(other) + self.class == other.class && + (instance_variables - %i[@node]).map do |ivar| + instance_variable_get(ivar) == other.instance_variable_get(ivar) + end.reduce(:&) + end + end + ) + + # Next, ensure that we're comparing the nodes and also comparing the source + # ranges so that we're getting all of the necessary information. 
+ Parser::AST::Node.prepend( + Module.new do + def ==(other) + super && (location == other.location) + end + end + ) +end diff --git a/test/visitor_test.rb b/test/visitor_test.rb index 74f3df75..d9637df0 100644 --- a/test/visitor_test.rb +++ b/test/visitor_test.rb @@ -30,13 +30,15 @@ def initialize @visited_nodes = [] end - visit_method def visit_class(node) - @visited_nodes << node.constant.constant.value - super - end + visit_methods do + def visit_class(node) + @visited_nodes << node.constant.constant.value + super + end - visit_method def visit_def(node) - @visited_nodes << node.name.value + def visit_def(node) + @visited_nodes << node.name.value + end end end @@ -53,5 +55,19 @@ def test_visit_method_correction assert_match(/visit_binary/, message) end end + + class VisitMethodsTestVisitor < BasicVisitor + end + + def test_visit_methods + VisitMethodsTestVisitor.visit_methods do + assert_raises(BasicVisitor::VisitMethodError) do + # In reality, this would be a method defined using the def keyword, + # but we're using method_added here to trigger the checker so that we + # aren't defining methods dynamically in the test suite. 
+ VisitMethodsTestVisitor.method_added(:visit_foo) + end + end + end end end diff --git a/test/visitor_with_environment_test.rb b/test/visitor_with_environment_test.rb deleted file mode 100644 index cc4007fe..00000000 --- a/test/visitor_with_environment_test.rb +++ /dev/null @@ -1,659 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class VisitorWithEnvironmentTest < Minitest::Test - class Collector < Visitor - include WithEnvironment - - attr_reader :variables, :arguments - - def initialize - @variables = {} - @arguments = {} - end - - def visit_ident(node) - local = current_environment.find_local(node.value) - return unless local - - value = node.value.delete_suffix(":") - - case local.type - when :argument - @arguments[value] = local - when :variable - @variables[value] = local - end - end - - def visit_label(node) - value = node.value.delete_suffix(":") - local = current_environment.find_local(value) - return unless local - - @arguments[value] = node if local.type == :argument - end - end - - def test_collecting_simple_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = 1 - a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_aref_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = [] - a[1] - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_multi_assign_variables - 
tree = SyntaxTree.parse(<<~RUBY) - def foo - a, b = [1, 2] - puts a - puts b - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(2, visitor.variables.length) - - variable_a = visitor.variables["a"] - assert_equal(1, variable_a.definitions.length) - assert_equal(1, variable_a.usages.length) - - assert_equal(2, variable_a.definitions[0].start_line) - assert_equal(3, variable_a.usages[0].start_line) - - variable_b = visitor.variables["b"] - assert_equal(1, variable_b.definitions.length) - assert_equal(1, variable_b.usages.length) - - assert_equal(2, variable_b.definitions[0].start_line) - assert_equal(4, variable_b.usages[0].start_line) - end - - def test_collecting_pattern_matching_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - case [1, 2] - in Integer => a, Integer - puts a - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - # There are two occurrences, one on line 3 for pinning and one on line 4 - # for reference - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - - # Assignment a - assert_equal(3, variable.definitions[0].start_line) - assert_equal(4, variable.usages[0].start_line) - end - - def test_collecting_pinned_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = 18 - case [1, 2] - in ^a, *rest - puts a - puts rest - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(2, visitor.variables.length) - - variable_a = visitor.variables["a"] - assert_equal(2, variable_a.definitions.length) - assert_equal(1, variable_a.usages.length) - - assert_equal(2, variable_a.definitions[0].start_line) - assert_equal(4, variable_a.definitions[1].start_line) - assert_equal(5, variable_a.usages[0].start_line) - - variable_rest = visitor.variables["rest"] - assert_equal(1, variable_rest.definitions.length) - assert_equal(4, variable_rest.definitions[0].start_line) - - # Rest is considered a vcall by the parser instead of a var_ref - # assert_equal(1, 
variable_rest.usages.length) - # assert_equal(6, variable_rest.usages[0].start_line) - end - - if RUBY_VERSION >= "3.1" - def test_collecting_one_line_pattern_matching_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - [1] => a - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_endless_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) = puts a - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(1, argument.usages[0].start_line) - end - end - - def test_collecting_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_singleton_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def self.foo(a) - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_method_arguments_all_types 
- tree = SyntaxTree.parse(<<~RUBY) - def foo(a, b = 1, *c, d, e: 1, **f, &block) - puts a - puts b - puts c - puts d - puts e - puts f - block.call - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(7, visitor.arguments.length) - - argument_a = visitor.arguments["a"] - assert_equal(1, argument_a.definitions.length) - assert_equal(1, argument_a.usages.length) - assert_equal(1, argument_a.definitions[0].start_line) - assert_equal(2, argument_a.usages[0].start_line) - - argument_b = visitor.arguments["b"] - assert_equal(1, argument_b.definitions.length) - assert_equal(1, argument_b.usages.length) - assert_equal(1, argument_b.definitions[0].start_line) - assert_equal(3, argument_b.usages[0].start_line) - - argument_c = visitor.arguments["c"] - assert_equal(1, argument_c.definitions.length) - assert_equal(1, argument_c.usages.length) - assert_equal(1, argument_c.definitions[0].start_line) - assert_equal(4, argument_c.usages[0].start_line) - - argument_d = visitor.arguments["d"] - assert_equal(1, argument_d.definitions.length) - assert_equal(1, argument_d.usages.length) - assert_equal(1, argument_d.definitions[0].start_line) - assert_equal(5, argument_d.usages[0].start_line) - - argument_e = visitor.arguments["e"] - assert_equal(1, argument_e.definitions.length) - assert_equal(1, argument_e.usages.length) - assert_equal(1, argument_e.definitions[0].start_line) - assert_equal(6, argument_e.usages[0].start_line) - - argument_f = visitor.arguments["f"] - assert_equal(1, argument_f.definitions.length) - assert_equal(1, argument_f.usages.length) - assert_equal(1, argument_f.definitions[0].start_line) - assert_equal(7, argument_f.usages[0].start_line) - - argument_block = visitor.arguments["block"] - assert_equal(1, argument_block.definitions.length) - assert_equal(1, argument_block.usages.length) - assert_equal(1, argument_block.definitions[0].start_line) - assert_equal(8, argument_block.usages[0].start_line) - end - - def 
test_collecting_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - [].each do |i| - puts i - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(2, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - end - - def test_collecting_one_line_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - [].each { |i| puts i } - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(2, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_shadowed_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - i = "something" - - [].each do |i| - puts i - end - - i - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(4, argument.definitions[0].start_line) - assert_equal(5, argument.usages[0].start_line) - - variable = visitor.variables["i"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - assert_equal(2, variable.definitions[0].start_line) - assert_equal(8, variable.usages[0].start_line) - end - - def test_collecting_shadowed_local_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) - puts a - a = 123 - a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - # All occurrences are considered arguments, despite overriding the - # argument value - assert_equal(1, 
visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["a"] - assert_equal(2, argument.definitions.length) - assert_equal(2, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.definitions[1].start_line) - assert_equal(2, argument.usages[0].start_line) - assert_equal(4, argument.usages[1].start_line) - end - - def test_variables_in_the_top_level - tree = SyntaxTree.parse(<<~RUBY) - a = 123 - a - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_field - tree = SyntaxTree.parse(<<~RUBY) - object = {} - object["name"] = "something" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_on_a_method_call - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object.attributes["name"] = "something" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_with_two_accesses - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new 
- object["first"]["second"] ||= [] - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_on_a_method_call_with_arguments - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object.instance_variable_get(:@attributes)[:something] = :other_thing - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_double_aref_on_method_call - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object["attributes"].find { |a| a["field"] == "expected" }["value"] = "changed" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(2, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_nested_arguments - tree = SyntaxTree.parse(<<~RUBY) - [[1, [2, 3]]].each do |one, (two, three)| - one - two - three - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - 
assert_equal(3, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["one"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - - argument = visitor.arguments["two"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - - argument = visitor.arguments["three"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(4, argument.usages[0].start_line) - end - - def test_double_nested_arguments - tree = SyntaxTree.parse(<<~RUBY) - [[1, [2, 3]]].each do |one, (two, (three, four))| - one - two - three - four - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(4, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["one"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - - argument = visitor.arguments["two"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - - argument = visitor.arguments["three"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(4, argument.usages[0].start_line) - - argument = visitor.arguments["four"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - 
assert_equal(5, argument.usages[0].start_line) - end - - class Resolver < Visitor - include WithEnvironment - - attr_reader :locals - - def initialize - @locals = [] - end - - def visit_assign(node) - level = 0 - environment = current_environment - level += 1 until (environment = environment.parent).nil? - - locals << [node.target.value.value, level] - super - end - end - - def test_class - source = <<~RUBY - module Level0 - level0 = 0 - - module Level1 - level1 = 1 - - class Level2 - level2 = 2 - end - end - end - RUBY - - visitor = Resolver.new - SyntaxTree.parse(source).accept(visitor) - - assert_equal [["level0", 0], ["level1", 1], ["level2", 2]], visitor.locals - end - end -end diff --git a/test/with_scope_test.rb b/test/with_scope_test.rb new file mode 100644 index 00000000..9675e811 --- /dev/null +++ b/test/with_scope_test.rb @@ -0,0 +1,457 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module SyntaxTree + class WithScopeTest < Minitest::Test + class Collector < Visitor + prepend WithScope + + attr_reader :arguments, :variables + + def initialize + @arguments = {} + @variables = {} + end + + def self.collect(source) + new.tap { SyntaxTree.parse(source).accept(_1) } + end + + visit_methods do + def visit_ident(node) + value = node.value.delete_suffix(":") + local = current_scope.find_local(node.value) + + case local&.type + when :argument + arguments[[current_scope.id, value]] = local + when :variable + variables[[current_scope.id, value]] = local + end + end + + def visit_label(node) + value = node.value.delete_suffix(":") + local = current_scope.find_local(value) + + if local&.type == :argument + arguments[[current_scope.id, value]] = node + end + end + end + end + + def test_collecting_simple_variables + collector = Collector.collect(<<~RUBY) + def foo + a = 1 + a + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_aref_variables 
+ collector = Collector.collect(<<~RUBY) + def foo + a = [] + a[1] + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_multi_assign_variables + collector = Collector.collect(<<~RUBY) + def foo + a, b = [1, 2] + puts a + puts b + end + RUBY + + assert_equal(2, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + assert_variable(collector, "b", definitions: [2], usages: [4]) + end + + def test_collecting_pattern_matching_variables + collector = Collector.collect(<<~RUBY) + def foo + case [1, 2] + in Integer => a, Integer + puts a + end + end + RUBY + + # There are two occurrences, one on line 3 for pinning and one on line 4 + # for reference + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [3], usages: [4]) + end + + def test_collecting_pinned_variables + collector = Collector.collect(<<~RUBY) + def foo + a = 18 + case [1, 2] + in ^a, *rest + puts a + puts rest + end + end + RUBY + + assert_equal(2, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [4, 5]) + assert_variable(collector, "rest", definitions: [4]) + + # Rest is considered a vcall by the parser instead of a var_ref + # assert_equal(1, variable_rest.usages.length) + # assert_equal(6, variable_rest.usages[0].start_line) + end + + if RUBY_VERSION >= "3.1" + def test_collecting_one_line_pattern_matching_variables + collector = Collector.collect(<<~RUBY) + def foo + [1] => a + puts a + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_endless_method_arguments + collector = Collector.collect(<<~RUBY) + def foo(a) = puts a + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [1]) + end + end + + def 
test_collecting_method_arguments + collector = Collector.collect(<<~RUBY) + def foo(a) + puts a + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + end + + def test_collecting_singleton_method_arguments + collector = Collector.collect(<<~RUBY) + def self.foo(a) + puts a + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + end + + def test_collecting_method_arguments_all_types + collector = Collector.collect(<<~RUBY) + def foo(a, b = 1, *c, d, e: 1, **f, &block) + puts a + puts b + puts c + puts d + puts e + puts f + block.call + end + RUBY + + assert_equal(7, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + assert_argument(collector, "b", definitions: [1], usages: [3]) + assert_argument(collector, "c", definitions: [1], usages: [4]) + assert_argument(collector, "d", definitions: [1], usages: [5]) + assert_argument(collector, "e", definitions: [1], usages: [6]) + assert_argument(collector, "f", definitions: [1], usages: [7]) + assert_argument(collector, "block", definitions: [1], usages: [8]) + end + + def test_collecting_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + [].each do |i| + puts i + end + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [2], usages: [3]) + end + + def test_collecting_one_line_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + [].each { |i| puts i } + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [2], usages: [2]) + end + + def test_collecting_shadowed_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + i = "something" + + [].each do |i| + puts i + end + + i + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [4], 
usages: [5]) + + assert_equal(1, collector.variables.length) + assert_variable(collector, "i", definitions: [2], usages: [8]) + end + + def test_collecting_shadowed_local_variables + collector = Collector.collect(<<~RUBY) + def foo(a) + puts a + a = 123 + a + end + RUBY + + # All occurrences are considered arguments, despite overriding the + # argument value + assert_equal(1, collector.arguments.length) + assert_equal(0, collector.variables.length) + assert_argument(collector, "a", definitions: [1, 3], usages: [2, 4]) + end + + def test_variables_in_the_top_level + collector = Collector.collect(<<~RUBY) + a = 123 + a + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [1], usages: [2]) + end + + def test_aref_field + collector = Collector.collect(<<~RUBY) + object = {} + object["name"] = "something" + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_on_a_method_call + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object.attributes["name"] = "something" + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_with_two_accesses + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object["first"]["second"] ||= [] + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_on_a_method_call_with_arguments + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object.instance_variable_get(:@attributes)[:something] = :other_thing + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, 
collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_double_aref_on_method_call + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object["attributes"].find { |a| a["field"] == "expected" }["value"] = "changed" + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [2], usages: [2]) + + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_nested_arguments + collector = Collector.collect(<<~RUBY) + [[1, [2, 3]]].each do |one, (two, three)| + one + two + three + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_equal(0, collector.variables.length) + + assert_argument(collector, "one", definitions: [1], usages: [2]) + assert_argument(collector, "two", definitions: [1], usages: [3]) + assert_argument(collector, "three", definitions: [1], usages: [4]) + end + + def test_double_nested_arguments + collector = Collector.collect(<<~RUBY) + [[1, [2, 3]]].each do |one, (two, (three, four))| + one + two + three + four + end + RUBY + + assert_equal(4, collector.arguments.length) + assert_equal(0, collector.variables.length) + + assert_argument(collector, "one", definitions: [1], usages: [2]) + assert_argument(collector, "two", definitions: [1], usages: [3]) + assert_argument(collector, "three", definitions: [1], usages: [4]) + assert_argument(collector, "four", definitions: [1], usages: [5]) + end + + class Resolver < Visitor + prepend WithScope + + attr_reader :locals + + def initialize + @locals = [] + end + + visit_methods do + def visit_assign(node) + super.tap do + level = 0 + name = node.target.value.value + + scope = current_scope + while !scope.locals.key?(name) && !scope.parent.nil? 
+ level += 1 + scope = scope.parent + end + + locals << [name, level] + end + end + end + end + + def test_resolver + source = <<~RUBY + module Level0 + level0 = 0 + + class Level1 + level1 = 1 + + def level2 + level2 = 2 + + tap do |level3| + level2 = 2 + level3 = 3 + + tap do |level4| + level2 = 2 + level4 = 4 + end + end + end + end + end + RUBY + + resolver = Resolver.new + SyntaxTree.parse(source).accept(resolver) + + expected = [ + ["level0", 0], + ["level1", 0], + ["level2", 0], + ["level2", 1], + ["level3", 0], + ["level2", 2], + ["level4", 0] + ] + + assert_equal expected, resolver.locals + end + + private + + def assert_collected(field, name, definitions: [], usages: []) + keys = field.keys.select { |key| key[1] == name } + assert_equal(1, keys.length) + + variable = field[keys.first] + + assert_equal(definitions.length, variable.definitions.length) + definitions.each_with_index do |definition, index| + assert_equal(definition, variable.definitions[index].start_line) + end + + assert_equal(usages.length, variable.usages.length) + usages.each_with_index do |usage, index| + assert_equal(usage, variable.usages[index].start_line) + end + end + + def assert_argument(collector, name, definitions: [], usages: []) + assert_collected( + collector.arguments, + name, + definitions: definitions, + usages: usages + ) + end + + def assert_variable(collector, name, definitions: [], usages: []) + assert_collected( + collector.variables, + name, + definitions: definitions, + usages: usages + ) + end + end +end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index e3995435..78622434 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -288,41 +288,188 @@ def value end end - instructions = - YARV.constants.map { YARV.const_get(_1) } + - YARV::Legacy.constants.map { YARV::Legacy.const_get(_1) } - - [ - YARV::Assembler, - YARV::Bf, - YARV::CallData, - YARV::Compiler, - YARV::Decompiler, - YARV::Disassembler, - YARV::InstructionSequence, - YARV::Legacy, - 
YARV::LocalTable, - YARV::VM - ] + ObjectSpace.each_object(YARV::Instruction.singleton_class) do |instruction| + next if instruction == YARV::Instruction - interface = %i[ - disasm - to_a - deconstruct_keys - length - pops - pushes - canonical - call - == - ] - - instructions.each do |instruction| define_method("test_instruction_interface_#{instruction.name}") do - instance_methods = instruction.instance_methods(false) - assert_empty(interface - instance_methods) + methods = instruction.instance_methods(false) + assert_empty(%i[disasm to_a deconstruct_keys call ==] - methods) end end + def test_cfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + + assert_equal(<<~DISASM, cfg.disasm) + == cfg: #@:1 (1,0)-(1,0)> + block_0 + 0000 putobject 100 + 0002 putobject 14 + 0004 putobject_INT2FIX_0_ + 0005 opt_lt + 0007 branchunless 13 + == to: block_13, block_9 + block_9 + == from: block_0 + 0009 putobject -1 + 0011 jump 14 + == to: block_14 + block_13 + == from: block_0 + 0013 putobject_INT2FIX_1_ + == to: block_14 + block_14 + == from: block_9, block_13 + 0014 opt_plus + 0016 leave + == to: leaves + DISASM + end + + def test_dfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? 
-1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + + assert_equal(<<~DISASM, dfg.disasm) + == dfg: #@:1 (1,0)-(1,0)> + block_0 + 0000 putobject 100 # out: out_0 + 0002 putobject 14 # out: 5 + 0004 putobject_INT2FIX_0_ # out: 5 + 0005 opt_lt # in: 2, 4; out: 7 + 0007 branchunless 13 # in: 5 + == to: block_13, block_9 + == out: 0 + block_9 + == from: block_0 + == in: pass_0 + 0009 putobject -1 # out: out_0 + 0011 jump 14 + == to: block_14 + == out: pass_0, 9 + block_13 + == from: block_0 + == in: pass_0 + 0013 putobject_INT2FIX_1_ # out: out_0 + == to: block_14 + == out: pass_0, 13 + block_14 + == from: block_9, block_13 + == in: in_0, in_1 + 0014 opt_plus # in: in_0, in_1; out: 16 + 0016 leave # in: 14 + == to: leaves + DISASM + end + + def test_son + iseq = RubyVM::InstructionSequence.compile("(14 < 0 ? -1 : +1) + 100") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) + + assert_equal(<<~MERMAID, son.to_mermaid) + flowchart TD + node_0("0000 putobject 14") + node_2("0002 putobject_INT2FIX_0_") + node_3("0003 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") + node_5("0005 branchunless 0011") + node_7("0007 putobject -1") + node_11("0011 putobject_INT2FIX_1_") + node_12("0012 putobject 100") + node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") + node_16("0016 leave") + node_1000("1000 ψ") + node_1001("1001 φ") + node_0 -- "0" --> node_3 + node_2 -- "1" --> node_3 + node_3 --> node_5 + node_3 -- "0" --> node_5 + node_5 -- "branch0" --> node_11 + node_5 -- "fallthrough" --> node_1000 + node_7 -- "0009" --> node_1001 + node_11 -- "branch0" --> node_1000 + node_11 -- "0011" --> node_1001 + node_12 -- "1" --> node_14 + node_14 --> node_16 + node_14 -- "0" --> 
node_16 + node_1000 --> node_14 + node_1001 -.-> node_1000 + node_1001 -- "0" --> node_14 + linkStyle 0 stroke:green + linkStyle 1 stroke:green + linkStyle 2 stroke:red + linkStyle 3 stroke:green + linkStyle 4 stroke:red + linkStyle 5 stroke:red + linkStyle 6 stroke:green + linkStyle 7 stroke:red + linkStyle 8 stroke:green + linkStyle 9 stroke:green + linkStyle 10 stroke:red + linkStyle 11 stroke:green + linkStyle 12 stroke:red + linkStyle 14 stroke:green + MERMAID + end + + def test_son_indirect_basic_block_argument + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) + + assert_equal(<<~MERMAID, son.to_mermaid) + flowchart TD + node_0("0000 putobject 100") + node_2("0002 putobject 14") + node_4("0004 putobject_INT2FIX_0_") + node_5("0005 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") + node_7("0007 branchunless 0013") + node_9("0009 putobject -1") + node_13("0013 putobject_INT2FIX_1_") + node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") + node_16("0016 leave") + node_1002("1002 ψ") + node_1004("1004 φ") + node_0 -- "0" --> node_14 + node_2 -- "0" --> node_5 + node_4 -- "1" --> node_5 + node_5 --> node_7 + node_5 -- "0" --> node_7 + node_7 -- "branch0" --> node_13 + node_7 -- "fallthrough" --> node_1002 + node_9 -- "0011" --> node_1004 + node_13 -- "branch0" --> node_1002 + node_13 -- "0013" --> node_1004 + node_14 --> node_16 + node_14 -- "0" --> node_16 + node_1002 --> node_14 + node_1004 -.-> node_1002 + node_1004 -- "1" --> node_14 + linkStyle 0 stroke:green + linkStyle 1 stroke:green + linkStyle 2 stroke:green + linkStyle 3 stroke:red + linkStyle 4 stroke:green + linkStyle 5 stroke:red + linkStyle 6 stroke:red + linkStyle 7 stroke:green + linkStyle 8 stroke:red + linkStyle 9 stroke:green + linkStyle 10 
stroke:red + linkStyle 11 stroke:green + linkStyle 12 stroke:red + linkStyle 14 stroke:green + MERMAID + end + private def assert_decompiles(expected, source)