Skip to content

Parser location #306

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/syntax_tree/formatter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def format(node, stackable: true)
# going to just print out the node as it was seen in the source.
doc =
if last_leading&.ignore?
range = source[node.location.start_char...node.location.end_char]
range = source[node.start_char...node.end_char]
first = true

range.each_line(chomp: true) do |line|
Expand Down
8 changes: 8 additions & 0 deletions lib/syntax_tree/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ def format(q)
raise NotImplementedError
end

# Returns the character offset in the source where this node begins.
# Convenience delegator for location.start_char so callers don't have to
# reach through the location object.
def start_char
  location.start_char
end

# Returns the character offset in the source where this node ends.
# Convenience delegator for location.end_char, mirroring #start_char.
def end_char
  location.end_char
end

def pretty_print(q)
accept(Visitor::PrettyPrintVisitor.new(q))
end
Expand Down
211 changes: 149 additions & 62 deletions lib/syntax_tree/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,37 @@ def find_token(type)
tokens[index] if index
end

# Returns the last token of the given class whose starting character lies
# between the end of the +left+ node and the start of the +right+ node, or
# nil when no such token exists.
#
# The token list is scanned from the back; once a token starts before the
# lower bound we bail out early, since every earlier token is also out of
# range.
def find_token_between(type, left, right)
  # Use the node-level start_char/end_char delegators for consistency with
  # find_keyword_between below.
  bounds = left.end_char...right.start_char
  index =
    tokens.rindex do |token|
      char = token.location.start_char
      break if char < bounds.begin

      token.is_a?(type) && bounds.cover?(char)
    end

  tokens[index] if index
end

# Returns the most recently lexed keyword token with the given name, or
# nil when the token list does not contain one.
def find_keyword(name)
  tokens.reverse_each.find do |token|
    token.is_a?(Kw) && token.name == name
  end
end

# Returns the last keyword token with the given name whose starting
# character lies between the end of the +left+ node and the start of the
# +right+ node, or nil when no such keyword exists.
def find_keyword_between(name, left, right)
  range = left.end_char...right.start_char

  tokens.reverse_each do |token|
    char = token.location.start_char

    # Tokens earlier in the list start even further left, so once we have
    # passed the lower bound there is nothing left to find.
    return nil if char < range.begin
    return token if token.is_a?(Kw) && token.name == name && range.cover?(char)
  end

  nil
end

def find_operator(name)
index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) }
tokens[index] if index
Expand Down Expand Up @@ -645,7 +671,7 @@ def visit_var_ref(node)
end

def self.visit(node, tokens)
start_char = node.location.start_char
start_char = node.start_char
allocated = []

tokens.reverse_each do |token|
Expand Down Expand Up @@ -874,13 +900,34 @@ def on_binary(left, operator, right)
# on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar
def on_block_var(params, locals)
index =
tokens.rindex do |node|
node.is_a?(Op) && %w[| ||].include?(node.value) &&
node.location.start_char < params.location.start_char
end
tokens.rindex { |node| node.is_a?(Op) && %w[| ||].include?(node.value) }

ending = tokens.delete_at(index)
beginning = ending.value == "||" ? ending : consume_operator(:|)

# If there are no parameters, then we didn't have anything to base the
# location information off of. Now that we have an opening of the
# block, we can correct this.
if params.empty?
start_line = params.location.start_line
start_char =
(
if beginning.value == "||"
beginning.location.start_char
else
find_next_statement_start(beginning.location.end_char)
end
)

beginning = tokens[index]
ending = tokens[-1]
location =
Location.fixed(
line: start_line,
char: start_char,
column: start_char - line_counts[start_line - 1].start
)

params = params.copy(location: location)
end

BlockVar.new(
params: params,
Expand Down Expand Up @@ -1760,21 +1807,19 @@ def on_for(index, collection, statements)
in_keyword = consume_keyword(:in)
ending = consume_keyword(:end)

# Consume the do keyword if it exists so that it doesn't get confused for
# some other block
keyword = find_keyword(:do)
if keyword &&
keyword.location.start_char > collection.location.end_char &&
keyword.location.end_char < ending.location.start_char
tokens.delete(keyword)
end
delimiter =
find_keyword_between(:do, collection, ending) ||
find_token_between(Semicolon, collection, ending)

tokens.delete(delimiter) if delimiter

start_char =
find_next_statement_start((keyword || collection).location.end_char)
find_next_statement_start((delimiter || collection).location.end_char)

statements.bind(
start_char,
start_char -
line_counts[(keyword || collection).location.end_line - 1].start,
line_counts[(delimiter || collection).location.end_line - 1].start,
ending.location.start_char,
ending.location.start_column
)
Expand Down Expand Up @@ -1984,7 +2029,12 @@ def on_if(predicate, statements, consequent)
beginning = consume_keyword(:if)
ending = consequent || consume_keyword(:end)

start_char = find_next_statement_start(predicate.location.end_char)
if (keyword = find_keyword_between(:then, predicate, ending))
tokens.delete(keyword)
end

start_char =
find_next_statement_start((keyword || predicate).location.end_char)
statements.bind(
start_char,
start_char - line_counts[predicate.location.end_line - 1].start,
Expand Down Expand Up @@ -2068,7 +2118,8 @@ def on_in(pattern, statements, consequent)
statements_start = token
end

start_char = find_next_statement_start(statements_start.location.end_char)
start_char =
find_next_statement_start((token || statements_start).location.end_char)
statements.bind(
start_char,
start_char -
Expand Down Expand Up @@ -2194,12 +2245,19 @@ def on_lambda(params, statements)
token.location.start_char > beginning.location.start_char
end

if braces
opening = consume_token(TLamBeg)
closing = consume_token(RBrace)
else
opening = consume_keyword(:do)
closing = consume_keyword(:end)
end

# We need to do some special mapping here. Since ripper doesn't support
# capturing lambda var until 3.2, we need to normalize all of that here.
# capturing lambda vars, we need to normalize all of that here.
params =
case params
when Paren
# In this case we've gotten to the <3.2 parentheses wrapping a set of
if params.is_a?(Paren)
# In this case we've gotten to the parentheses wrapping a set of
# parameters case. Here we need to manually scan for lambda locals.
range = (params.location.start_char + 1)...params.location.end_char
locals = lambda_locals(source[range])
Expand All @@ -2221,25 +2279,28 @@ def on_lambda(params, statements)

node.comments.concat(params.comments)
node
when Params
# In this case we've gotten to the <3.2 plain set of parameters. In
# this case there cannot be lambda locals, so we will wrap the
# parameters into a lambda var that has no locals.
else
# If there are no parameters, then we didn't have anything to base the
# location information off of. Now that we have an opening of the
# block, we can correct this.
if params.empty?
opening_location = opening.location
location =
Location.fixed(
line: opening_location.start_line,
char: opening_location.start_char,
column: opening_location.start_column
)

params = params.copy(location: location)
end

# In this case we've gotten to the plain set of parameters. In this
# case there cannot be lambda locals, so we will wrap the parameters
# into a lambda var that has no locals.
LambdaVar.new(params: params, locals: [], location: params.location)
when LambdaVar
# In this case we've gotten to 3.2+ lambda var. In this case we don't
# need to do anything and can just the value as given.
params
end

if braces
opening = consume_token(TLamBeg)
closing = consume_token(RBrace)
else
opening = consume_keyword(:do)
closing = consume_keyword(:end)
end

start_char = find_next_statement_start(opening.location.end_char)
statements.bind(
start_char,
Expand Down Expand Up @@ -3134,7 +3195,7 @@ def on_rescue(exceptions, variable, statements, consequent)
exceptions = exceptions[0] if exceptions.is_a?(Array)

last_node = variable || exceptions || keyword
start_char = find_next_statement_start(last_node.location.end_char)
start_char = find_next_statement_start(last_node.end_char)
statements.bind(
start_char,
start_char - line_counts[last_node.location.start_line - 1].start,
Expand All @@ -3156,7 +3217,7 @@ def on_rescue(exceptions, variable, statements, consequent)
start_char: keyword.location.end_char + 1,
start_column: keyword.location.end_column + 1,
end_line: last_node.location.end_line,
end_char: last_node.location.end_char,
end_char: last_node.end_char,
end_column: last_node.location.end_column
)
)
Expand Down Expand Up @@ -3267,9 +3328,29 @@ def on_sclass(target, bodystmt)
)
end

# def on_semicolon(value)
# value
# end
# Semicolons are tokens that get added to the token list but never get
# attached to the AST. Because of this they only need to track their
# associated location so they can be used for computing bounds.
class Semicolon
  # [Location] the position of this semicolon in the source
  attr_reader :location

  def initialize(location)
    @location = location
  end
end

# :call-seq:
#   on_semicolon: (String value) -> Semicolon
def on_semicolon(value)
  # Build the location from the lexer's current position. The token is
  # only kept on the token list (for bounds computations) and is never
  # attached to the AST.
  location =
    Location.token(
      line: lineno,
      char: char_pos,
      column: current_column,
      size: value.size
    )

  tokens << Semicolon.new(location)
end

# def on_sp(value)
# value
Expand Down Expand Up @@ -3706,7 +3787,12 @@ def on_unless(predicate, statements, consequent)
beginning = consume_keyword(:unless)
ending = consequent || consume_keyword(:end)

start_char = find_next_statement_start(predicate.location.end_char)
if (keyword = find_keyword_between(:then, predicate, ending))
tokens.delete(keyword)
end

start_char =
find_next_statement_start((keyword || predicate).location.end_char)
statements.bind(
start_char,
start_char - line_counts[predicate.location.end_line - 1].start,
Expand Down Expand Up @@ -3742,16 +3828,16 @@ def on_until(predicate, statements)
beginning = consume_keyword(:until)
ending = consume_keyword(:end)

# Consume the do keyword if it exists so that it doesn't get confused for
# some other block
keyword = find_keyword(:do)
if keyword && keyword.location.start_char > predicate.location.end_char &&
keyword.location.end_char < ending.location.start_char
tokens.delete(keyword)
end
delimiter =
find_keyword_between(:do, predicate, statements) ||
find_token_between(Semicolon, predicate, statements)

tokens.delete(delimiter) if delimiter

# Update the Statements location information
start_char = find_next_statement_start(predicate.location.end_char)
start_char =
find_next_statement_start((delimiter || predicate).location.end_char)

statements.bind(
start_char,
start_char - line_counts[predicate.location.end_line - 1].start,
Expand Down Expand Up @@ -3845,7 +3931,8 @@ def on_when(arguments, statements, consequent)
statements_start = token
end

start_char = find_next_statement_start(statements_start.location.end_char)
start_char =
find_next_statement_start((token || statements_start).location.end_char)

statements.bind(
start_char,
Expand All @@ -3869,16 +3956,16 @@ def on_while(predicate, statements)
beginning = consume_keyword(:while)
ending = consume_keyword(:end)

# Consume the do keyword if it exists so that it doesn't get confused for
# some other block
keyword = find_keyword(:do)
if keyword && keyword.location.start_char > predicate.location.end_char &&
keyword.location.end_char < ending.location.start_char
tokens.delete(keyword)
end
delimiter =
find_keyword_between(:do, predicate, statements) ||
find_token_between(Semicolon, predicate, statements)

tokens.delete(delimiter) if delimiter

# Update the Statements location information
start_char = find_next_statement_start(predicate.location.end_char)
start_char =
find_next_statement_start((delimiter || predicate).location.end_char)

statements.bind(
start_char,
start_char - line_counts[predicate.location.end_line - 1].start,
Expand Down
11 changes: 11 additions & 0 deletions lib/syntax_tree/translation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,16 @@ def self.to_parser(node, buffer)

node.accept(Parser.new(buffer))
end

# This method translates the given node into the representation defined by
# the rubocop/rubocop-ast gem. We don't explicitly list it as a dependency
# because it's not required for the core functionality of Syntax Tree, so
# the requires are deferred until the first call.
def self.to_rubocop_ast(node, buffer)
  require "rubocop/ast"
  require_relative "translation/parser"
  require_relative "translation/rubocop_ast"

  visitor = RuboCopAST.new(buffer)
  node.accept(visitor)
end
end
end
Loading