Skip to content

Sea of nodes representation #291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/syntax_tree.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
require_relative "syntax_tree/yarv/instructions"
require_relative "syntax_tree/yarv/legacy"
require_relative "syntax_tree/yarv/local_table"
require_relative "syntax_tree/yarv/sea_of_nodes"
require_relative "syntax_tree/yarv/assembler"
require_relative "syntax_tree/yarv/vm"

Expand Down
212 changes: 123 additions & 89 deletions lib/syntax_tree/yarv/control_flow_graph.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,93 +14,6 @@ module YARV
# cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
#
class ControlFlowGraph
# This is the instruction sequence that this control flow graph
# corresponds to.
attr_reader :iseq

# This is the list of instructions that this control flow graph contains.
# It is effectively the same as the list of instructions in the
# instruction sequence but with line numbers and events filtered out.
attr_reader :insns

# This is the set of basic blocks that this control-flow graph contains.
attr_reader :blocks

# Stores the three pieces that make up a control flow graph: the source
# instruction sequence, the instruction list, and the basic blocks.
def initialize(iseq, insns, blocks)
  @iseq, @insns, @blocks = iseq, insns, blocks
end

# Renders the control flow graph as text: a header line for the instruction
# sequence, then for each basic block its id, the ids of the blocks that
# lead into it (if any), its instructions, and the ids of the blocks it
# leads to. Returns the formatter's accumulated string.
def disasm
  formatter = Disassembler.new(iseq)
  formatter.puts("== cfg: #{iseq.inspect}")

  blocks.each do |basic_block|
    formatter.puts(basic_block.id)

    formatter.with_prefix(" ") do |indent|
      # The "from" line is omitted entirely for blocks with no predecessors.
      incoming_ids = basic_block.incoming_blocks.map(&:id)
      formatter.puts("#{indent}== from: #{incoming_ids.join(", ")}") unless incoming_ids.empty?

      formatter.format_insns!(basic_block.insns, basic_block.block_start)

      # A block whose final instruction leaves gets a synthetic "leaves"
      # entry appended to its list of targets.
      outgoing_ids = basic_block.outgoing_blocks.map(&:id)
      outgoing_ids.push("leaves") if basic_block.insns.last.leaves?
      formatter.puts("#{indent}== to: #{outgoing_ids.join(", ")}")
    end
  end

  formatter.string
end

# Renders the control flow graph as a Mermaid "flowchart TD" document and
# returns it as a string. Each basic block becomes a subgraph whose
# instructions are chained together; a second pass adds the edges that
# connect one block to another.
def to_mermaid
  buffer = StringIO.new
  buffer.puts("flowchart TD")

  formatter = Disassembler::Mermaid.new

  # First pass: one subgraph per block, one node per instruction.
  blocks.each do |basic_block|
    buffer.puts(" subgraph #{basic_block.id}")

    last_node = nil
    basic_block.each_with_length do |insn, offset|
      node = "node_#{offset}"
      text = format("%04d %s", offset, insn.disasm(formatter))

      # Labels are HTML-escaped before being embedded in the node text.
      buffer.puts(" #{node}(\"#{CGI.escapeHTML(text)}\")")
      buffer.puts(" #{last_node} --> #{node}") unless last_node.nil?

      last_node = node
    end

    buffer.puts(" end")
  end

  # Second pass: link the final instruction of each block to the first
  # instruction of every block it can flow into.
  blocks.each do |source|
    source.outgoing_blocks.each do |target|
      body = source.insns
      # Block start plus the lengths of every instruction except the last,
      # which is used as the node id of the block's final instruction.
      tail_offset = source.block_start + body.sum(&:length) - body.last.length

      buffer.puts(" node_#{tail_offset} --> node_#{target.block_start}")
    end
  end

  buffer.string
end

# Checks that the control flow graph is well formed by calling #verify on
# every basic block in turn. What an individual block checks, and how it
# reports a failure, is defined by the block itself and is not visible
# here.
def verify
blocks.each(&:verify)
end

# Convenience entry point: wraps the given instruction sequence in a
# Compiler and returns the result of Compiler#compile.
def self.compile(iseq)
Compiler.new(iseq).compile
end

# This class is responsible for creating a control flow graph from the
# given instruction sequence.
class Compiler
Expand Down Expand Up @@ -139,7 +52,11 @@ def initialize(iseq)
# This method is used to compile the instruction sequence into a control
# flow graph. It returns an instance of ControlFlowGraph, which has had
# #verify called on it (through tap) before being returned.
def compile
# NOTE(review): this line and the next both assign `blocks`. In this diff
# view the first looks like the pre-change line that the following lines
# replace -- confirm against the merged file.
blocks = connect_basic_blocks(build_basic_blocks)
blocks = build_basic_blocks

# connect_basic_blocks presumably wires up the edges between blocks (its
# body is not fully shown here); prune_basic_blocks then removes the
# blocks that cannot be reached from the block at offset 0.
connect_basic_blocks(blocks)
prune_basic_blocks(blocks)

# blocks is a hash at this point (prune_basic_blocks uses fetch/select! on
# it), so only its values are handed to the graph.
ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify)
end

Expand Down Expand Up @@ -187,7 +104,16 @@ def build_basic_blocks

block_starts
.zip(blocks)
.to_h do |block_start, block_insns|
.to_h do |block_start, insns|
# It's possible that we have not detected a block start but still
# have branching instructions inside of a basic block. This can
# happen if you have an unconditional jump which is followed by
# instructions that are unreachable. As of Ruby 3.2, this is
# possible with something as simple as "1 => a". In this case we
# can discard all instructions that follow branching instructions.
block_insns =
insns.slice_after { |insn| insn.branch_targets.any? }.first

[block_start, BasicBlock.new(block_start, block_insns)]
end
end
Expand All @@ -213,6 +139,114 @@ def connect_basic_blocks(blocks)
end
end
end

# Removes, in place, every block that cannot be reached from the block
# stored under key 0 by following outgoing edges. Because it relies on
# Hash#select!, it returns the hash when something was removed and nil
# when every block was already reachable.
def prune_basic_blocks(blocks)
  reachable = Set.new
  worklist = [blocks.fetch(0)]

  until worklist.empty?
    candidate = worklist.shift

    # Set#add? returns nil for a block that has already been seen, so each
    # block's successors are enqueued at most once.
    worklist.concat(candidate.outgoing_blocks) if reachable.add?(candidate)
  end

  blocks.select! { |_, basic_block| reachable.include?(basic_block) }
end
end

# This is the instruction sequence that this control flow graph
# corresponds to.
attr_reader :iseq

# This is the list of instructions that this control flow graph contains.
# It is effectively the same as the list of instructions in the
# instruction sequence but with line numbers and events filtered out.
attr_reader :insns

# This is the set of basic blocks that this control-flow graph contains.
attr_reader :blocks

# Stores the three pieces that make up a control flow graph: the source
# instruction sequence, the instruction list, and the basic blocks.
def initialize(iseq, insns, blocks)
  @iseq, @insns, @blocks = iseq, insns, blocks
end

# Renders the control flow graph as text: a header line for the instruction
# sequence, then for each basic block its id, the ids of the blocks that
# lead into it (if any), its instructions, and the ids of the blocks it
# leads to. Returns the formatter's accumulated string.
def disasm
  formatter = Disassembler.new(iseq)
  formatter.puts("== cfg: #{iseq.inspect}")

  blocks.each do |basic_block|
    formatter.puts(basic_block.id)

    formatter.with_prefix(" ") do |indent|
      # The "from" line is omitted entirely for blocks with no predecessors.
      incoming_ids = basic_block.incoming_blocks.map(&:id)
      formatter.puts("#{indent}== from: #{incoming_ids.join(", ")}") unless incoming_ids.empty?

      formatter.format_insns!(basic_block.insns, basic_block.block_start)

      # A block whose final instruction leaves gets a synthetic "leaves"
      # entry appended to its list of targets.
      outgoing_ids = basic_block.outgoing_blocks.map(&:id)
      outgoing_ids.push("leaves") if basic_block.insns.last.leaves?
      formatter.puts("#{indent}== to: #{outgoing_ids.join(", ")}")
    end
  end

  formatter.string
end

# Builds the data flow graph for this control flow graph by handing itself
# to DataFlowGraph.compile and returning whatever that produces.
def to_dfg
DataFlowGraph.compile(self)
end

# Renders the control flow graph as a Mermaid "flowchart TD" document and
# returns it as a string. Each basic block becomes a subgraph whose
# instructions are chained together; a second pass adds the edges that
# connect one block to another.
def to_mermaid
  buffer = StringIO.new
  buffer.puts("flowchart TD")

  formatter = Disassembler::Mermaid.new

  # First pass: one subgraph per block, one node per instruction.
  blocks.each do |basic_block|
    buffer.puts(" subgraph #{basic_block.id}")

    last_node = nil
    basic_block.each_with_length do |insn, offset|
      node = "node_#{offset}"
      text = format("%04d %s", offset, insn.disasm(formatter))

      # Labels are HTML-escaped before being embedded in the node text.
      buffer.puts(" #{node}(\"#{CGI.escapeHTML(text)}\")")
      buffer.puts(" #{last_node} --> #{node}") unless last_node.nil?

      last_node = node
    end

    buffer.puts(" end")
  end

  # Second pass: link the final instruction of each block to the first
  # instruction of every block it can flow into.
  blocks.each do |source|
    source.outgoing_blocks.each do |target|
      body = source.insns
      # Block start plus the lengths of every instruction except the last,
      # which is used as the node id of the block's final instruction.
      tail_offset = source.block_start + body.sum(&:length) - body.last.length

      buffer.puts(" node_#{tail_offset} --> node_#{target.block_start}")
    end
  end

  buffer.string
end

# Checks that the control flow graph is well formed by calling #verify on
# every basic block in turn. What an individual block checks, and how it
# reports a failure, is defined by the block itself and is not visible
# here.
def verify
blocks.each(&:verify)
end

# Convenience entry point: wraps the given instruction sequence in a
# Compiler and returns the result of Compiler#compile.
def self.compile(iseq)
Compiler.new(iseq).compile
end
end
end
Expand Down
Loading