32981 – Phi operands are not sorted in predecessor order

LLVM Bugzilla is read-only and represents the historical archive of all LLVM issues filled before November 26, 2021. Use github to submit LLVM bugs

Bug 32981 - Phi operands are not sorted in predecessor order

Summary: Phi operands are not sorted in predecessor order

Status:	NEW

Alias:	None

Product:	libraries
Classification:	Unclassified
Component:	Scalar Optimizations (show other bugs)
Version:	trunk
Hardware:	PC Windows NT

Importance:	P enhancement
Assignee:	Florian Hahn

URL:
Keywords:

Depends on:
Blocks:

Reported:	2017-05-09 11:12 PDT by Davide Italiano
Modified:	2018-12-04 11:11 PST (History)
CC List:	4 users (show)

See Also:
Fixed By Commit(s):

Attachments
Add an attachment (proposed patch, testcase, etc.)

Note You need to log in before you can comment on or make changes to this bug.

Description Davide Italiano 2017-05-09 11:12:05 PDT

; ModuleID = 'small.ll'
source_filename = "meh.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = common global i16 0, align 2
@b = common global [1 x i16] zeroinitializer, align 2

; Function Attrs: nounwind uwtable
define i32 @main() #0 {
entry:
  %0 = load i16, i16* @a, align 2, !tbaa !1
  %conv = sext i16 %0 to i32
  %neg = xor i32 %conv, -1
  %conv1 = trunc i32 %neg to i16
  %conv3 = zext i16 %conv1 to i32
  %cmp = icmp slt i32 %conv, %conv3
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  store i16 %conv1, i16* @a, align 2, !tbaa !1
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  %c.0 = phi i16 [ %0, %if.then ], [ %conv1, %entry ]
  %1 = load i16, i16* @a, align 2, !tbaa !1
  %tobool = icmp ne i16 %1, 0
  br i1 %tobool, label %if.then8, label %if.end9

if.then8:                                         ; preds = %if.end
  %idxprom = zext i16 %c.0 to i64
  %arrayidx = getelementptr inbounds [1 x i16], [1 x i16]* @b, i64 0, i64 %idxprom
  %2 = load i16, i16* %arrayidx, align 2, !tbaa !1
  store i16 %2, i16* @a, align 2, !tbaa !1
  br label %if.end9

if.end9:                                          ; preds = %if.then8, %if.end
  ret i32 0
}

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1

attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }

!llvm.ident = !{!0}

!0 = !{!"clang version 5.0.0 (trunk 302523) (llvm/trunk 302532)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"short", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}


Given the following IR, running jump-threading on it introduces a new PHI which operands have an order inconsistent with the one of the original PHI.

[davide@cupiditate bin]$ ./opt -jump-threading prejump.ll -preserve-ll-uselistorder -S -o - | grep phi
  %1 = phi i16 [ %.pr, %if.endthread-pre-split ], [ %conv1, %if.then ]
  %c.0 = phi i16 [ %0, %if.then ], [ %conv1, %if.endthread-pre-split ]
[davide@cupiditate bin]$ cat prejump.ll |grep phi
  %c.0 = phi i16 [ %0, %if.then ], [ %conv1, %entry ]

Found out while looking at a report from Zhendong Su. Looking at this.

Comment 1 Davide Italiano 2017-05-09 11:26:18 PDT

The issue is the same discussed with Dan.
JumpThreading creates phis and add incoming edges using:

  // Substitute with Phis & remove.
  for (auto *Inst : reverse(ToRemove)) {
    if (!Inst->use_empty()) {
      PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
      NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
      NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
      NewPN->insertBefore(InsertionPoint);
      Inst->replaceAllUsesWith(NewPN);
    }
    Inst->eraseFromParent();

and addIncoming() *always* append to the end of the PHI list:

  /// Add an incoming value to the end of the PHI list
  ///
  void addIncoming(Value *V, BasicBlock *BB) {
    if (getNumOperands() == ReservedSpace)
      growOperands();  // Get more space!
    // Initialize some new operands.
    setNumHungOffUseOperands(getNumOperands() + 1);
    setIncomingValue(getNumOperands() - 1, V);
    setIncomingBlock(getNumOperands() - 1, BB);
  }

I don't see why we can't keep PHIs in sorted (predecessor) order.

Comment 2 Davide Italiano 2017-05-09 11:56:51 PDT

One immediate concern with this is that order might actually matter here, and might be cause of non-determinism (I think).

Every time we create a new PHI instruction and add incoming edges we rely on pointer value comparison and we're not guaranteed that they're always in the some order. 

I'm not sure there's a cheaper way of sorting. Thoughts?

Comment 3 Eli Friedman 2017-05-09 17:39:33 PDT

We could use the same order as predecessors(BB) (from llvm/IR/CFG.h); that's based on the block's use-list.

Comment 4 Davide Italiano 2017-05-31 15:35:49 PDT

Another "interesting" optimization that GCC does on this code.
If both functions are marked as cold, it replaces the second with a jmp to the first.

tinky(int):
        mov     eax, edi
        mov     edx, 36
.L2:
        imul    eax, edi
        dec     edx
        jne     .L2
        ret
winky(int):
        jmp     tinky(int)

llvm should be able to do the same using merge identical functions, but the pass is currently disabled by default at any optimization level.

Comment 5 Davide Italiano 2017-05-31 15:36:04 PDT

ops, sorry wrong PR, please ignore :)