ovmold created this revision.
ovmold added a reviewer: craig.topper.
ovmold added a project: clang.
Herald added a subscriber: cfe-commits.

In the 'detectCTLZIdiom' function support for loops that use LSHR instruction 
instead of ASHR has been added.

The problem is that for the following piece of code no '@llvm.ctlz' instruction 
has been generated in the resulting test.ll file when compiling as "clang -S 
-O3 -march=core-avx2 -emit-llvm test.c". The reason for this is that the LSHR 
instruction is used instead of ASHR in the LLVM IR when we get into the 
'detectCTLZIdiom' function.




int lzcnt(int x) {

   int count = 0;
   while (x > 0)  {
        count++;
        x = x >> 1;
   }
  return count;

}

int main() {

  int x;
  scanf("%d", &x);
  int y = lzcnt(x);
  printf("count  = %d\n", y);
  return 0;

}
=


Repository:
  rC Clang

https://reviews.llvm.org/D48354

Files:
  lib/Transforms/Scalar/LoopIdiomRecognize.cpp
  test/Transforms/LoopIdiom/X86/ctlz.ll

Index: test/Transforms/LoopIdiom/X86/ctlz.ll
===================================================================
--- test/Transforms/LoopIdiom/X86/ctlz.ll
+++ test/Transforms/LoopIdiom/X86/ctlz.ll
@@ -115,6 +115,34 @@
   ret i32 %i.0.lcssa
 }
 
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_zero_check_lshr(i32 %n) {
+entry:
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  %tobool4 = icmp eq i32 %abs_n, 0
+  br i1 %tobool4, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
+  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
+  %shr = lshr i32 %n.addr.05, 1
+  %inc = add nsw i32 %i.06, 1
+  %tobool = icmp eq i32 %shr, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
+  ret i32 %i.0.lcssa
+}
+
 ; Recognize CTLZ builtin pattern.
 ; Here it will replace the loop -
 ; assume builtin is always profitable.
@@ -157,6 +185,48 @@
   ret i32 %i.0
 }
 
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_lshr(int n)
+; {
+;   n = n >= 0 ? n : -n;
+;   int i = 0;
+;   while(n >>= 1) {
+;     i++;
+;   }
+;   return i;
+; }
+;
+; ALL:  entry
+; ALL:  %0 = lshr i32 %abs_n, 1
+; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
+; ALL-NEXT:  %2 = sub i32 32, %1
+; ALL-NEXT:  %3 = add i32 %2, 1
+; ALL:  %i.0.lcssa = phi i32 [ %2, %while.cond ]
+; ALL:  ret i32 %i.0.lcssa
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_lshr(i32 %n) {
+entry:
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
+  %shr = lshr i32 %n.addr.0, 1
+  %tobool = icmp eq i32 %shr, 0
+  %inc = add nsw i32 %i.0, 1
+  br i1 %tobool, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret i32 %i.0
+}
+
 ; Recognize CTLZ builtin pattern.
 ; Here it will replace the loop -
 ; assume builtin is always profitable.
@@ -200,6 +270,49 @@
   ret i32 %i.0
 }
 
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_add_lshr(int n, int i0)
+; {
+;   n = n >= 0 ? n : -n;
+;   int i = i0;
+;   while(n >>= 1) {
+;     i++;
+;   }
+;   return i;
+; }
+;
+; ALL:  entry
+; ALL:  %0 = lshr i32 %abs_n, 1
+; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
+; ALL-NEXT:  %2 = sub i32 32, %1
+; ALL-NEXT:  %3 = add i32 %2, 1
+; ALL-NEXT:  %4 = add i32 %2, %i0
+; ALL:  %i.0.lcssa = phi i32 [ %4, %while.cond ]
+; ALL:  ret i32 %i.0.lcssa
+;
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_add_lshr(i32 %n, i32 %i0) {
+entry:
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
+  %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
+  %shr = lshr i32 %n.addr.0, 1
+  %tobool = icmp eq i32 %shr, 0
+  %inc = add nsw i32 %i.0, 1
+  br i1 %tobool, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret i32 %i.0
+}
+
 ; Recognize CTLZ builtin pattern.
 ; Here it will replace the loop -
 ; assume builtin is always profitable.
@@ -245,6 +358,51 @@
   ret i32 %i.0
 }
 
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_sext_lshr(short in)
+; {
+;   int n = in;
+;   if (in < 0)
+;     n = -n;
+;   int i = 0;
+;   while(n >>= 1) {
+;     i++;
+;   }
+;   return i;
+; }
+;
+; ALL:  entry
+; ALL:  %0 = lshr i32 %abs_n, 1
+; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
+; ALL-NEXT:  %2 = sub i32 32, %1
+; ALL-NEXT:  %3 = add i32 %2, 1
+; ALL:  %i.0.lcssa = phi i32 [ %2, %while.cond ]
+; ALL:  ret i32 %i.0.lcssa
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_sext_lshr(i16 %in) {
+entry:
+  %n = sext i16 %in to i32
+  %c = icmp sgt i16 %in, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
+  %shr = lshr i32 %n.addr.0, 1
+  %tobool = icmp eq i32 %shr, 0
+  %inc = add nsw i32 %i.0, 1
+  br i1 %tobool, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret i32 %i.0
+}
+
 ; This loop contains a volatile store. If x is initially negative,
 ; the code will be an infinite loop because the ashr will eventually produce
 ; all ones and continue doing so. This prevents the loop from terminating. If
Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp
===================================================================
--- lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -188,8 +188,9 @@
                                PHINode *CntPhi, Value *Var);
   bool recognizeAndInsertCTLZ();
   void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
-                                PHINode *CntPhi, Value *Var, const DebugLoc DL,
-                                bool ZeroCheck, bool IsCntPhiUsedOutsideLoop);
+                                PHINode *CntPhi, Value *Var, Instruction *DefX, 
+                                const DebugLoc DL, bool ZeroCheck, 
+                                bool IsCntPhiUsedOutsideLoop);
 
   /// @}
 };
@@ -1316,8 +1317,8 @@
     return false;
 
   // step 2: detect instructions corresponding to "x.next = x >> 1"
-  // TODO: Support loops that use LShr.
-  if (!DefX || DefX->getOpcode() != Instruction::AShr)
+  // DONE: Support loops that use LShr.
+  if (!DefX || (DefX->getOpcode() != Instruction::AShr && DefX->getOpcode() != Instruction::LShr))
     return false;
   ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
   if (!Shft || !Shft->isOne())
@@ -1401,8 +1402,8 @@
 
   // Make sure the initial value can't be negative otherwise the ashr in the
   // loop might never reach zero which would make the loop infinite.
-  // TODO: Support loops that use lshr and wouldn't need this check.
-  if (!isKnownNonNegative(InitX, *DL))
+  // DONE: Support loops that use lshr and wouldn't need this check.
+  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL))
     return false;
 
   // If we check X != 0 before entering the loop we don't need a zero
@@ -1434,7 +1435,7 @@
     return false;
 
   const DebugLoc DL = DefX->getDebugLoc();
-  transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck,
+  transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX, DL, ZeroCheck,
                            IsCntPhiUsedOutsideLoop);
   return true;
 }
@@ -1548,7 +1549,7 @@
 /// If CntInst and DefX are not used in LOOP_BODY they will be removed.
 void LoopIdiomRecognize::transformLoopToCountable(
     BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
-    const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
+    Instruction *DefX, const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
   BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
 
   // Step 1: Insert the CTLZ instruction at the end of the preheader block
@@ -1559,10 +1560,14 @@
   Builder.SetCurrentDebugLocation(DL);
   Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
 
-  if (IsCntPhiUsedOutsideLoop)
-    InitXNext = Builder.CreateAShr(InitX,
+  if (IsCntPhiUsedOutsideLoop){
+    if(DefX->getOpcode() == Instruction::AShr)
+      InitXNext = Builder.CreateAShr(InitX,
                                    ConstantInt::get(InitX->getType(), 1));
-  else
+    else if(DefX->getOpcode() == Instruction::LShr)
+      InitXNext = Builder.CreateLShr(InitX,
+                                   ConstantInt::get(InitX->getType(), 1));
+  }else
     InitXNext = InitX;
   CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
   Count = Builder.CreateSub(
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to