[llvm-commits] [llvm] r166649 - in /llvm/trunk: lib/Transforms/Vectorize/LoopVectorize.cpp test/Transforms/LoopVectorize/reduction.ll

Nadav Rotem nrotem at apple.com
Wed Oct 24 17:08:41 PDT 2012


Author: nadav
Date: Wed Oct 24 19:08:41 2012
New Revision: 166649

URL: http://llvm.org/viewvc/llvm-project?rev=166649&view=rev
Log:
Add support for additional reduction variables: AND, OR, XOR.

Patch by Paul Redmond <paul.redmond at intel.com>.


Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/reduction.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=166649&r1=166648&r2=166649&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Oct 24 19:08:41 2012
@@ -208,7 +208,10 @@
   enum ReductionKind {
     NoReduction = -1, /// Not a reduction.
     IntegerAdd  = 0,  /// Sum of numbers.
-    IntegerMult = 1  /// Product of numbers.
+    IntegerMult = 1,  /// Product of numbers.
+    IntegerOr   = 2,  /// Bitwise or logical OR of numbers.
+    IntegerAnd  = 3,  /// Bitwise or logical AND of numbers.
+    IntegerXor  = 4   /// Bitwise or logical XOR of numbers.
   };
 
   /// This POD struct holds information about reduction variables.
@@ -981,14 +984,28 @@
     // Extract the first scalar.
     Value *Scalar0 =
       Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
-    // Extract and sum the remaining vector elements.
+    // Extract and reduce the remaining vector elements.
     for (unsigned i=1; i < VF; ++i) {
       Value *Scalar1 =
         Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
-      if (RdxDesc.Kind == LoopVectorizationLegality::IntegerAdd) {
-        Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
-      } else {
-        Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+      switch (RdxDesc.Kind) {
+        case LoopVectorizationLegality::IntegerAdd:
+          Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
+          break;
+        case LoopVectorizationLegality::IntegerMult:
+          Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+          break;
+        case LoopVectorizationLegality::IntegerOr:
+          Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
+          break;
+        case LoopVectorizationLegality::IntegerAnd:
+          Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
+          break;
+        case LoopVectorizationLegality::IntegerXor:
+          Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
+          break;
+        default:
+          llvm_unreachable("Unknown reduction operation");
       }
     }
 
@@ -1099,7 +1116,19 @@
         continue;
       }
       if (AddReductionVar(Phi, IntegerMult)) {
-        DEBUG(dbgs() << "LV: Found an Mult reduction PHI."<< *Phi <<"\n");
+        DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+        continue;
+      }
+      if (AddReductionVar(Phi, IntegerOr)) {
+        DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+        continue;
+      }
+      if (AddReductionVar(Phi, IntegerAnd)) {
+        DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+        continue;
+      }
+      if (AddReductionVar(Phi, IntegerXor)) {
+        DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
         continue;
       }
 
@@ -1373,6 +1402,12 @@
     case Instruction::UDiv:
     case Instruction::SDiv:
       return Kind == IntegerMult;
+    case Instruction::And:
+      return Kind == IntegerAnd;
+    case Instruction::Or:
+      return Kind == IntegerOr;
+    case Instruction::Xor:
+      return Kind == IntegerXor;
     }
 }
 

Modified: llvm/trunk/test/Transforms/LoopVectorize/reduction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/reduction.ll?rev=166649&r1=166648&r2=166649&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/reduction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/reduction.ll Wed Oct 24 19:08:41 2012
@@ -149,4 +149,83 @@
   ret i32 %sum.0.lcssa
 }
 
+;CHECK: @reduction_and
+;CHECK: and <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
 
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %and = and i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ]
+  ret i32 %result.0.lcssa
+}
+
+;CHECK: @reduction_or
+;CHECK: or <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %or = or i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ]
+  ret i32 %result.0.lcssa
+}
+
+;CHECK: @reduction_xor
+;CHECK: xor <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %xor = xor i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
+  ret i32 %result.0.lcssa
+}





More information about the llvm-commits mailing list