summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2018-08-01 01:31:30 +0000
committer: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2018-08-01 01:31:30 +0000
commit: b47f061f5bbb2f6026c3de201dcfec66b322d086 (patch)
tree: 7d8987a1c8b36f330ca92b259dc4e858583e7ca6 /include
parent: 5311a6ea832105c29ada5c730b22b84456dc5965 (diff)
AMDGPU: Add clamp bit to dot intrinsics
Differential Revision: https://reviews.llvm.org/D49874
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338470 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include')
-rw-r--r-- include/llvm/IR/IntrinsicsAMDGPU.td | 35
1 files changed, 21 insertions, 14 deletions
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 8555db01645..9f361410b9b 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1191,7 +1191,7 @@ def int_amdgcn_ds_bpermute :
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
-// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c)
+// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
GCCBuiltin<"__builtin_amdgcn_fdot2">,
@@ -1200,12 +1200,13 @@ def int_amdgcn_fdot2 :
[
llvm_v2f16_ty, // %a
llvm_v2f16_ty, // %b
- llvm_float_ty // %c
+ llvm_float_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
GCCBuiltin<"__builtin_amdgcn_sdot2">,
@@ -1214,12 +1215,13 @@ def int_amdgcn_sdot2 :
[
llvm_v2i16_ty, // %a
llvm_v2i16_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
GCCBuiltin<"__builtin_amdgcn_udot2">,
@@ -1228,12 +1230,13 @@ def int_amdgcn_udot2 :
[
llvm_v2i16_ty, // %a
llvm_v2i16_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sdot4 :
GCCBuiltin<"__builtin_amdgcn_sdot4">,
@@ -1242,12 +1245,13 @@ def int_amdgcn_sdot4 :
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
GCCBuiltin<"__builtin_amdgcn_udot4">,
@@ -1256,12 +1260,13 @@ def int_amdgcn_udot4 :
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sdot8 :
@@ -1271,12 +1276,13 @@ def int_amdgcn_sdot8 :
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
@@ -1286,7 +1292,8 @@ def int_amdgcn_udot8 :
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;