diff options
author | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2018-08-01 01:31:30 +0000 |
---|---|---|
committer | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2018-08-01 01:31:30 +0000 |
commit | b47f061f5bbb2f6026c3de201dcfec66b322d086 (patch) | |
tree | 7d8987a1c8b36f330ca92b259dc4e858583e7ca6 /include | |
parent | 5311a6ea832105c29ada5c730b22b84456dc5965 (diff) |
AMDGPU: Add clamp bit to dot intrinsics
Differential Revision: https://reviews.llvm.org/D49874
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338470 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include')
-rw-r--r-- | include/llvm/IR/IntrinsicsAMDGPU.td | 35 |
1 file changed, 21 insertions, 14 deletions
```diff
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 8555db01645..9f361410b9b 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1191,7 +1191,7 @@ def int_amdgcn_ds_bpermute :
 // Deep learning intrinsics.
 //===----------------------------------------------------------------------===//
 
-// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c)
+// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
 // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
 def int_amdgcn_fdot2 :
   GCCBuiltin<"__builtin_amdgcn_fdot2">,
@@ -1200,12 +1200,13 @@ def int_amdgcn_fdot2 :
     [
       llvm_v2f16_ty, // %a
       llvm_v2f16_ty, // %b
-      llvm_float_ty // %c
+      llvm_float_ty, // %c
+      llvm_i1_ty // %clamp
     ],
     [IntrNoMem, IntrSpeculatable]
   >;
 
-// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
 // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
 def int_amdgcn_sdot2 :
   GCCBuiltin<"__builtin_amdgcn_sdot2">,
@@ -1214,12 +1215,13 @@ def int_amdgcn_sdot2 :
     [
       llvm_v2i16_ty, // %a
       llvm_v2i16_ty, // %b
-      llvm_i32_ty // %c
+      llvm_i32_ty, // %c
+      llvm_i1_ty // %clamp
     ],
     [IntrNoMem, IntrSpeculatable]
   >;
 
-// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
 // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
 def int_amdgcn_udot2 :
   GCCBuiltin<"__builtin_amdgcn_udot2">,
@@ -1228,12 +1230,13 @@ def int_amdgcn_udot2 :
     [
       llvm_v2i16_ty, // %a
       llvm_v2i16_ty, // %b
-      llvm_i32_ty // %c
+      llvm_i32_ty, // %c
+      llvm_i1_ty // %clamp
     ],
     [IntrNoMem, IntrSpeculatable]
   >;
 
-// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
 // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
 def int_amdgcn_sdot4 :
   GCCBuiltin<"__builtin_amdgcn_sdot4">,
@@ -1242,12 +1245,13 @@ def int_amdgcn_sdot4 :
     [
       llvm_i32_ty, // %a
       llvm_i32_ty, // %b
-      llvm_i32_ty // %c
+      llvm_i32_ty, // %c
+      llvm_i1_ty // %clamp
     ],
     [IntrNoMem, IntrSpeculatable]
   >;
 
-// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
 // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
 def int_amdgcn_udot4 :
   GCCBuiltin<"__builtin_amdgcn_udot4">,
@@ -1256,12 +1260,13 @@ def int_amdgcn_udot4 :
     [
       llvm_i32_ty, // %a
       llvm_i32_ty, // %b
-      llvm_i32_ty // %c
+      llvm_i32_ty, // %c
+      llvm_i1_ty // %clamp
     ],
     [IntrNoMem, IntrSpeculatable]
   >;
 
-// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
 // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
 // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
 def int_amdgcn_sdot8 :
@@ -1271,12 +1276,13 @@ def int_amdgcn_sdot8 :
     [
       llvm_i32_ty, // %a
       llvm_i32_ty, // %b
-      llvm_i32_ty // %c
+      llvm_i32_ty, // %c
+      llvm_i1_ty // %clamp
     ],
     [IntrNoMem, IntrSpeculatable]
   >;
 
-// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
 // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
 // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
 def int_amdgcn_udot8 :
@@ -1286,7 +1292,8 @@ def int_amdgcn_udot8 :
     [
       llvm_i32_ty, // %a
       llvm_i32_ty, // %b
-      llvm_i32_ty // %c
+      llvm_i32_ty, // %c
+      llvm_i1_ty // %clamp
     ],
     [IntrNoMem, IntrSpeculatable]
   >;
```