diff options
author | Akira Hatanaka <ahatanaka@apple.com> | 2017-12-09 00:02:37 +0000 |
---|---|---|
committer | Akira Hatanaka <ahatanaka@apple.com> | 2017-12-09 00:02:37 +0000 |
commit | def73313d797164f5042301c02a14cde75afb66d (patch) | |
tree | 7b8401b3ba0fa8fc875487c2063c1da00eb85b1a /test/CodeGenCXX | |
parent | 6afb63d7c872f5de9402d6d65a6567d1d3a76b0d (diff) |
[CodeGen][X86] Fix handling of __fp16 vectors.
This commit fixes a bug in IRGen where it generates completely broken
code for __fp16 vectors on X86. For example, when the following code is
compiled:
half4 hv0, hv1, hv2; // these are vectors of __fp16.
void foo221() {
hv0 = hv1 + hv2;
}
clang generates the following IR, in which the two vectors are treated as i16
vectors and combined with an integer add instead of a floating-point add:
@hv1 = common global <4 x i16> zeroinitializer, align 8
@hv2 = common global <4 x i16> zeroinitializer, align 8
@hv0 = common global <4 x i16> zeroinitializer, align 8
define void @foo221() {
%0 = load <4 x i16>, <4 x i16>* @hv1, align 8
%1 = load <4 x i16>, <4 x i16>* @hv2, align 8
%add = add <4 x i16> %0, %1
store <4 x i16> %add, <4 x i16>* @hv0, align 8
ret void
}
To fix the bug, this commit uses the code committed in r314056, which
modified clang to promote and truncate __fp16 vectors to and from float
vectors in the AST. It also fixes another IRGen bug where a short value
is assigned to an __fp16 variable without any integer-to-floating-point
conversion, as shown in the following example:
__fp16 a;
short b;
void foo1() {
a = b;
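}
For this code, clang generates the following IR, which stores the loaded i16
value directly into @a without converting it: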
@b = common global i16 0, align 2
@a = common global i16 0, align 2
define void @foo1() #0 {
%0 = load i16, i16* @b, align 2
store i16 %0, i16* @a, align 2
ret void
}
rdar://problem/20625184
Differential Revision: https://reviews.llvm.org/D40112
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@320215 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGenCXX')
-rw-r--r-- | test/CodeGenCXX/float16-declarations.cpp | 21 | ||||
-rw-r--r-- | test/CodeGenCXX/fp16-mangle.cpp | 4 |
2 files changed, 9 insertions, 16 deletions
diff --git a/test/CodeGenCXX/float16-declarations.cpp b/test/CodeGenCXX/float16-declarations.cpp index b97f9aa852..87ef139f86 100644 --- a/test/CodeGenCXX/float16-declarations.cpp +++ b/test/CodeGenCXX/float16-declarations.cpp @@ -11,16 +11,14 @@ namespace { // CHECK-DAG: @_ZN12_GLOBAL__N_13f1nE = internal global half 0xH0000, align 2 _Float16 f2n = 33.f16; -// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2 -// CHECK-X86-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global i16 20512, align 2 +// CHECK-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2 _Float16 arr1n[10]; // CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 2 // CHECK-X86-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 16 _Float16 arr2n[] = { 1.2, 3.0, 3.e4 }; -// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2 -// CHECK-X86-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x i16] [i16 15565, i16 16896, i16 30547], align 2 +// CHECK-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2 const volatile _Float16 func1n(const _Float16 &arg) { return arg + f2n + arr1n[4] - arr2n[1]; @@ -35,16 +33,14 @@ _Float16 f1f; // CHECK-X86-DAG: @f1f = global half 0xH0000, align 2 _Float16 f2f = 32.4; -// CHECK-AARCH64-DAG: @f2f = global half 0xH500D, align 2 -// CHECK-X86-DAG: @f2f = global i16 20493, align 2 +// CHECK-DAG: @f2f = global half 0xH500D, align 2 _Float16 arr1f[10]; // CHECK-AARCH64-DAG: @arr1f = global [10 x half] zeroinitializer, align 2 // CHECK-X86-DAG: @arr1f = global [10 x half] zeroinitializer, align 16 _Float16 arr2f[] = { -1.2, -3.0, -3.e4 }; -// CHECK-AARCH64-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2 -// CHECK-X86-DAG: @arr2f = global [3 x i16] [i16 -17203, i16 -15872, i16 -2221], align 
2 +// CHECK-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2 _Float16 func1f(_Float16 arg); @@ -110,11 +106,9 @@ int main(void) { // CHECK-DAG: call void @_ZN2C1C2EDF16_(%class.C1* %{{.*}}, half %{{.*}}) S1<_Float16> s1 = { 132.f16 }; -// CHECK-AARCH64-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2 -// CHECK-X86-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant { i16 } { i16 22560 }, align 2 +// CHECK-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2 // CHECK-DAG: [[S1:%[0-9]+]] = bitcast %struct.S1* %{{.*}} to i8* -// CHECK-AARCH64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false) -// CHECK-X86-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* bitcast ({ i16 }* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false) +// CHECK-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false) _Float16 f4l = func1n(f1l) + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) + func1t(f1l) + s1.mem2 - f1n + f2n; @@ -129,8 +123,7 @@ int main(void) { // CHECK-DAG: store half [[INC]], half* %{{.*}}, align 2 _Float16 arr1l[] = { -1.f16, -0.f16, -11.f16 }; -// CHECK-AARCH64-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2 -// CHECK-X86-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x i16] [i16 -17408, i16 -32768, i16 -13952], align 2 +// CHECK-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2 float cvtf = f2n; //CHECK-DAG: [[H2F:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to float diff --git a/test/CodeGenCXX/fp16-mangle.cpp b/test/CodeGenCXX/fp16-mangle.cpp index bd5a319411..5827fd549d 100644 --- a/test/CodeGenCXX/fp16-mangle.cpp +++ b/test/CodeGenCXX/fp16-mangle.cpp @@ -4,9 +4,9 @@ template 
<typename T, typename U> struct S { static int i; }; template <> int S<__fp16, __fp16>::i = 3; -// CHECK-LABEL: define void @_Z1fPDh(i16* %x) +// CHECK-LABEL: define void @_Z1fPDh(half* %x) void f (__fp16 *x) { } -// CHECK-LABEL: define void @_Z1gPDhS_(i16* %x, i16* %y) +// CHECK-LABEL: define void @_Z1gPDhS_(half* %x, half* %y) void g (__fp16 *x, __fp16 *y) { } |