summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGary Oblock <gary@amperecomputing.com>2020-09-23 16:19:05 -0700
committerGary Oblock <gary@amperecomputing.com>2020-09-23 16:19:05 -0700
commitc8b48ea1752a5011d8b3feab84cf9c825ee49f0e (patch)
tree947bc72f296a98d664d32a02979c9a0aae119267
parent3341a716f05de15d4da6afaa92e3c3ec37eb23d7 (diff)
A version of the performance qualification that's all there
and compiles correctly. Note, the compiler is in a horrible state and what I've done isn't testable.
-rw-r--r--gcc/ipa-str-reorg-instance-interleave.c313
-rw-r--r--gcc/ipa-structure-reorg.c7
-rw-r--r--gcc/ipa-structure-reorg.h2
3 files changed, 214 insertions, 108 deletions
diff --git a/gcc/ipa-str-reorg-instance-interleave.c b/gcc/ipa-str-reorg-instance-interleave.c
index c417926af64..d53d290f980 100644
--- a/gcc/ipa-str-reorg-instance-interleave.c
+++ b/gcc/ipa-str-reorg-instance-interleave.c
@@ -47,19 +47,22 @@ along with GCC; see the file COPYING3. If not see
#include "cfghooks.h"
#include "function.h"
#include "cfgloop.h"
+#include "wide-int.h"
static void wrangle_ssa_type( tree, Info_t*);
//static bool print_internals (gimple *, void *);
static void str_reorg_instance_interleave_qual_part ( Info *);
static void str_reorg_instance_interleave_type_part ( Info *);
static void header ( bool);
+static double cut_off_eq_single_pool( double);
+static double alignment_effect( unsigned HOST_WIDE_INT);
static void create_new_types ( Info_t *);
static void create_a_new_type ( Info_t *, tree);
static unsigned int reorg_perf_qual ( Info *);
static tree find_coresponding_field ( tree, tree);
static void remove_default_def ( tree, struct function *);
static void set_lhs_for ( gimple *, tree);
-
+static basic_block make_bb ( char *, basic_block);
// These are local to this file by design
#define REORG_SP_PTR_PREFIX "_reorg_SP_ptr_type_"
@@ -714,7 +717,7 @@ str_reorg_instance_interleave_trans ( Info *info)
basic_block prev_bb = before_bb;
// FROM failure_bb = create_empty_block(prev_bb)
- basic_block failure_bb = create_empty_bb ( prev_bb);
+ basic_block failure_bb = make_bb ( "failure_bb", prev_bb);
// I need to set the count to zero and there doesn't
// seem to be a direct way of doing this...
failure_bb->count = prev_bb->count - prev_bb->count;
@@ -772,7 +775,7 @@ str_reorg_instance_interleave_trans ( Info *info)
field;
field = DECL_CHAIN( field))
{
- basic_block new_bb = create_empty_bb ( prev_order);
+ basic_block new_bb = make_bb ( "new_bb", prev_order);
new_bb->count = prev_order->count;
// Nope! Don't do this.
//set_immediate_dominator ( CDI_DOMINATORS, new_bb, prev_bb);
@@ -941,7 +944,7 @@ str_reorg_instance_interleave_trans ( Info *info)
// create basic block for success
//
// FROM success_bb = create_empty_block(prev_bb_order);
- basic_block success_bb = create_empty_bb ( prev_bb);
+ basic_block success_bb = make_bb ( "succ_bb", prev_bb);
success_bb->count = prev_bb->count;
// NOTE, it seems I shouldn't be attempting
@@ -1880,29 +1883,29 @@ str_reorg_instance_interleave_type_part ( Info *info)
// Types for performance qualification
typedef struct reorg_bb_info reorg_bb_info_t;
- struct reorg_bb_info {
- basic_block *bb;
- };
+struct reorg_bb_info {
+ basic_block *bb;
+};
- typedef struct perf_bb_info perf_bb_info_t;
- typedef struct acc_info acc_info_t;
- typedef struct var_info var_info_t;
-
- struct var_info {
- varpool_node *var;
- sbitmap *bits;
- double count;
- };
-
- struct acc_info {
- varpool_node *v;
- int field_num;
- };
-
- struct perf_bb_info {
- std::vector <var_info_t*> *vari;
- basic_block *gcc_bb;
- };
+typedef struct perf_bb_info perf_bb_info_t;
+typedef struct acc_info acc_info_t;
+typedef struct var_info var_info_t;
+
+struct var_info {
+ varpool_node *var;
+ sbitmap *bits;
+ double count;
+};
+
+struct acc_info {
+ varpool_node *v;
+ int field_num;
+};
+
+struct perf_loop_info {
+ std::vector <var_info_t*> *vari;
+ class loop *gcc_loop;
+};
static void account_for_use( tree, std::vector <acc_info_t> *);
static bool is_array_access( tree);
@@ -1923,6 +1926,17 @@ reorg_perf_qual ( Info *info)
(*(info->reorg_type))[i].do_instance_interleave = true;
}
#else
+
+ // This dom sequence is broken and I'm stalled on it.
+ // I used the sequence before but the upstream code (not
+ // my pass) is broken.
+ if ( dom_info_available_p ( CDI_DOMINATORS) )
+ {
+ free_dominance_info ( CDI_DOMINATORS);
+ }
+
+ calculate_dominance_info (CDI_DOMINATORS);
+
// We are doing a quick and dirty version of performance
// qualification for testing purposes and possibly the
// initial version for the main branch.
@@ -1950,25 +1964,48 @@ reorg_perf_qual ( Info *info)
// Ugly GCC idiom with global pointer to current function.
push_cfun ( func);
// TBD
+ std::vector<perf_loop_info> loop_perf;
+ loop_perf.reserve ( number_of_loops ( func));
class loop *loop;
FOR_EACH_LOOP_FN ( func, loop, LI_ONLY_INNERMOST )
{
+ loop_perf [ loop->num ].vari = new std::vector<var_info_t*>; // ???
+ loop_perf [ loop->num ].gcc_loop = loop;
size_t num_bbs = loop->num_nodes;
basic_block *bbs = get_loop_body ( loop);
- // This stuff probably doesn't matter
- #if 0
- int *bbsort = XNEWVEC ( int, num_bbs);
- reorg_bb_info_t *rbbs =
- XNEWVEC ( reorg_bb_info_t, num_bbs);
- topsort( bbs, bbsort, loop->num_nodes);
- for( i = 0; i < num_bbs; i++ ) {
- rbbs[i].bb = &bbs[bbsort[i]];
- }
- map <int,perf_bb_info_t> bbmap;
- #endif
-
// TBD Stuff here
+ for ( unsigned i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs [i];
+ for ( auto gsi = gsi_start_bb ( bb); !gsi_end_p ( gsi); gsi_next ( &gsi) )
+ {
+ gimple *stmt = gsi_stmt ( gsi);
+ if ( contains_a_reorgtype ( stmt, info) != NULL )
+ {
+ DEBUG_A("examine: ");
+ DEBUG_F ( print_gimple_stmt, stderr, stmt, 0);
+ INDENT(4);
+ unsigned n_ops = gimple_num_ops( stmt);
+ tree op;
+ unsigned ith_op;
+ for ( ith_op = 0; i < n_ops; i++ )
+ {
+ op = gimple_op ( stmt, ith_op);
+ ReorgType_t *tri = tree_contains_a_reorgtype (op, info);
+ if ( tri != NULL )
+ {
+ DEBUG_A("");
+ DEBUG_F(print_reorg, stderr, 0, tri);
+ DEBUG(", ");
+ DEBUG_F(flexible_print, stderr, op, 1, (dump_flags_t)0);
+ }
+ }
+ INDENT(-4);
+
+ }
+ }
+ }continue; // Testing above here
// Obtain loop count by looking at all the block counts.
unsigned max_count = 0;
@@ -1984,76 +2021,95 @@ reorg_perf_qual ( Info *info)
// loop. TBD But perf_bb is per loop so we need something similar
// per loop.
- std::vector <var_info_t*> pv = bb->vari;
- for( auto pvi = pv->begin (); pvi != pv->end (); pv = pvi++ ) { // 676
- tree base_type = base_type_of( pvi->var->decl);
- ReorgType_t *ri = get_reorgtype_info( base_type, info);
- // Reorg accounting
- if( ri != NULL ) {
- double reorg_nca = 0.0;
- int nf = number_of_fields_of( base_type);
- int fi;
- for( fi = 0; fi < nf; fi++ ) { // 684
- if( bitmap_bit_p( fi, pv->bits) ) {
- int fld_width = field_width( base_type, fi);
- reorg_nca += pvi->count * alignment_effect( fld_width);
- }
- }
- ri->reorg_perf += reorg_nca;
- } // 699
-
- // regular accounting
- double regular_nca = 0.0;
- sbitmap *cache_model = sbitmap_alloc(1);
- // TBD NOTE, pv steps on the pv above.
- vector <var_info_t*> pv2 = perf_bb->vari;
- for( auto pv2i = pv2->begin (); pv2i != pv2->end; pv2i++ ) { // 704
- tree base_type = base_type_of( pv2i->var->decl);
- // create a tiny model of the cache big
- // enough for this record.
- int len =
- ((length( base_type) + L1_CACHE_LINE_SIZE -1)
- /
- L1_CACHE_LINE_SIZE)
- +
- 1;
- cache_model = sbitmap_resize( cache_model, len, 0);
- int nf = number_of_fields_of( base_type);
- int nrbo = number of record base offsets
- double accum = 0.0;
- for( rboi = 0; rboi < nrbo; rboi++ ) {
- base_offset = offset_for( rboi);
- // Access accounting
- int fi;
- for( fi = 0; fi < nf; fi++ ) {
- if( bitmap_bit_p( fi, pv2i->bits) ) {
- int fld_width = field_width( base_type, fi);
- int fld_offset = field_offset( base_type, fi);
- int chari;
- for( chari = 0; chari < fld_width; chari++ ) {
- int loc = (chari + field_offset + base_offset)
- /
- L1_CACHE_LINE_SIZE;
- bitmap_set_bit(cache_model, loc);
+ std::vector <var_info_t*> *pv = loop_perf [ loop->num].vari;
+ for ( auto pvi = pv->begin (); pvi != pv->end (); pvi++ )
+ { // 676
+ tree base_type = base_type_of( ( *pvi)->var->decl);
+ ReorgType_t *ri = get_reorgtype_info ( base_type, info);
+ // Reorg accounting
+ if( ri != NULL )
+ {
+ double reorg_nca = 0.0;
+ int fi;
+ tree field;
+ for( field = TYPE_FIELDS ( ri->gcc_type), fi = 0;
+ field;
+ field = DECL_CHAIN ( field), fi++ ) // 684
+ {
+ if ( bitmap_bit_p ( *(*pvi)->bits, fi) )
+ {
+ unsigned HOST_WIDE_INT fld_width =
+ tree_to_uhwi ( DECL_SIZE ( field));
+ reorg_nca += max_count * alignment_effect ( fld_width);
+ }
}
+ ri->instance_interleave.reorg_perf += reorg_nca;
+ } // 699
+
+ // regular accounting
+ double regular_nca = 0.0;
+ sbitmap cache_model = sbitmap_alloc(1);
+ // TBD NOTE, pv steps on the pv above.
+ std::vector <var_info_t*> *pv2 = loop_perf[ loop->num].vari;
+ for( auto pv2i = pv2->begin (); pv2i != pv2->end (); pv2i++ ) { // 704
+ tree base_type = base_type_of ( (*pv2i)->var->decl);
+ // create a tiny model of the cache big
+ // enough for this record.
+ unsigned HOST_WIDE_INT len =
+ (( tree_to_uhwi ( DECL_SIZE ( base_type))
+ +
+ param_l1_cache_line_size -1)
+ /
+ param_l1_cache_line_size)
+ +
+ 1;
+ cache_model = sbitmap_resize( cache_model, (unsigned) len, 0);
+ double accum = 0.0;
+ int nrbo = 0;
+ for ( auto field_ex = TYPE_FIELDS ( base_type);
+ field_ex;
+ field_ex = DECL_CHAIN ( field_ex) )
+ {
+ nrbo++;
+ unsigned HOST_WIDE_INT base_offset =
+ tree_to_uhwi ( DECL_FIELD_OFFSET( field_ex));
+ // Access accounting
+ int fi = 0;
+ for ( auto field = TYPE_FIELDS ( base_type);
+ field;
+ field = DECL_CHAIN ( field), fi++)
+ {
+ if ( bitmap_bit_p ( *(*pv2i)->bits, fi) )
+ {
+ unsigned HOST_WIDE_INT fld_width, fld_offset;
+ fld_width = tree_to_uhwi ( DECL_SIZE ( field));
+ fld_offset = tree_to_uhwi ( DECL_FIELD_OFFSET ( field));
+ int chari;
+ for ( chari = 0; chari < fld_width; chari++ )
+ {
+ int loc = (chari + fld_offset + base_offset)
+ /
+ param_l1_cache_line_size;
+ bitmap_set_bit ( cache_model, loc);
+ }
+ }
+ }
+ accum += bitmap_count_bits ( cache_model);
+ bitmap_clear ( cache_model);
}
- }
- accum += popcount( cache_model);
- bitmap_clear( cache_model);
+ regular_nca += accum / nrbo;
+
+ } // 739
+ sbitmap_free ( cache_model);
+
+ if( ri != NULL ) {
+ ri->instance_interleave.regular_perf += regular_nca;
+ cache_accesses_noreorg += regular_nca;
+ } else {
+ cache_accesses += regular_nca;
}
- regular_nca += accum/nrbo;
-
- } // 739
- sbitmap_free( cache_model);
-
- if( ri != NULL ) {
- ri->regular_perf += regular_nca;
- cache_accesses_noreorg += regular_nca;
- } else {
- cache_accesses += regular_nca;
- }
- } // end for each prop_var 748
-
+ } // end for each prop_var 748
+
} //
pop_cfun ();
@@ -2120,11 +2176,44 @@ reorg_perf_qual ( Info *info)
{
reorgi->do_instance_interleave = false;
}
-
}
+
+ free_dominance_info ( CDI_DOMINATORS);
+
#endif
}
+// The single-pool cut-off is modelled as the straight line through
+//   (SINGLE_POOL_ABS_SKIP_IT, SINGLE_POOL_RAW_SKIP_IT) and
+//   (SINGLE_POOL_ABS_DO_IT_ALWAYS, SINGLE_POOL_RAW_DO_IT_ALWAYS).
+// SINGLE_POOL_SLOPE is that line's slope.  The arithmetic type of the
+// result depends on how the SINGLE_POOL_* constants are defined, which
+// is not visible here.
+#define SINGLE_POOL_SLOPE \
+  ((SINGLE_POOL_RAW_DO_IT_ALWAYS - SINGLE_POOL_RAW_SKIP_IT) \
+   / (SINGLE_POOL_ABS_DO_IT_ALWAYS - SINGLE_POOL_ABS_SKIP_IT))
+
+// SINGLE_POOL_INTERSECT is the value of that same line at x == 0.
+#define SINGLE_POOL_INTERSECT \
+  (SINGLE_POOL_RAW_SKIP_IT - SINGLE_POOL_SLOPE * SINGLE_POOL_ABS_SKIP_IT)
+
+// Evaluate the single-pool cut-off line at X and return the result.
+static double
+cut_off_eq_single_pool( double x)
+{
+  return SINGLE_POOL_INTERSECT + x * SINGLE_POOL_SLOPE;
+}
+
+// Estimate a cache-line cost factor for accessing a field WIDTH wide,
+// relative to param_l1_cache_line_size.
+//
+// Returns 1.0 when WIDTH divides the line size evenly (and, as a
+// defensive default, when WIDTH is zero).  Otherwise, with
+// g = gcd (line size, WIDTH), m = line size / g and n = WIDTH / g,
+// returns 1.0 + (n - 1) / m.
+//
+// NOTE(review): the callers in this file pass
+// tree_to_uhwi (DECL_SIZE (field)); GCC documents DECL_SIZE as a size
+// in bits while the l1-cache-line-size parameter is in bytes, so the
+// units probably need reconciling -- TODO confirm.
+static double
+alignment_effect( unsigned HOST_WIDE_INT width )
+{
+  // A zero width would make the modulo below divide by zero; treat it
+  // as carrying no alignment penalty.
+  if ( width == 0 )
+    {
+      return 1.0;
+    }
+  // The line size is an exact multiple of the width.
+  if ( param_l1_cache_line_size % width == 0 )
+    {
+      return 1.0;
+    }
+  unsigned HOST_WIDE_INT g = gcd ( param_l1_cache_line_size, width);
+  unsigned HOST_WIDE_INT m = param_l1_cache_line_size / g;
+  unsigned HOST_WIDE_INT n = width / g;
+  return 1.0 + (n - 1.0) / m;
+}
+
static void
header ( bool initialize )
{
@@ -2435,3 +2524,15 @@ remove_default_def ( tree default_def, struct function *func)
}
}
}
+
+// Debugging wrapper around create_empty_bb: creates the block by
+// passing PREV_BB to create_empty_bb, then reports (through DEBUG_A)
+// the index and address of PREV_BB, of the new block, and of the new
+// block's prev_bb and next_bb links.  MSG is a caller-chosen label
+// printed in the trace.  Returns the new block.
+//
+// NOTE(review): callers pass string literals for MSG, so const char *
+// would be the better parameter type; it is left as char * here
+// because the forward declaration near the top of the file must
+// change in step with it.
+static basic_block
+make_bb ( char *msg, basic_block prev_bb )
+{
+  basic_block ret = create_empty_bb ( prev_bb);
+  // %p expects a void * argument, so every block pointer is cast.
+  DEBUG_A( "make_bb ( %s, <bb %d>/%p ): <bb %d>/%p, prev: <bb %d>/%p, next: <bb %d>/%p\n",
+	   msg, prev_bb->index, (void *) prev_bb,
+	   ret->index, (void *) ret,
+	   ret->prev_bb->index, (void *) ret->prev_bb,
+	   ret->next_bb->index, (void *) ret->next_bb);
+  return ret;
+}
diff --git a/gcc/ipa-structure-reorg.c b/gcc/ipa-structure-reorg.c
index 9b3fdf95211..7df8f5846d4 100644
--- a/gcc/ipa-structure-reorg.c
+++ b/gcc/ipa-structure-reorg.c
@@ -116,6 +116,12 @@ int debug_indenting = 0;
static unsigned int
ipa_structure_reorg ( void)
{
+ // Here to test the sanity of the compiler (yes, it's insane.)
+ //if ( dom_info_available_p ( CDI_DOMINATORS) )
+ // {
+ // free_dominance_info ( CDI_DOMINATORS);
+ // }
+
std::vector <ReorgType_t> Reorg_Type;
std::vector <ReorgType_t> Saved_Reorg_Type;
std::vector <ProgDecl_t> Prog_Decl;
@@ -260,7 +266,6 @@ final_debug_info ( Info *info)
static unsigned int
reorg_analysis ( Info *info)
{
-
// TODO:
// Gary, this main "analysis" method seems to have a lot of
// instance interleave specific code. Shouldn't this method
diff --git a/gcc/ipa-structure-reorg.h b/gcc/ipa-structure-reorg.h
index 8454c42dd89..286e57c2380 100644
--- a/gcc/ipa-structure-reorg.h
+++ b/gcc/ipa-structure-reorg.h
@@ -253,7 +253,7 @@ extern bool print_internals (gimple *, void *);
// defined macros in the code. However, some of the uses
// should obviously be converted to dump file information.
-#define DEBUGGING 0
+#define DEBUGGING 1
#if DEBUGGING
enum Display {
Show_nothing,