summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGary Oblock <gary@amperecomputing.com>2020-09-10 12:23:29 -0700
committerGary Oblock <gary@amperecomputing.com>2020-09-10 12:23:29 -0700
commit345086a053ea0921ea01225ef0e69c3956af99c0 (patch)
treeb3f3700b432226d2b472330d9ce8358ec8ed8a6d
parentaaafa91d98dd64130398c992b61ec42cff160c9a (diff)
Some preliminary code for performance qualification of
instance interleaving.
-rw-r--r--gcc/ipa-str-reorg-instance-interleave.c283
-rw-r--r--gcc/ipa-structure-reorg.h5
2 files changed, 273 insertions, 15 deletions
diff --git a/gcc/ipa-str-reorg-instance-interleave.c b/gcc/ipa-str-reorg-instance-interleave.c
index 16633d4f87c..c417926af64 100644
--- a/gcc/ipa-str-reorg-instance-interleave.c
+++ b/gcc/ipa-str-reorg-instance-interleave.c
@@ -89,14 +89,11 @@ static void set_lhs_for ( gimple *, tree);
1111 bad failure: walk return bogus op
*/
-
-#define TOGSSA1 1
-#define TOGSSA2 1
-#define TOGUPD1 1
-#define TOGUPD2 1
-// ignore TOGUPD3
-#define TOGUPD3 1
-#define TOGUPD4 1
+// These are dummy values tha alway result the reorganization
+#define SINGLE_POOL_RAW_SKIP_IT 0.0
+#define SINGLE_POOL_RAW_DO_IT_ALWAYS 0.0
+#define SINGLE_POOL_ABS_SKIP_IT 0.0
+#define SINGLE_POOL_ABS_DO_IT_ALWAYS 0.0
int
str_reorg_instance_interleave_qual ( Info *info)
@@ -1649,9 +1646,7 @@ str_reorg_instance_interleave_trans ( Info *info)
// Should update_stmt be called here?
// It does not seem either harm or help so I'll
// leave it in.
-#if TOGUPD3
update_stmt ( use_stmt);
-#endif
}
// Modify the LHS too
// TBD This code needs to be more general.
@@ -1664,13 +1659,10 @@ str_reorg_instance_interleave_trans ( Info *info)
set_lhs_for ( def, new_ssa_name);
- #if TOGUPD4
update_stmt ( def);
- #endif
+
// This is where we know that ssa_name needs to be replaced
- #if TOGSSA2
release_ssa_name_fn ( func, ssa_name);
- #endif
}
INDENT(-4);
@@ -1885,20 +1877,252 @@ str_reorg_instance_interleave_type_part ( Info *info)
create_new_types ( info);
}
+// Typse for performance qualification
+
+typedef struct reorg_bb_info reorg_bb_info_t;
+ struct reorg_bb_info {
+ basic_block *bb;
+ };
+
+ typedef struct perf_bb_info perf_bb_info_t;
+ typedef struct acc_info acc_info_t;
+ typedef struct var_info var_info_t;
+
+ struct var_info {
+ varpool_node *var;
+ sbitmap *bits;
+ double count;
+ };
+
+ struct acc_info {
+ varpool_node *v;
+ int field_num;
+ };
+
+ struct perf_bb_info {
+ std::vector <var_info_t*> *vari;
+ basic_block *gcc_bb;
+ };
+
+static void account_for_use( tree, std::vector <acc_info_t> *);
+static bool is_array_access( tree);
+
static unsigned int
reorg_perf_qual ( Info *info)
{
+ #if 1
// TBD use design in doc but mark ReorgTypes
// (do_instance_interleave) that qualify instead of deleting them
// unless both dead field elimination and field reorderig are not
// viable (use do_dead_field_elim and do_field_reorder in
// Reorg_type_t.)
-
+
// For the mean time assume if a ReorgType made it here then it's qualified.
for ( int i = 0; i < info->reorg_type->size (); i++ )
{
(*(info->reorg_type))[i].do_instance_interleave = true;
}
+ #else
+ // We are doing a quick and dirty version of performance
+ // qualification for testing purposes and possibly the
+ // initial version of for the main branch.
+ auto reorg_types = info->reorg_type;
+
+ // These are floating point numbers because of the fractional
+ // accesses associated the probabilistic approach taken
+ // below. By taken below I refer to full algorithm and the
+ // quick and dirty initial version.
+
+ // reorg was not possible for these
+ double cache_accesses = 0.0;
+
+ // reorg possible for these. This doesn't mean it is
+ // profitable or even legal for all the types lumped
+ // in together here. However, the sum of this and
+ // cache_accesses should account for all the array
+ // accesses in the program.
+ double cache_accesses_noreorg = 0.0;
+
+ // Perf Analysis
+ struct cgraph_node *node;
+ FOR_EACH_FUNCTION_WITH_GIMPLE_BODY ( node) {
+ struct function *func = DECL_STRUCT_FUNCTION ( node->decl);
+ // Ulgy GCC idiom with global pointer to current function.
+ push_cfun ( func);
+ // TBD
+ class loop *loop;
+ FOR_EACH_LOOP_FN ( func, loop, LI_ONLY_INNERMOST )
+ {
+ size_t num_bbs = loop->num_nodes;
+ basic_block *bbs = get_loop_body ( loop);
+
+ // This stuff probably doesn't matter
+ #if 0
+ int *bbsort = XNEWVEC ( int, num_bbs);
+ reorg_bb_info_t *rbbs =
+ XNEWVEC ( reorg_bb_info_t, num_bbs);
+ topsort( bbs, bbsort, loop->num_nodes);
+ for( i = 0; i < num_bbs; i++ ) {
+ rbbs[i].bb = &bbs[bbsort[i]];
+ }
+ map <int,perf_bb_info_t> bbmap;
+ #endif
+
+ // TBD Stuff here
+
+ // Obtain loop count by looking at all the block counts.
+ unsigned max_count = 0;
+ for ( unsigned i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs [i];
+ max_count = MAX( max_count, bb->count.value ());
+ }
+ DEBUG_L("max_count = %d, nb_iterations_estimate = %ld\n",
+ max_count, loop->nb_iterations_estimate);
+
+ // Originally this was done per bb but now it has to be per
+ // loop. TBD But perf_bb is per loop so we need something similar
+ // per loop.
+
+ std::vector <var_info_t*> pv = bb->vari;
+ for( auto pvi = pv->begin (); pvi != pv->end (); pv = pvi++ ) { // 676
+ tree base_type = base_type_of( pvi->var->decl);
+ ReorgType_t *ri = get_reorgtype_info( base_type, info);
+ // Reorg accounting
+ if( ri != NULL ) {
+ double reorg_nca = 0.0;
+ int nf = number_of_fields_of( base_type);
+ int fi;
+ for( fi = 0; fi < nf; fi++ ) { // 684
+ if( bitmap_bit_p( fi, pv->bits) ) {
+ int fld_width = field_width( base_type, fi);
+ reorg_nca += pvi->count * alignment_effect( fld_width);
+ }
+ }
+ ri->reorg_perf += reorg_nca;
+ } // 699
+
+ // regular accounting
+ double regular_nca = 0.0;
+ sbitmap *cache_model = sbitmap_alloc(1);
+ // TBD NOTE, pv steps on the pv above.
+ vector <var_info_t*> pv2 = perf_bb->vari;
+ for( auto pv2i = pv2->begin (); pv2i != pv2->end; pv2i++ ) { // 704
+ tree base_type = base_type_of( pv2i->var->decl);
+ // create a tiny model of the cache big
+ // enough for this record.
+ int len =
+ ((length( base_type) + L1_CACHE_LINE_SIZE -1)
+ /
+ L1_CACHE_LINE_SIZE)
+ +
+ 1;
+ cache_model = sbitmap_resize( cache_model, len, 0);
+ int nf = number_of_fields_of( base_type);
+ int nrbo = number of record base offsets
+ double accum = 0.0;
+ for( rboi = 0; rboi < nrbo; rboi++ ) {
+ base_offset = offset_for( rboi);
+ // Access accounting
+ int fi;
+ for( fi = 0; fi < nf; fi++ ) {
+ if( bitmap_bit_p( fi, pv2i->bits) ) {
+ int fld_width = field_width( base_type, fi);
+ int fld_offset = field_offset( base_type, fi);
+ int chari;
+ for( chari = 0; chari < fld_width; chari++ ) {
+ int loc = (chari + field_offset + base_offset)
+ /
+ L1_CACHE_LINE_SIZE;
+ bitmap_set_bit(cache_model, loc);
+ }
+ }
+ }
+ accum += popcount( cache_model);
+ bitmap_clear( cache_model);
+ }
+ regular_nca += accum/nrbo;
+
+ } // 739
+ sbitmap_free( cache_model);
+
+ if( ri != NULL ) {
+ ri->regular_perf += regular_nca;
+ cache_accesses_noreorg += regular_nca;
+ } else {
+ cache_accesses += regular_nca;
+ }
+ } // end for each prop_var 748
+
+
+ } //
+ pop_cfun ();
+ }
+
+ // TBD Somebody somewhere needs to compute:
+ // reorg_perf
+ // regular_perf
+ // cache_accesses
+ // cache_accesses_noreorg
+
+ double total_cache_accesses =
+ cache_accesses + cache_accesses_noreorg;
+
+ info->total_cache_accesses = total_cache_accesses;
+
+ //
+ // Decide which reorgTypes fail performance qualification
+ //
+
+ // We have the total accesses per type. Use that to see
+ // if the type qualifies.
+ for ( auto reorgi = reorg_types->begin ();
+ reorgi != reorg_types->end (); reorgi++ )
+ {
+ double with_opt = reorgi->instance_interleave.reorg_perf;
+ double wihtout_opt = reorgi->instance_interleave.regular_perf;
+ double raw_effect = with_opt/wihtout_opt;
+ double absolute_effect =
+ (wihtout_opt - with_opt) / total_cache_accesses;
+
+ // Note, there would need to be a multi-pool case here if
+ // that is every done.
+
+ // If the relative effect is small enough don't bother.
+ if ( raw_effect < SINGLE_POOL_RAW_SKIP_IT )
+ {
+ reorgi->do_instance_interleave = false;
+ continue;
+ }
+ // the relative effect is big enough do it anyway
+ // otherwise look at the absolute effect.
+ if ( raw_effect >= SINGLE_POOL_RAW_DO_IT_ALWAYS )
+ {
+ reorgi->do_instance_interleave = true;
+ continue;
+ }
+ if ( absolute_effect < SINGLE_POOL_ABS_SKIP_IT )
+ {
+ reorgi->do_instance_interleave = false;
+ continue;
+ }
+ if ( absolute_effect >= SINGLE_POOL_ABS_DO_IT_ALWAYS )
+ {
+ reorgi->do_instance_interleave = true;
+ continue;
+ }
+
+ // We fitted a linear equation to the corners of the
+ // effects above and use it to determine when
+ // to disqualify a type
+ double cut_off = cut_off_eq_single_pool ( absolute_effect);
+ if ( raw_effect < cut_off )
+ {
+ reorgi->do_instance_interleave = false;
+ }
+
+ }
+ #endif
}
static void
@@ -1919,6 +2143,35 @@ header ( bool initialize )
}
}
+// TBD I have doubts that this what is really needed
+bool
+is_array_access( tree acc)
+{
+ tree type = TREE_TYPE ( acc);
+ if( TREE_CODE( type) == ARRAY_TYPE )
+ return true;
+ while( POINTER_TYPE_P( type) ) {
+ type = TREE_TYPE( type);
+ if( TREE_CODE( type) == ARRAY_TYPE )
+ return true;
+ }
+ return false;
+}
+
+static void
+account_for_use( tree acc, std::vector <acc_info_t> *acc_info)
+{
+ // determine element of access
+ // find field access number i
+ // find var v
+ varpool_node *v;
+ int i;
+ // TBD
+ acc_info_t ai = { v, i};
+ acc_info->push_back( ai);
+}
+
+
// create_new_types has to crawl "all" the
// types, create new types and transform
// other types that must be changed.
diff --git a/gcc/ipa-structure-reorg.h b/gcc/ipa-structure-reorg.h
index 2abd5ddfa40..8454c42dd89 100644
--- a/gcc/ipa-structure-reorg.h
+++ b/gcc/ipa-structure-reorg.h
@@ -62,6 +62,11 @@ typedef struct ReorgType ReorgType_t;
// I have a set of trees that do not escape
// and I would like to use that for the moment.
// Not sure how it will evolve in the future...
+// REPLY: Erick, I'm not sure I follow what you
+// are asking. In my mind there should
+// be one reorg type for every unique type and
+// if some N types are equivalent then there should
+// be only one reorg type for all of them.
struct ReorgType {
unsigned id;
bool delete_me;