diff options
author | Gary Oblock <gary@amperecomputing.com> | 2020-09-23 16:19:05 -0700 |
---|---|---|
committer | Gary Oblock <gary@amperecomputing.com> | 2020-09-23 16:19:05 -0700 |
commit | c8b48ea1752a5011d8b3feab84cf9c825ee49f0e (patch) | |
tree | 947bc72f296a98d664d32a02979c9a0aae119267 | |
parent | 3341a716f05de15d4da6afaa92e3c3ec37eb23d7 (diff) |
A complete version of the performance qualification that
compiles correctly.
Note: the compiler is currently in a horrible state, so what I've done
cannot be tested yet.
-rw-r--r-- | gcc/ipa-str-reorg-instance-interleave.c | 313 |
-rw-r--r-- | gcc/ipa-structure-reorg.c | 7 |
-rw-r--r-- | gcc/ipa-structure-reorg.h | 2 |
3 files changed, 214 insertions, 108 deletions
diff --git a/gcc/ipa-str-reorg-instance-interleave.c b/gcc/ipa-str-reorg-instance-interleave.c index c417926af64..d53d290f980 100644 --- a/gcc/ipa-str-reorg-instance-interleave.c +++ b/gcc/ipa-str-reorg-instance-interleave.c @@ -47,19 +47,22 @@ along with GCC; see the file COPYING3. If not see #include "cfghooks.h" #include "function.h" #include "cfgloop.h" +#include "wide-int.h" static void wrangle_ssa_type( tree, Info_t*); //static bool print_internals (gimple *, void *); static void str_reorg_instance_interleave_qual_part ( Info *); static void str_reorg_instance_interleave_type_part ( Info *); static void header ( bool); +static double cut_off_eq_single_pool( double); +static double alignment_effect( unsigned HOST_WIDE_INT); static void create_new_types ( Info_t *); static void create_a_new_type ( Info_t *, tree); static unsigned int reorg_perf_qual ( Info *); static tree find_coresponding_field ( tree, tree); static void remove_default_def ( tree, struct function *); static void set_lhs_for ( gimple *, tree); - +static basic_block make_bb ( char *, basic_block); // These are local to this file by design #define REORG_SP_PTR_PREFIX "_reorg_SP_ptr_type_" @@ -714,7 +717,7 @@ str_reorg_instance_interleave_trans ( Info *info) basic_block prev_bb = before_bb; // FROM failure_bb = create_empty_block(prev_bb) - basic_block failure_bb = create_empty_bb ( prev_bb); + basic_block failure_bb = make_bb ( "failure_bb", prev_bb); // I need to set the count to zero and there doesn't // seem to be direct way of doing this... failure_bb->count = prev_bb->count - prev_bb->count; @@ -772,7 +775,7 @@ str_reorg_instance_interleave_trans ( Info *info) field; field = DECL_CHAIN( field)) { - basic_block new_bb = create_empty_bb ( prev_order); + basic_block new_bb = make_bb ( "new_bb", prev_order); new_bb->count = prev_order->count; // Nope! Don't do this. 
//set_immediate_dominator ( CDI_DOMINATORS, new_bb, prev_bb); @@ -941,7 +944,7 @@ str_reorg_instance_interleave_trans ( Info *info) // create basic block for success // // FROM success_bb = create_empty_block(prev_bb_order); - basic_block success_bb = create_empty_bb ( prev_bb); + basic_block success_bb = make_bb ( "succ_bb", prev_bb); success_bb->count = prev_bb->count; // NOTE, it seems I shouldn't be attempting @@ -1880,29 +1883,29 @@ str_reorg_instance_interleave_type_part ( Info *info) // Typse for performance qualification typedef struct reorg_bb_info reorg_bb_info_t; - struct reorg_bb_info { - basic_block *bb; - }; +struct reorg_bb_info { + basic_block *bb; +}; - typedef struct perf_bb_info perf_bb_info_t; - typedef struct acc_info acc_info_t; - typedef struct var_info var_info_t; - - struct var_info { - varpool_node *var; - sbitmap *bits; - double count; - }; - - struct acc_info { - varpool_node *v; - int field_num; - }; - - struct perf_bb_info { - std::vector <var_info_t*> *vari; - basic_block *gcc_bb; - }; +typedef struct perf_bb_info perf_bb_info_t; +typedef struct acc_info acc_info_t; +typedef struct var_info var_info_t; + +struct var_info { + varpool_node *var; + sbitmap *bits; + double count; +}; + +struct acc_info { + varpool_node *v; + int field_num; +}; + +struct perf_loop_info { + std::vector <var_info_t*> *vari; + class loop *gcc_loop; +}; static void account_for_use( tree, std::vector <acc_info_t> *); static bool is_array_access( tree); @@ -1923,6 +1926,17 @@ reorg_perf_qual ( Info *info) (*(info->reorg_type))[i].do_instance_interleave = true; } #else + + // This dom sequence is broken and I'm stalled on it. + // I used the sequence before but the upstream code (not + // my pass) is broken. 
+ if ( dom_info_available_p ( CDI_DOMINATORS) ) + { + free_dominance_info ( CDI_DOMINATORS); + } + + calculate_dominance_info (CDI_DOMINATORS); + // We are doing a quick and dirty version of performance // qualification for testing purposes and possibly the // initial version of for the main branch. @@ -1950,25 +1964,48 @@ reorg_perf_qual ( Info *info) // Ulgy GCC idiom with global pointer to current function. push_cfun ( func); // TBD + std::vector<perf_loop_info> loop_perf; + loop_perf.reserve ( number_of_loops ( func)); class loop *loop; FOR_EACH_LOOP_FN ( func, loop, LI_ONLY_INNERMOST ) { + loop_perf [ loop->num ].vari = new std::vector<var_info_t*>; // ??? + loop_perf [ loop->num ].gcc_loop = loop; size_t num_bbs = loop->num_nodes; basic_block *bbs = get_loop_body ( loop); - // This stuff probably doesn't matter - #if 0 - int *bbsort = XNEWVEC ( int, num_bbs); - reorg_bb_info_t *rbbs = - XNEWVEC ( reorg_bb_info_t, num_bbs); - topsort( bbs, bbsort, loop->num_nodes); - for( i = 0; i < num_bbs; i++ ) { - rbbs[i].bb = &bbs[bbsort[i]]; - } - map <int,perf_bb_info_t> bbmap; - #endif - // TBD Stuff here + for ( unsigned i = 0; i < loop->num_nodes; i++) + { + basic_block bb = bbs [i]; + for ( auto gsi = gsi_start_bb ( bb); !gsi_end_p ( gsi); gsi_next ( &gsi) ) + { + gimple *stmt = gsi_stmt ( gsi); + if ( contains_a_reorgtype ( stmt, info) != NULL ) + { + DEBUG_A("examine: "); + DEBUG_F ( print_gimple_stmt, stderr, stmt, 0); + INDENT(4); + unsigned n_ops = gimple_num_ops( stmt); + tree op; + unsigned ith_op; + for ( ith_op = 0; i < n_ops; i++ ) + { + op = gimple_op ( stmt, ith_op); + ReorgType_t *tri = tree_contains_a_reorgtype (op, info); + if ( tri != NULL ) + { + DEBUG_A(""); + DEBUG_F(print_reorg, stderr, 0, tri); + DEBUG(", "); + DEBUG_F(flexible_print, stderr, op, 1, (dump_flags_t)0); + } + } + INDENT(-4); + + } + } + }continue; // Testing above here // Obtain loop count by looking at all the block counts. 
unsigned max_count = 0; @@ -1984,76 +2021,95 @@ reorg_perf_qual ( Info *info) // loop. TBD But perf_bb is per loop so we need something similar // per loop. - std::vector <var_info_t*> pv = bb->vari; - for( auto pvi = pv->begin (); pvi != pv->end (); pv = pvi++ ) { // 676 - tree base_type = base_type_of( pvi->var->decl); - ReorgType_t *ri = get_reorgtype_info( base_type, info); - // Reorg accounting - if( ri != NULL ) { - double reorg_nca = 0.0; - int nf = number_of_fields_of( base_type); - int fi; - for( fi = 0; fi < nf; fi++ ) { // 684 - if( bitmap_bit_p( fi, pv->bits) ) { - int fld_width = field_width( base_type, fi); - reorg_nca += pvi->count * alignment_effect( fld_width); - } - } - ri->reorg_perf += reorg_nca; - } // 699 - - // regular accounting - double regular_nca = 0.0; - sbitmap *cache_model = sbitmap_alloc(1); - // TBD NOTE, pv steps on the pv above. - vector <var_info_t*> pv2 = perf_bb->vari; - for( auto pv2i = pv2->begin (); pv2i != pv2->end; pv2i++ ) { // 704 - tree base_type = base_type_of( pv2i->var->decl); - // create a tiny model of the cache big - // enough for this record. 
- int len = - ((length( base_type) + L1_CACHE_LINE_SIZE -1) - / - L1_CACHE_LINE_SIZE) - + - 1; - cache_model = sbitmap_resize( cache_model, len, 0); - int nf = number_of_fields_of( base_type); - int nrbo = number of record base offsets - double accum = 0.0; - for( rboi = 0; rboi < nrbo; rboi++ ) { - base_offset = offset_for( rboi); - // Access accounting - int fi; - for( fi = 0; fi < nf; fi++ ) { - if( bitmap_bit_p( fi, pv2i->bits) ) { - int fld_width = field_width( base_type, fi); - int fld_offset = field_offset( base_type, fi); - int chari; - for( chari = 0; chari < fld_width; chari++ ) { - int loc = (chari + field_offset + base_offset) - / - L1_CACHE_LINE_SIZE; - bitmap_set_bit(cache_model, loc); + std::vector <var_info_t*> *pv = loop_perf [ loop->num].vari; + for ( auto pvi = pv->begin (); pvi != pv->end (); pvi++ ) + { // 676 + tree base_type = base_type_of( ( *pvi)->var->decl); + ReorgType_t *ri = get_reorgtype_info ( base_type, info); + // Reorg accounting + if( ri != NULL ) + { + double reorg_nca = 0.0; + int fi; + tree field; + for( field = TYPE_FIELDS ( ri->gcc_type), fi = 0; + field; + field = DECL_CHAIN ( field), fi++ ) // 684 + { + if ( bitmap_bit_p ( *(*pvi)->bits, fi) ) + { + unsigned HOST_WIDE_INT fld_width = + tree_to_uhwi ( DECL_SIZE ( field)); + reorg_nca += max_count * alignment_effect ( fld_width); + } } + ri->instance_interleave.reorg_perf += reorg_nca; + } // 699 + + // regular accounting + double regular_nca = 0.0; + sbitmap cache_model = sbitmap_alloc(1); + // TBD NOTE, pv steps on the pv above. + std::vector <var_info_t*> *pv2 = loop_perf[ loop->num].vari; + for( auto pv2i = pv2->begin (); pv2i != pv2->end (); pv2i++ ) { // 704 + tree base_type = base_type_of ( (*pv2i)->var->decl); + // create a tiny model of the cache big + // enough for this record. 
+ unsigned HOST_WIDE_INT len = + (( tree_to_uhwi ( DECL_SIZE ( base_type)) + + + param_l1_cache_line_size -1) + / + param_l1_cache_line_size) + + + 1; + cache_model = sbitmap_resize( cache_model, (unsigned) len, 0); + double accum = 0.0; + int nrbo = 0; + for ( auto field_ex = TYPE_FIELDS ( base_type); + field_ex; + field_ex = DECL_CHAIN ( field_ex) ) + { + nrbo++; + unsigned HOST_WIDE_INT base_offset = + tree_to_uhwi ( DECL_FIELD_OFFSET( field_ex)); + // Access accounting + int fi = 0; + for ( auto field = TYPE_FIELDS ( base_type); + field; + field = DECL_CHAIN ( field), fi++) + { + if ( bitmap_bit_p ( *(*pv2i)->bits, fi) ) + { + unsigned HOST_WIDE_INT fld_width, fld_offset; + fld_width = tree_to_uhwi ( DECL_SIZE ( field)); + fld_offset = tree_to_uhwi ( DECL_FIELD_OFFSET ( field)); + int chari; + for ( chari = 0; chari < fld_width; chari++ ) + { + int loc = (chari + fld_offset + base_offset) + / + param_l1_cache_line_size; + bitmap_set_bit ( cache_model, loc); + } + } + } + accum += bitmap_count_bits ( cache_model); + bitmap_clear ( cache_model); } - } - accum += popcount( cache_model); - bitmap_clear( cache_model); + regular_nca += accum / nrbo; + + } // 739 + sbitmap_free ( cache_model); + + if( ri != NULL ) { + ri->instance_interleave.regular_perf += regular_nca; + cache_accesses_noreorg += regular_nca; + } else { + cache_accesses += regular_nca; } - regular_nca += accum/nrbo; - - } // 739 - sbitmap_free( cache_model); - - if( ri != NULL ) { - ri->regular_perf += regular_nca; - cache_accesses_noreorg += regular_nca; - } else { - cache_accesses += regular_nca; - } - } // end for each prop_var 748 - + } // end for each prop_var 748 + } // pop_cfun (); @@ -2120,11 +2176,44 @@ reorg_perf_qual ( Info *info) { reorgi->do_instance_interleave = false; } - } + + free_dominance_info ( CDI_DOMINATORS); + #endif } +#define SINGLE_POOL_SLOPE \ + ((SINGLE_POOL_RAW_DO_IT_ALWAYS - SINGLE_POOL_RAW_SKIP_IT) \ + / \ + (SINGLE_POOL_ABS_DO_IT_ALWAYS - SINGLE_POOL_ABS_SKIP_IT)) + 
+#define SINGLE_POOL_INTERSECT \ + (SINGLE_POOL_RAW_SKIP_IT \ + - \ + SINGLE_POOL_SLOPE * SINGLE_POOL_ABS_SKIP_IT) + +static double +cut_off_eq_single_pool( double x) +{ + return SINGLE_POOL_SLOPE * x + SINGLE_POOL_INTERSECT; +} + +static double +alignment_effect( unsigned HOST_WIDE_INT width ) +{ + unsigned HOST_WIDE_INT times = param_l1_cache_line_size / width; // ?? + unsigned HOST_WIDE_INT rem = param_l1_cache_line_size % width; + if( rem == 0 ) { + return 1.0; + } + unsigned HOST_WIDE_INT m, n, g; + g = gcd( param_l1_cache_line_size, width); + m = param_l1_cache_line_size / g; + n = width / g; + return 1.0 + (n - 1.0)/m; +} + static void header ( bool initialize ) { @@ -2435,3 +2524,15 @@ remove_default_def ( tree default_def, struct function *func) } } } + +static basic_block +make_bb ( char *msg, basic_block prev_bb ) +{ + basic_block ret = create_empty_bb ( prev_bb); + DEBUG_A( "make_bb ( %s, <bb %d>/%p ): <bb %d>/%p, prev: <bb %d>/%p, next: <bb %d>/%p\n", + msg, prev_bb->index, prev_bb, + ret->index, ret, + ret->prev_bb->index, ret->prev_bb, + ret->next_bb->index, ret->next_bb); + return ret; +} diff --git a/gcc/ipa-structure-reorg.c b/gcc/ipa-structure-reorg.c index 9b3fdf95211..7df8f5846d4 100644 --- a/gcc/ipa-structure-reorg.c +++ b/gcc/ipa-structure-reorg.c @@ -116,6 +116,12 @@ int debug_indenting = 0; static unsigned int ipa_structure_reorg ( void) { + // Here to test the sanity of the compiler (yes, it's insane.) + //if ( dom_info_available_p ( CDI_DOMINATORS) ) + // { + // free_dominance_info ( CDI_DOMINATORS); + // } + std::vector <ReorgType_t> Reorg_Type; std::vector <ReorgType_t> Saved_Reorg_Type; std::vector <ProgDecl_t> Prog_Decl; @@ -260,7 +266,6 @@ final_debug_info ( Info *info) static unsigned int reorg_analysis ( Info *info) { - // TODO: // Gary, this main "analysis" method seems to have a lot of // instance interleave specific code. 
Shouldn't this method diff --git a/gcc/ipa-structure-reorg.h b/gcc/ipa-structure-reorg.h index 8454c42dd89..286e57c2380 100644 --- a/gcc/ipa-structure-reorg.h +++ b/gcc/ipa-structure-reorg.h @@ -253,7 +253,7 @@ extern bool print_internals (gimple *, void *); // defined marcos in the code. However, some of uses // should obviously be converted to dump file information. -#define DEBUGGING 0 +#define DEBUGGING 1 #if DEBUGGING enum Display { Show_nothing, |