diff options
Diffstat (limited to 'gcc/ipa-str-reorg-instance-interleave.c')
-rw-r--r-- | gcc/ipa-str-reorg-instance-interleave.c | 1257 |
1 files changed, 1131 insertions, 126 deletions
diff --git a/gcc/ipa-str-reorg-instance-interleave.c b/gcc/ipa-str-reorg-instance-interleave.c index 1d0213f1427..769e65a55ae 100644 --- a/gcc/ipa-str-reorg-instance-interleave.c +++ b/gcc/ipa-str-reorg-instance-interleave.c @@ -19,21 +19,24 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +#include <vector> +#include <map> +#include <set> +#include <list> +#include <algorithm> #include "config.h" #include "system.h" #include "coretypes.h" #include "backend.h" #include "tree.h" #include "tree-ssa.h" +#include "tree-ssa-loop-ivopts.h" #include "tree-dfa.h" #include "gimple.h" #include "tree-pass.h" #include "cgraph.h" #include "gimple-iterator.h" #include "pretty-print.h" -#include <vector> -#include <map> -#include <set> #include "ipa-structure-reorg.h" #include "dumpfile.h" #include "tree-pretty-print.h" @@ -49,13 +52,26 @@ along with GCC; see the file COPYING3. If not see #include "cfgloop.h" #include "wide-int.h" +typedef struct acc_base_info acc_base_info_t; +typedef struct acc_info acc_info_t; +typedef struct varInfo varInfo_t; + static void wrangle_ssa_type( tree, Info_t*); //static bool print_internals (gimple *, void *); static void str_reorg_instance_interleave_qual_part ( Info *); static void str_reorg_instance_interleave_type_part ( Info *); static void header ( bool); +static void print_var_infos ( FILE *, std::vector<varInfo_t> &); +static void compress_acc_infos ( std::vector <acc_info_t>); +static void print_acc_info ( FILE *, acc_info_t *); +static void print_acc_infos ( FILE *, std::vector <acc_info_t>); +static bool acc_lt ( const acc_info_t&, const acc_info_t&); +static bool acc_eq ( const acc_info_t&, const acc_info_t&); +static bool all_but_field_eq ( const acc_info_t&, const acc_info_t&); static double cut_off_eq_single_pool( double); static double alignment_effect( unsigned HOST_WIDE_INT); +static void tell_me_about_ssa_name ( tree, int); +static void analyze_access ( tree , acc_info_t *); static void create_new_types ( Info_t *); static void create_a_new_type ( Info_t *, tree); static unsigned int reorg_perf_qual ( Info *); @@ -120,6 +136,9 @@ str_reorg_instance_interleave_trans ( Info *info) print_program ( info->reorg_dump_file, PRINT_FORMAT, 4, info); } + fprintf ( stderr, "Bypassing str_reorg_instance_interleave_trans for experiment\n"); + return 0; + DEBUG ("INTERNALS PRINT\n"); DEBUG_F (apply_to_all_gimple, print_internals, true, (void *)info); @@ -255,7 +274,7 @@ str_reorg_instance_interleave_trans ( Info *info) tree ro_side = ro_on_left ? lhs : rhs; tree nonro_side = ro_on_left ? rhs : lhs; - switch ( recognize_op ( ro_side, info) ) // "a->f" + switch ( recognize_op ( ro_side, true, info) ) // "a->f" { case ReorgOpT_Indirect: { @@ -1688,10 +1707,23 @@ str_reorg_instance_interleave_trans ( Info *info) fprintf ( info->reorg_dump_file, "End of str_reorg_instance_interleave_trans:\n"); print_program ( info->reorg_dump_file, PRINT_FORMAT, 4, info); } - + // TBD Should this be a diagnostic or not? DEBUG ("INTERNALS PRINT\n"); DEBUG_F (apply_to_all_gimple, print_internals, true, (void *)info); + + // Spin through all the functions and recompute the dominace info. + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY ( node) { + struct function *func = DECL_STRUCT_FUNCTION ( node->decl); + push_cfun ( func); + if ( dom_info_available_p ( CDI_DOMINATORS) ) + { + free_dominance_info ( CDI_DOMINATORS); + } + calculate_dominance_info (CDI_DOMINATORS); + pop_cfun (); + } + return 0; } // Note, the following code might be a bit overly simplistic. @@ -1888,31 +1920,63 @@ struct reorg_bb_info { }; typedef struct perf_bb_info perf_bb_info_t; -typedef struct acc_info acc_info_t; -typedef struct var_info var_info_t; -struct var_info { - varpool_node *var; - sbitmap *bits; - double count; +struct acc_base_info { + bool a_def_def; + bool a_decl; + bool a_func; + bool has_induct_var_acc; + bool multi_induct; + bool complicated; + // TBD Note could look at sign of operation for the + // induction. Variables moving forward are different (cache access + // wise) that those moving backward do the sort/compress shouldn't + // lump them together. + tree acc_base; + tree induct_base; + gimple *function; }; -struct acc_info { - varpool_node *v; - int field_num; +struct varInfo { + // Varpool_nodes are a pain to get at so I'll just + // use the first entry in a run of access info enties + // where all of the information but the field is the + // same. + //varpool_node *var; + acc_info_t *rep_access; + // This seems bit map scheme seems tedious and unnecessay. + // just use the fields + // sbitmap *bits; + std::list<tree> fields; + // The count doesn't vary in the simplified scheme + //double count; }; -struct perf_loop_info { - std::vector <var_info_t*> *vari; - class loop *gcc_loop; +struct acc_info { + // trying to get to the varpool seems too hard + // so I'll try for he decl + //varpool_node *v; + tree access; + tree field; // int field_num; + acc_base_info_t base_info; + ReorgType_t *reorg; }; -static void account_for_use( tree, std::vector <acc_info_t> *); +//struct perf_loop_info { +// std::vector <varInfo_t*> *vari; +// class loop *gcc_loop; +//}; + +static void account_for_access( tree, tree, std::vector <acc_info_t> *, Info_t *); static bool is_array_access( tree); static unsigned int reorg_perf_qual ( Info *info) { + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, "Doing Performance Qualification\n"); + } DEBUG_L("reorg_perf_qual:\n"); #if 1 // TBD use design in doc but mark ReorgTypes @@ -1926,7 +1990,8 @@ reorg_perf_qual ( Info *info) { (*(info->reorg_type))[i].do_instance_interleave = true; } - #else + #endif + #if 1 // We are doing a quick and dirty version of performance // qualification for testing purposes and possibly the // initial version of for the main branch. @@ -1949,8 +2014,18 @@ reorg_perf_qual ( Info *info) // Perf Analysis struct cgraph_node *node; + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY ( node) { struct function *func = DECL_STRUCT_FUNCTION ( node->decl); + + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, "Function: "); + print_generic_expr ( info->reorg_dump_file, + TREE_TYPE(TREE_TYPE( func->decl)), + (dump_flags_t)0); + } + // Ulgy GCC idiom with global pointer to current function. // However, the dominace calculations other things need it. push_cfun ( func); @@ -1960,156 +2035,622 @@ reorg_perf_qual ( Info *info) free_dominance_info ( CDI_DOMINATORS); } calculate_dominance_info (CDI_DOMINATORS); - + + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file," Function: %s\n", + lang_hooks.decl_printable_name ( func->decl, 2)); + } + + // TBD - std::vector<perf_loop_info> loop_perf; - loop_perf.reserve ( number_of_loops ( func)); + //std::vector<perf_loop_info> loop_perf; + //loop_perf.reserve ( number_of_loops ( func)); class loop *loop; + bool missing_cases = false; FOR_EACH_LOOP_FN ( func, loop, LI_ONLY_INNERMOST ) { - loop_perf [ loop->num ].vari = new std::vector<var_info_t*>; // ??? - loop_perf [ loop->num ].gcc_loop = loop; + //We don't need these + //loop_perf [ loop->num ].vari = new std::vector<varInfo_t*>; // ??? + //loop_perf [ loop->num ].gcc_loop = loop; + + std::vector<acc_info_t> acc_info; + std::vector<varInfo_t> var_info; + size_t num_bbs = loop->num_nodes; basic_block *bbs = get_loop_body ( loop); - // TBD Stuff here + // For the basic blocks in the the loop for ( unsigned i = 0; i < loop->num_nodes; i++) { basic_block bb = bbs [i]; + //DEBUG_A("BB %i:\n", bb->index); + INDENT(4); for ( auto gsi = gsi_start_bb ( bb); !gsi_end_p ( gsi); gsi_next ( &gsi) ) { gimple *stmt = gsi_stmt ( gsi); - if ( contains_a_reorgtype ( stmt, info) != NULL ) + //DEBUG_A("examine: "); + //DEBUG_F ( print_gimple_stmt, stderr, stmt, TDF_DETAILS); + INDENT(4); + unsigned n_ops = gimple_num_ops( stmt); + tree op; + unsigned ith_op; + for ( ith_op = 0; ith_op < n_ops; ith_op++ ) { - DEBUG_A("examine: "); - DEBUG_F ( print_gimple_stmt, stderr, stmt, 0); - INDENT(4); - unsigned n_ops = gimple_num_ops( stmt); - tree op; - unsigned ith_op; - for ( ith_op = 0; i < n_ops; i++ ) + op = gimple_op ( stmt, ith_op); + // It's lieing about the number of operands... so... + if ( op == NULL ) continue; + //DEBUG_A("op[%d]: %p", ith_op, op); + //DEBUG_F(flexible_print, stderr, op, 0, (dump_flags_t)0); + ReorgType_t *tri = tree_contains_a_reorgtype ( op, info); + enum ReorgOpTrans optran = recognize_op ( op, false, info); + // TBD This is where we need to remember + // each germane access + const char *s = optrans_to_str( optran); + //DEBUG_A(", %s\n", s); + if ( tri != NULL ) { - op = gimple_op ( stmt, ith_op); - ReorgType_t *tri = tree_contains_a_reorgtype (op, info); - if ( tri != NULL ) - { - DEBUG_A(""); - DEBUG_F(print_reorg, stderr, 0, tri); - DEBUG(", "); - DEBUG_F(flexible_print, stderr, op, 1, (dump_flags_t)0); - } + //DEBUG(", "); + //DEBUG_F(print_reorg, stderr, 0, tri); + } + else + //DEBUG("\n"); + switch ( optran) + { + case ReorgOpT_Indirect: + { + // TBD + // Is the var an induction variable for this loop? + // If so find the assocaite varpool_node and push + // it and the field onto var_acc_info; + tree op_var = TREE_OPERAND( op, 0); + tree op_field = TREE_OPERAND( op, 1); + // Since doesn't have an easily exposed mechanism + // for induction variable I'm hand waving here. + if ( !expr_invariant_in_loop_p ( loop, op_var) ) + { + account_for_access ( op_var, op_field, &acc_info, info); + } + } + break; + case ReorgOpT_Array: + { + // TBD + // Is the var an induction variable for this loop? + // If so find the assocaite varpool_node and push + // it and the field onto var_acc_info; + tree op_var = TREE_OPERAND( op, 0); + tree op_field = TREE_OPERAND( op, 1); + // Since doesn't have an easily exposed mechanism + // for induction variable I'm hand waving here. + if ( !expr_invariant_in_loop_p ( loop, op_var) ) + { + account_for_access ( op_var, op_field, &acc_info, info); + } + } + case ReorgOpT_AryDir: + case ReorgOpT_Deref: // ?? + missing_cases = true; } - INDENT(-4); - } + INDENT(-4); } - }continue; // Testing above here + INDENT(-4); + } + + //DEBUG_L("Dumping acc_info:\n"); + //for ( auto aci = acc_info.begin (); aci != acc_info.end (); aci++ ) + // { + // DEBUG_A("variable:\n"); + // DEBUG_F( tell_me_about_ssa_name, (*aci).access, debug_indenting + 4); + // DEBUG_A("field: "); + // DEBUG_F( flexible_print, stderr, (*aci).field, 1, (dump_flags_t)0); + // } + //DEBUG_A("before sort: \n"); + //DEBUG_F(print_acc_infos, stderr, acc_info ); + + // Sort and compact the access infos. + stable_sort ( acc_info.begin (), acc_info.end (), acc_lt); + + //DEBUG_A("before compress: \n"); + //DEBUG_F(print_acc_infos, stderr, acc_info ); + + // Sort and compact the access infos. + std::stable_sort ( acc_info.begin (), acc_info.end (), acc_lt); + + compress_acc_infos ( acc_info ); + + DEBUG_A("after compress: \n"); + DEBUG_F(print_acc_infos, stderr, acc_info ); + // Obtain loop count by looking at all the block counts. - unsigned max_count = 0; + unsigned loop_count = 0; for ( unsigned i = 0; i < loop->num_nodes; i++) { basic_block bb = bbs [i]; - max_count = MAX( max_count, bb->count.value ()); + loop_count = MAX( loop_count, bb->count.value ()); } - DEBUG_L("max_count = %d, nb_iterations_estimate = %ld\n", - max_count, loop->nb_iterations_estimate); + DEBUG_L("loop_count = %d, nb_iterations_estimate = %ld\n", + loop_count, loop->nb_iterations_estimate); + + // Create the variable infos + varInfo_t var_entry; + var_entry.rep_access = &acc_info[0]; + unsigned len = acc_info.size (); + if ( len == 1 ) + { + var_entry.fields.push_front ( acc_info[0].field); + } + else + { + unsigned i, j; + for ( i = 0, j = 1; j < len; j++ ) + { + acc_info_t *a_of_i = &acc_info[i]; + acc_info_t *a_of_j = &acc_info[j]; + var_entry.fields.push_front ( a_of_i->field); + if ( !all_but_field_eq ( *a_of_i, *a_of_j ) ) + { + var_info.push_back( var_entry); + var_entry.rep_access = a_of_j; + var_entry.fields.clear (); + a_of_i = a_of_j; + } + } + } + var_info.push_back( var_entry); + + print_var_infos ( stderr, var_info); // Originally this was done per bb but now it has to be per // loop. TBD But perf_bb is per loop so we need something similar // per loop. - std::vector <var_info_t*> *pv = loop_perf [ loop->num].vari; - for ( auto pvi = pv->begin (); pvi != pv->end (); pvi++ ) +#if 1 + for ( auto pvi = var_info.begin (); pvi != var_info.end (); pvi++ ) { // 676 - tree base_type = base_type_of( ( *pvi)->var->decl); - ReorgType_t *ri = get_reorgtype_info ( base_type, info); + //tree base_type = base_type_of( pvi->rep_access.access); + ReorgType_t *ri = pvi->rep_access->reorg; // Reorg accounting if( ri != NULL ) { double reorg_nca = 0.0; - int fi; - tree field; - for( field = TYPE_FIELDS ( ri->gcc_type), fi = 0; - field; - field = DECL_CHAIN ( field), fi++ ) // 684 + + for ( auto fldi = pvi->fields.begin (); fldi != pvi->fields.end (); fldi++ ) { - if ( bitmap_bit_p ( *(*pvi)->bits, fi) ) + unsigned HOST_WIDE_INT fld_width = + tree_to_uhwi ( DECL_SIZE ( *fldi)); + reorg_nca += loop_count * alignment_effect ( fld_width); + } + ri->instance_interleave.reorg_perf += reorg_nca; + } // 699 + +// add regular accounting look 2229 2321 + // regular accounting + double regular_nca = 0.0; + sbitmap cache_model = sbitmap_alloc(1); + + for( auto pv2i = var_info.begin (); pv2i != var_info.end (); pv2i++ ) + { // 704 + tree access = pv2i->rep_access->base_info.acc_base; + tree base_type; // = base_type_of ( access); + if ( pv2i->rep_access->reorg != NULL ) { + base_type = pv2i->rep_access->reorg->gcc_type; + } + else + { + if ( TREE_TYPE ( access ) != NULL ) + { + base_type = base_type_of ( access); + } + else { - unsigned HOST_WIDE_INT fld_width = - tree_to_uhwi ( DECL_SIZE ( field)); - reorg_nca += max_count * alignment_effect ( fld_width); + gcc_assert (0); + } + } + + bool base_type_isa_decl = DECL_P ( base_type ); + + // create a tiny model of the cache big + // enough for this record. + tree base_type_size = base_type_isa_decl ? + DECL_SIZE ( base_type ) + : + TYPE_SIZE ( base_type); + + unsigned HOST_WIDE_INT len = + (( tree_to_uhwi ( base_type_size) + + + param_l1_cache_line_size -1) + / + param_l1_cache_line_size) + + + 1; + + // TBD Does this clear the bits??? It needs to. + // Each bit represents a cache line. + cache_model = sbitmap_resize( cache_model, (unsigned) len, 0); + double accum = 0.0; + int nrbo = 0; + if ( base_type_isa_decl ) + { + for ( auto field_ex = TYPE_FIELDS ( base_type); + field_ex; + field_ex = DECL_CHAIN ( field_ex) ) + { + nrbo++; + // Looking back on my design I don't have a clue + // why this is here and what it does. Sigh... + unsigned HOST_WIDE_INT base_offset = + tree_to_uhwi ( DECL_FIELD_OFFSET( field_ex)); + // Access accounting + + for ( auto fldi = pv2i->fields.begin (); + fldi != pv2i->fields.end (); fldi++ ) + { + tree field = *fldi; + unsigned HOST_WIDE_INT fld_width, fld_offset; + fld_width = tree_to_uhwi ( DECL_SIZE ( field)); + fld_offset = tree_to_uhwi ( DECL_FIELD_OFFSET ( field)); + int chari; + for ( chari = 0; chari < fld_width; chari++ ) + { + int loc = (chari + fld_offset + base_offset) + / + param_l1_cache_line_size; + bitmap_set_bit ( cache_model, loc); + } + } + + accum += bitmap_count_bits ( cache_model); + bitmap_clear ( cache_model); } } + else + { + nrbo = 1; + accum++; + } + double amount = accum / nrbo; + regular_nca += amount; + } // 739 + sbitmap_free ( cache_model); + + if( ri != NULL ) { + ri->instance_interleave.regular_perf += regular_nca; + cache_accesses_noreorg += regular_nca; + } else { + cache_accesses += regular_nca; + } + } // end for prop_var + +#else + for ( auto pvi = var_info.begin (); pvi != var_info.end (); pvi++ ) + { // 676 + //tree base_type = base_type_of( pvi->rep_access.access); + ReorgType_t *ri = pvi->rep_access->reorg; + // Reorg accounting + if( ri != NULL ) + { + double reorg_nca = 0.0; + + for ( auto fldi = pvi->fields.begin (); fldi != pvi->fields.end (); fldi++ ) + { + unsigned HOST_WIDE_INT fld_width = + tree_to_uhwi ( DECL_SIZE ( *fldi)); + reorg_nca += loop_count * alignment_effect ( fld_width); + } ri->instance_interleave.reorg_perf += reorg_nca; +////////////////// DIFFERENCES START HERE } // 699 // regular accounting double regular_nca = 0.0; sbitmap cache_model = sbitmap_alloc(1); - // TBD NOTE, pv steps on the pv above. - std::vector <var_info_t*> *pv2 = loop_perf[ loop->num].vari; - for( auto pv2i = pv2->begin (); pv2i != pv2->end (); pv2i++ ) { // 704 - tree base_type = base_type_of ( (*pv2i)->var->decl); - // create a tiny model of the cache big - // enough for this record. - unsigned HOST_WIDE_INT len = - (( tree_to_uhwi ( DECL_SIZE ( base_type)) - + - param_l1_cache_line_size -1) - / - param_l1_cache_line_size) - + - 1; - cache_model = sbitmap_resize( cache_model, (unsigned) len, 0); - double accum = 0.0; - int nrbo = 0; - for ( auto field_ex = TYPE_FIELDS ( base_type); - field_ex; - field_ex = DECL_CHAIN ( field_ex) ) - { - nrbo++; - unsigned HOST_WIDE_INT base_offset = - tree_to_uhwi ( DECL_FIELD_OFFSET( field_ex)); - // Access accounting - int fi = 0; - for ( auto field = TYPE_FIELDS ( base_type); - field; - field = DECL_CHAIN ( field), fi++) - { - if ( bitmap_bit_p ( *(*pv2i)->bits, fi) ) - { - unsigned HOST_WIDE_INT fld_width, fld_offset; - fld_width = tree_to_uhwi ( DECL_SIZE ( field)); - fld_offset = tree_to_uhwi ( DECL_FIELD_OFFSET ( field)); - int chari; - for ( chari = 0; chari < fld_width; chari++ ) - { - int loc = (chari + fld_offset + base_offset) - / - param_l1_cache_line_size; - bitmap_set_bit ( cache_model, loc); - } - } - } - accum += bitmap_count_bits ( cache_model); - bitmap_clear ( cache_model); - } - regular_nca += accum / nrbo; + + for( auto pv2i = var_info.begin (); pv2i != var_info.end (); pv2i++ ) + { // 704 + tree access = pv2i->rep_access->base_info.acc_base; + tree base_type; // = base_type_of ( access); + if ( pv2i->rep_access->reorg != NULL ) { + base_type = pv2i->rep_access->reorg->gcc_type; + } + else + { + if ( TREE_TYPE ( access ) != NULL ) + { + base_type = base_type_of ( access); + } + else + { + gcc_assert (0); + } + } + + Bool base_type_isa_decl = DECL_P ( base_type ); + + // create a tiny model of the cache big + // enough for this record. + tree base_type_size = base_type_isa_decl ? + DECL_SIZE ( base_type ) + : + TYPE_SIZE ( base_type); + + unsigned HOST_WIDE_INT len = + (( tree_to_uhwi ( base_type_size) + + + param_l1_cache_line_size -1) + / + param_l1_cache_line_size) + + + 1; - } // 739 + // TBD Does this clear the bits??? It needs to. + // Each bit represents a cache line. + cache_model = sbitmap_resize( cache_model, (unsigned) len, 0); + double accum = 0.0; + int nrbo = 0; + if ( base_type_isa_decl ) + { + for ( auto field_ex = TYPE_FIELDS ( base_type); + field_ex; + field_ex = DECL_CHAIN ( field_ex) ) + { + nrbo++; + // Looking back on my design I don't have a clue + // why this is here and what it does. Sigh... + unsigned HOST_WIDE_INT base_offset = + tree_to_uhwi ( DECL_FIELD_OFFSET( field_ex)); + // Access accounting + + for ( auto fldi = pv2i->fields.begin (); + fldi != pv2i->fields.end (); fldi++ ) + { + tree field = *fldi; + unsigned HOST_WIDE_INT fld_width, fld_offset; + fld_width = tree_to_uhwi ( DECL_SIZE ( field)); + fld_offset = tree_to_uhwi ( DECL_FIELD_OFFSET ( field)); + int chari; + for ( chari = 0; chari < fld_width; chari++ ) + { + int loc = (chari + fld_offset + base_offset) + / + param_l1_cache_line_size; + bitmap_set_bit ( cache_model, loc); + } + } + + accum += bitmap_count_bits ( cache_model); + bitmap_clear ( cache_model); + } + } + else + { + nrbo = 1; + accum++; + } + double amount = accum / nrbo; + regular_nca += amount; + } // 739 sbitmap_free ( cache_model); if( ri != NULL ) { ri->instance_interleave.regular_perf += regular_nca; cache_accesses_noreorg += regular_nca; + } // 699 + + // regular accounting + double regular_nca = 0.0; + sbitmap cache_model = sbitmap_alloc(1); + + for( auto pv2i = var_info.begin (); pv2i != var_info.end (); pv2i++ ) + { // 704 + tree access = pv2i->rep_access->base_info.acc_base; + tree base_type; // = base_type_of ( access); + if ( pv2i->rep_access->reorg != NULL ) + { + base_type = pv2i->rep_access->reorg->gcc_type; + } + else + { + if ( TREE_TYPE ( access ) != NULL ) + { + base_type = base_type_of ( access); + } + else + { + gcc_assert (0); + } + } + + bool base_type_isa_decl = DECL_P ( base_type ); + + // create a tiny model of the cache big + // enough for this record. + tree base_type_size = base_type_isa_decl ? + DECL_SIZE ( base_type ) + : + TYPE_SIZE ( base_type); + + unsigned HOST_WIDE_INT len = + (( tree_to_uhwi ( base_type_size) + + + param_l1_cache_line_size -1) + / + param_l1_cache_line_size) + + + 1; + + // TBD Does this clear the bits??? It needs to. + // Each bit represents a cache line. + cache_model = sbitmap_resize( cache_model, (unsigned) len, 0); + double accum = 0.0; + int nrbo = 0; + if ( base_type_isa_decl ) + { + for ( auto field_ex = TYPE_FIELDS ( base_type); + field_ex; + field_ex = DECL_CHAIN ( field_ex) ) + { + nrbo++; + // Looking back on my design I don't have a clue + // why this is here and what it does. Sigh... + unsigned HOST_WIDE_INT base_offset = + tree_to_uhwi ( DECL_FIELD_OFFSET( field_ex)); + // Access accounting + + for ( auto fldi = pv2i->fields.begin (); + fldi != pv2i->fields.end (); fldi++ ) + { + tree field = *fldi; + unsigned HOST_WIDE_INT fld_width, fld_offset; + fld_width = tree_to_uhwi ( DECL_SIZE ( field)); + fld_offset = tree_to_uhwi ( DECL_FIELD_OFFSET ( field)); + int chari; + for ( chari = 0; chari < fld_width; chari++ ) + { + int loc = (chari + fld_offset + base_offset) + / + param_l1_cache_line_size; + bitmap_set_bit ( cache_model, loc); + } + } + + accum += bitmap_count_bits ( cache_model); + bitmap_clear ( cache_model); + } + } + else + { + nrbo = 1; + accum++; + } + double amount = accum / nrbo; + regular_nca += amount; + } // 739 + sbitmap_free ( cache_model); + + if( ri != NULL ) { + ri->instance_interleave.regular_perf += regular_nca; + cache_accesses_noreorg += regular_nca; + ///////////// DIFFERENCE NOTED HERE (small, next 4 lines not sane) + ri->instance_interleave.reorg_perf += reorg_nca; + //////////// DIFFERENCE NOTED HERE + + } // 699 + + // regular accounting + double regular_nca = 0.0; + sbitmap cache_model = sbitmap_alloc(1); + + for( auto pv2i = var_info.begin (); pv2i != var_info.end (); pv2i++ ) + { // 704 + tree access = pv2i->rep_access->base_info.acc_base; + tree base_type; // = base_type_of ( access); + if ( pv2i->rep_access->reorg != NULL ) + { + base_type = pv2i->rep_access->reorg->gcc_type; + } + else + { + if ( TREE_TYPE ( access ) != NULL ) + { + base_type = base_type_of ( access); + } + else + { + gcc_assert (0); + } + } + + bool base_type_isa_decl = DECL_P ( base_type ); + + // create a tiny model of the cache big + // enough for this record. + tree base_type_size = base_type_isa_decl ? + DECL_SIZE ( base_type ) + : + TYPE_SIZE ( base_type); + + unsigned HOST_WIDE_INT len = + (( tree_to_uhwi ( base_type_size) + + + param_l1_cache_line_size -1) + / + param_l1_cache_line_size) + + + 1; + + // TBD Does this clear the bits??? It needs to. + // Each bit represents a cache line. + cache_model = sbitmap_resize( cache_model, (unsigned) len, 0); + double accum = 0.0; + int nrbo = 0; + if ( base_type_isa_decl ) + { + for ( auto field_ex = TYPE_FIELDS ( base_type); + field_ex; + field_ex = DECL_CHAIN ( field_ex) ) + { + nrbo++; + // Looking back on my design I don't have a clue + // why this is here and what it does. Sigh... + unsigned HOST_WIDE_INT base_offset = + tree_to_uhwi ( DECL_FIELD_OFFSET( field_ex)); + // Access accounting + + for ( auto fldi = pv2i->fields.begin (); + fldi != pv2i->fields.end (); fldi++ ) + { + tree field = *fldi; + unsigned HOST_WIDE_INT fld_width, fld_offset; + fld_width = tree_to_uhwi ( DECL_SIZE ( field)); + fld_offset = tree_to_uhwi ( DECL_FIELD_OFFSET ( field)); + int chari; + for ( chari = 0; chari < fld_width; chari++ ) + { + int loc = (chari + fld_offset + base_offset) + / + param_l1_cache_line_size; + bitmap_set_bit ( cache_model, loc); + } + } + + accum += bitmap_count_bits ( cache_model); + bitmap_clear ( cache_model); + } + } + else + { + nrbo = 1; + accum++; + } + double amount = accum / nrbo; + regular_nca += amount; + } // 739 + sbitmap_free ( cache_model); + + if( ri != NULL ) { + ri->instance_interleave.regular_perf += regular_nca; + cache_accesses_noreorg += regular_nca; + //////////// DIFFERENCE NOTED HERE + // Code after here likely sane. + // Code above lines - 2618 same as - 2488 } else { cache_accesses += regular_nca; } } // end for each prop_var 748 - - - } // +#endif + } // + + if ( info->show_perf_qualify && missing_cases ) + { + fprintf ( info->reorg_dump_file, + " Ignored unimplemented cases when finding accesses.\n"); + } + + free_dominance_info ( CDI_DOMINATORS); pop_cfun (); } @@ -2124,6 +2665,11 @@ reorg_perf_qual ( Info *info) info->total_cache_accesses = total_cache_accesses; + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, + "Decide which reorgTypes fail performance qualification\n"); + } // // Decide which reorgTypes fail performance qualification // @@ -2145,6 +2691,15 @@ reorg_perf_qual ( Info *info) // If the relative effect is small enough don't bother. if ( raw_effect < SINGLE_POOL_RAW_SKIP_IT ) { + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, "Disqualified: "); + flexible_print ( stderr, reorgi->gcc_type, 0, (dump_flags_t)0); + fprintf ( info->reorg_dump_file, ": Very small effect: "); + fprintf ( info->reorg_dump_file, + "raw_effect %e < SINGLE_POOL_RAW_SKIP_IT %e\n", + raw_effect, SINGLE_POOL_RAW_SKIP_IT); + } reorgi->do_instance_interleave = false; continue; } @@ -2152,16 +2707,48 @@ reorg_perf_qual ( Info *info) // otherwise look at the absolute effect. if ( raw_effect >= SINGLE_POOL_RAW_DO_IT_ALWAYS ) { + + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, "Qualified: "); + flexible_print ( stderr, reorgi->gcc_type, 0, (dump_flags_t)0); + fprintf ( info->reorg_dump_file, ": Large raw effect: "); + fprintf ( info->reorg_dump_file, + "raw_effect %e >= SINGLE_POOL_RAW_DO_IT_ALWAYS %e\n", + raw_effect, SINGLE_POOL_RAW_DO_IT_ALWAYS); + } + reorgi->do_instance_interleave = true; continue; } if ( absolute_effect < SINGLE_POOL_ABS_SKIP_IT ) { + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, "Disqualified: "); + flexible_print ( stderr, reorgi->gcc_type, 0, (dump_flags_t)0); + fprintf ( info->reorg_dump_file, ": Very small absolute effect: "); + fprintf ( info->reorg_dump_file, + "absolute_effect %e < SINGLE_POOL_ABS_SKIP_IT %e\n", + absolute_effect, SINGLE_POOL_ABS_SKIP_IT); + } + reorgi->do_instance_interleave = false; continue; } if ( absolute_effect >= SINGLE_POOL_ABS_DO_IT_ALWAYS ) { + + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, "Qualified: "); + flexible_print ( stderr, reorgi->gcc_type, 0, (dump_flags_t)0); + fprintf ( info->reorg_dump_file, ": Large absolute effect: "); + fprintf ( info->reorg_dump_file, + "absolute_effect %e >= SINGLE_POOL_ABS_DO_IT_ALWAYS %e\n", + absolute_effect, SINGLE_POOL_ABS_DO_IT_ALWAYS); + } + reorgi->do_instance_interleave = true; continue; } @@ -2172,13 +2759,219 @@ reorg_perf_qual ( Info *info) double cut_off = cut_off_eq_single_pool ( absolute_effect); if ( raw_effect < cut_off ) { + + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, "Disqualified: "); + flexible_print ( stderr, reorgi->gcc_type, 0, (dump_flags_t)0); + fprintf ( info->reorg_dump_file, ": Failed cut off equations: "); + fprintf ( info->reorg_dump_file, + "raw_effect %e < cut_off %e\n", + raw_effect, cut_off); + } + reorgi->do_instance_interleave = false; - } + continue; + } + + if ( info->show_perf_qualify ) + { + fprintf ( info->reorg_dump_file, "Qualified: "); + flexible_print ( stderr, reorgi->gcc_type, 0, (dump_flags_t)0); + fprintf ( info->reorg_dump_file, ": Passed cut off equations"); + fprintf ( info->reorg_dump_file, + "raw_effect %e >= cut_off %e\n", + raw_effect, cut_off); + } + } + #endif +} - free_dominance_info ( CDI_DOMINATORS); +static void +print_var_info ( FILE *file, varInfo_t &vinfo) +{ + print_acc_info ( file, vinfo.rep_access ); + for ( auto fi = vinfo.fields.begin (); fi != vinfo.fields.end (); fi++ ) + { + if ( fi != vinfo.fields.begin () ) fprintf ( stderr,", "); + flexible_print ( stderr, *fi, 0, (dump_flags_t)0); + } + fprintf ( stderr,"\n"); +} - #endif +static void +print_var_infos ( FILE *file, std::vector<varInfo_t> &vinfo) +{ + fprintf( stderr, "print_var_infos:\n"); + for ( auto vi = vinfo.begin (); vi != vinfo.end (); vi++ ) + { + print_var_info ( file, *vi); + } +} + +static void +compress_acc_infos ( std::vector <acc_info_t> ainfo ) +{ + unsigned len = ainfo.size (); + if ( len == 1 ) return; + unsigned i, j; + for ( i = j = 1; j < len; j++ ) + { + ainfo[i] = ainfo[j]; + if ( !acc_eq ( ainfo[i], ainfo[i - 1]) ) i++; + } + if ( i == j ) return; + ainfo.resize ( len - (j -i)); +} + +static void +print_acc_info ( FILE *file, acc_info_t *ainfo ) +{ + fprintf ( file, "%s%s%s%s%s%s\n", + ainfo->base_info.a_def_def ? ", deflt_def" : "", + ainfo->base_info.a_decl ? ", decl" : "", + ainfo->base_info.a_func ? ", a_func" : "", + ainfo->base_info.has_induct_var_acc ? ", induct" : "", + ainfo->base_info.multi_induct ? ", multi" : "", + ainfo->base_info.complicated ? ", complicated" : ""); + fprintf ( file, " base var "); + flexible_print ( stderr, ainfo->base_info.acc_base, 0, (dump_flags_t)0); + if ( ainfo->base_info.has_induct_var_acc ) + { + fprintf ( file, ", induc var "); + flexible_print ( stderr, ainfo->base_info.acc_base, + 0, (dump_flags_t)0); + } + fprintf ( file, ", field "); + flexible_print ( stderr, ainfo->field, 0, (dump_flags_t)0); + if ( ainfo->reorg ) + { + fprintf ( file, ", reorg of "); + flexible_print ( stderr, ainfo->reorg->gcc_type, + 0, (dump_flags_t)0); + } + fprintf ( file, "\n"); +} + +static void +print_acc_infos ( FILE *file, std::vector <acc_info_t> ainfo ) +{ + fprintf ( file, "print_acc_infos:\n"); + unsigned i; + unsigned len = ainfo.size (); + + for ( i = 0; i < len; i++ ) + { + fprintf ( file, "[%d] ", i); + print_acc_info ( file, &ainfo[i]); + } +} + + +// decls < default defs < defined by function +static bool +acc_lt_acc_category ( const acc_info_t& a, const acc_info_t& b ) +{ + int ord_a = a.base_info.a_decl ? 1 : a.base_info.a_def_def ? 2 : 3; + int ord_b = b.base_info.a_decl ? 1 : b.base_info.a_def_def ? 2 : 3; + if ( ord_a < ord_b ) return true; + if ( ord_a > ord_b ) return false; + switch ( ord_a ) { + case 1: + // The field isn't there for decls, it's the index. Ignoring the + // index is harmless. This is becauseif if we take an arbitary + // small number of iterations of a loop, then all the permutations + // of the accesses in those iterations touch the same number of + // cache lines (just in a different order.) + return false; + case 2: + { + if ( a.access < b.access ) return true; + if ( a.access > b.access ) return false; + return a.field < b.field; + } + case 3: + { + if ( a.base_info.function < b.base_info.function ) return true; + if ( a.base_info.function > b.base_info.function ) return false; + return a.field < b.field; + } + } +} + +// Complicated is less than noncomplicated and null reorgs are less than non +// null reorgs. +static bool +acc_lt ( const acc_info_t& a, const acc_info_t& b ) +{ + if ( a.base_info.complicated && !b.base_info.complicated ) return true; + if ( !a.base_info.complicated && !b.base_info.complicated ) return false; + if ( a.reorg == NULL ) + { + if ( b.reorg != NULL ) return true; + // compare non_reorg_bit + return acc_lt_acc_category ( a, b); + } + else + { + if ( b.reorg == NULL ) return false; + if ( a.reorg < b.reorg ) return true; + if ( a.reorg > b.reorg ) return false; + // compare non_reorg_bit + return acc_lt_acc_category ( a, b); + } +} + +static bool +acc_eq ( const acc_info_t& a, const acc_info_t& b ) +{ + // nothing complicated is equal to anything else. Being complicated + // is basically saying that little is really know about it and it's + // difficult if not meaningless to analyze in depth given this + // framework. + if ( a.base_info.complicated || b.base_info.complicated ) return false; + if ( a.reorg != b.reorg ) return false; + int ord_a = a.base_info.a_decl ? 1 : a.base_info.a_def_def ? 2 : 3; + int ord_b = b.base_info.a_decl ? 1 : b.base_info.a_def_def ? 2 : 3; + if ( ord_a != ord_b ) return false; + switch ( ord_a ) { + case 1: + case 2: + { + if ( a.access != b.access ) return false; + } + case 3: + { + if ( a.base_info.function != b.base_info.function ) return false; + } + } + return a.field == b.field; +} + +static bool +all_but_field_eq ( const acc_info_t& a, const acc_info_t& b ) +{ + // nothing complicated is equal to anything else. Being complicated + // is basically saying that little is really know about it and it's + // difficult if not meaningless to analyze in depth given this + // framework. + if ( a.base_info.complicated || b.base_info.complicated ) return false; + if ( a.reorg != b.reorg ) return false; + int ord_a = a.base_info.a_decl ? 1 : a.base_info.a_def_def ? 2 : 3; + int ord_b = b.base_info.a_decl ? 1 : b.base_info.a_def_def ? 2 : 3; + if ( ord_a != ord_b ) return false; + switch ( ord_a ) { + case 1: + case 2: + { + return a.access == b.access; + } + case 3: + { + return a.base_info.function == b.base_info.function; + } + } } #define SINGLE_POOL_SLOPE \ @@ -2246,18 +3039,230 @@ is_array_access( tree acc) } static void -account_for_use( tree acc, std::vector <acc_info_t> *acc_info) +account_for_access ( tree access, tree field, std::vector <acc_info_t> *acc_info, Info_t *info) { - // determine element of access - // find field access number i - // find var v - varpool_node *v; - int i; - // TBD - acc_info_t ai = { v, i}; + DEBUG_A("account_for_use var: "); + DEBUG_F(flexible_print, stderr, access, 0, (dump_flags_t)0); + DEBUG(", field: "); + DEBUG_F(flexible_print, stderr, field, 1, (dump_flags_t)0); + + // assert might eventually make sense but not yet + //gcc_assert ( TREE_CODE ( ssa_var) == SSA_NAME); + acc_info_t ai; + //ai.v = SSA_NAME_VAR ( ssa_var); + ai.access = access; // TBD We need to see if we can find the decl + ai.field = field; + ai.reorg = tree_contains_a_reorgtype ( access, info); + analyze_access ( access, &ai); + // don't count this acces if there is no associated induction variable + if ( !ai.base_info.has_induct_var_acc ) return; + // Otherwise add the access acc_info->push_back( ai); } +static void +tmasn_helper ( tree t, int indent, std::set<tree> *already ) +{ + DEBUG_A(""); + fprintf( stderr, "%*s", indent, " "); + indent += 4; + flexible_print ( stderr, t, 0, (dump_flags_t)0); + if ( already->find (t) != already->end () ) + { + fprintf( stderr, " <Induction>\n"); + return; + } + else + { + fprintf( stderr, "\n"); + } + DEBUG_L("code: %s\n", code_str(TREE_CODE (t))); + if ( TREE_CODE (t) == SSA_NAME ) + { + already->insert (t); + gimple *stmt = SSA_NAME_DEF_STMT (t); + fprintf( stderr, "%*sSSA_NAME defined in: ", indent - 4, " "); + print_gimple_stmt( stderr, stmt, TDF_DETAILS); + if ( gimple_code ( stmt) == GIMPLE_PHI ) + { + gphi *phi_stmt = dyn_cast <gphi *> ( stmt); + for (int i = 0; i < gimple_phi_num_args (phi_stmt); i++) + { + tree *arg = gimple_phi_arg_def_ptr (phi_stmt, i); + tmasn_helper ( *arg, indent, already); + } + } + else + { + bool a_ass = gimple_code ( stmt) == GIMPLE_ASSIGN; + bool a_call = gimple_code ( stmt) == GIMPLE_CALL; + // This was being triggered an add: op = op + op + //gcc_assert ( a_ass || a_call ); + + if ( a_call ) + { + for ( int i = 0; i < gimple_call_num_args ( stmt); i++ ) + { + tmasn_helper ( gimple_call_arg ( stmt, i) , indent, already); + } + } + else + { + // Note, start with one to skip lhs op. + for ( int i = 1; i < gimple_num_ops ( stmt); i++ ) + { + tmasn_helper ( gimple_op ( stmt, i) , indent, already); + } + } + } + return; + } + if ( DECL_P ( t) ) + { + return; + } + if ( TREE_CODE ( t) == MEM_REF ) + { + tree t_0 = TREE_OPERAND ( t, 0); + fprintf( stderr, "%*sMEM_REF t_0: ", indent - 4, " "); + flexible_print ( stderr, t_0, 1, (dump_flags_t)0); + tmasn_helper ( t_0 , indent, already); + return; + } + if ( TREE_CODE ( t) == INTEGER_CST ) return; + fprintf ( stderr, "unanticipated TREE_CODE\n"); + gcc_assert ( 0); +} + +static void +tell_me_about_ssa_name ( tree ssa_name, int indent) +{ + fprintf(stderr,"about:\n"); + std::set<tree> already; + tmasn_helper ( ssa_name, indent, &already); +} + +static void +an_ac_helper ( tree t, int indent, std::set<tree> *already, acc_info_t *ainfo ) +{ + acc_base_info_t *binfo = &ainfo->base_info; + DEBUG_A("%*s", indent, " "); + indent += 4; + DEBUG_F( flexible_print, stderr, t, 0, (dump_flags_t)0); + if ( already->find (t) != already->end () ) + { + DEBUG(" <Induction>\n"); + binfo->multi_induct = + binfo->multi_induct || binfo->has_induct_var_acc; + binfo->induct_base = t; + binfo->has_induct_var_acc = true; + return; + } + else + { + DEBUG("\n"); + } + DEBUG_A("%*scode: %s\n", indent, " ", code_str(TREE_CODE (t))); + if ( TREE_CODE (t) == SSA_NAME ) + { + already->insert (t); + gimple *stmt = SSA_NAME_DEF_STMT (t); + DEBUG_A("%*sSSA_NAME defined in: ", indent, " "); + DEBUG_F(print_gimple_stmt, stderr, stmt, TDF_DETAILS); + if ( SSA_NAME_IS_DEFAULT_DEF ( t ) ) + { + binfo->acc_base = t; + binfo->complicated = + binfo->complicated || binfo->a_def_def || binfo->a_decl || binfo->a_func; + binfo->a_def_def = true; + } + if ( gimple_code ( stmt) == GIMPLE_PHI ) + { + gphi *phi_stmt = dyn_cast <gphi *> ( stmt); + for (int i = 0; i < gimple_phi_num_args (phi_stmt); i++) + { + tree *arg = gimple_phi_arg_def_ptr (phi_stmt, i); + an_ac_helper ( *arg, indent, already, ainfo); + } + } + else + { + bool a_ass = gimple_code ( stmt) == GIMPLE_ASSIGN; + bool a_call = gimple_code ( stmt) == GIMPLE_CALL; + // This was being triggered an add: op = op + op + //gcc_assert ( a_ass || a_call ); + + if ( a_call ) + { + binfo->acc_base = t; + binfo->complicated = + binfo->complicated || binfo->a_def_def || binfo->a_decl || binfo->a_func; + binfo->a_func = true; + binfo->function = stmt; + // Question, do we want to walk the call arguements??? + // Because how do the arguments effect the return value? + // It's basically unknow so we shouldn't walk the + // arguemets. + // + for ( int i = 0; i < gimple_call_num_args ( stmt); i++ ) + { + an_ac_helper ( gimple_call_arg ( stmt, i), indent, already, ainfo); + } + } + else + { + // Note, start with one to skip lhs op. + for ( int i = 1; i < gimple_num_ops ( stmt); i++ ) + { + an_ac_helper ( gimple_op ( stmt, i), indent, already, ainfo); + } + } + } + return; + } + if ( DECL_P ( t) ) + { + binfo->acc_base = t; + binfo->complicated = + binfo->complicated || binfo->a_def_def || binfo->a_decl || binfo->a_func; + binfo->a_decl = true; + + DEBUG_A("field (index) for a_decl: "); + DEBUG_F(flexible_print, stderr, ainfo->field, 1, (dump_flags_t)0); + + an_ac_helper ( ainfo->field, indent, already, ainfo); + return; + } + if ( TREE_CODE ( t) == MEM_REF ) + { + tree t_0 = TREE_OPERAND ( t, 0); + DEBUG_A("%*sMEM_REF t_0: ", indent, " "); + DEBUG_F(flexible_print, stderr, t_0, 1, (dump_flags_t)0); + an_ac_helper ( t_0 , indent, already, ainfo); + return; + } + if ( TREE_CODE ( t) == INTEGER_CST ) return; + fprintf ( stderr, "Unanticipated TREE_CODE\n"); + gcc_assert ( 0); +} + +static void +analyze_access ( tree access, acc_info_t *acc_info) +{ + acc_base_info_t *base_info = &acc_info->base_info; + DEBUG_A("analyze_access:\n"); + base_info->a_def_def = false; + base_info->a_decl = false; + base_info->a_func = false; + base_info->has_induct_var_acc = false; + base_info->multi_induct = false; + base_info->complicated = false; + base_info->acc_base = NULL; + base_info->induct_base = NULL; + std::set<tree> already; + an_ac_helper ( access, 4, &already, acc_info); +} + // create_new_types has to crawl "all" the // types, create new types and transform |