#include "config.h" #include "system.h" #include "coretypes.h" #include "backend.h" #include "tree.h" #include "gimple-expr.h" #include "predict.h" #include "alloc-pool.h" #include "tree-pass.h" #include "cgraph.h" #include "diagnostic.h" #include "fold-const.h" #include "gimple-fold.h" #include "symbol-summary.h" #include "tree-vrp.h" #include "ipa-prop.h" #include "tree-pretty-print.h" #include "tree-inline.h" #include "ipa-fnsummary.h" #include "ipa-utils.h" #include "tree-ssa-ccp.h" #include "stringpool.h" #include "attribs.h" #include "tree-ssa-alias.h" #include "tree-ssanames.h" #include "gimple.h" #include "cfg.h" #include "gimple-iterator.h" #include "gimple-ssa.h" #include "compare-types.h" #include "types-inlines.h" #include #include #include "collect-types.h" #include "name-types.h" //#define FUZZ_MODE 1 namespace type_playground { enum type_comparison_func_enum { EQ_POINTER = 0, EQ_IDENTIFIER, EQ_MAIN_VARIANT, EQ_CANONICAL, EQ_STRUCTURAL, EQ_CANONICAL_STRICT, EQ_COMPARE, EQ_END }; static unsigned const type_comparisons = EQ_END; static const char* names[type_comparisons] = { "EQ_POINTER", "EQ_IDENTIFIER", "EQ_MAIN_VARIANT", "EQ_CANONICAL", "EQ_CANONICAL_STRICT", "EQ_STRUCTURAL", "EQ_COMPARE" }; typedef bool (*type_comparison_func_t)(const_tree, const_tree); static type_comparison_func_t comparisons[type_comparisons] = { eq_pointer, eq_identifier, eq_main_variant, eq_canonical, eq_canonical_internal, eq_type_structural, eq_type_compare }; } static void collect_types_from_expr(const_tree expr, ptrset_t &types); inline void is_gimple_code(gimple *stmt, const enum gimple_code ex_code) { gcc_assert(stmt); const enum gimple_code ob_code = gimple_code(stmt); const bool succeeds = ex_code == ob_code; gcc_assert(succeeds); } inline void is_gimple_rhs_class(gimple *stmt, const enum gimple_rhs_class ex_class) { gcc_assert(stmt); is_gimple_code(stmt, GIMPLE_ASSIGN); const enum gimple_rhs_class ob_class = gimple_assign_rhs_class(stmt); const bool succeeds = ex_class == ob_class; gcc_assert(succeeds); } static void collect_types_from_op(const_tree expr, ptrset_t &types, unsigned n) { gcc_assert(expr); const_tree op_n = TREE_OPERAND(expr, n); gcc_assert(op_n); collect_types_from_expr(op_n, types); } static void collect_types_from_op0(const_tree expr, ptrset_t &types, const enum tree_code ex_code) { assert_is_type(expr, ex_code); collect_types_from_op(expr, types, 0); } static void collect_types_from_op1(const_tree expr, ptrset_t &types, const enum tree_code ex_code) { assert_is_type(expr, ex_code); collect_types_from_op(expr, types, 0); collect_types_from_op(expr, types, 1); } static void collect_types_from_addr_expr(const_tree expr, ptrset_t &types) { collect_types_from_op0(expr, types, ADDR_EXPR); } static void collect_types_from_component_ref(const_tree expr, ptrset_t &types) { collect_types_from_op1(expr, types, COMPONENT_REF); } static void collect_types_from_mem_ref(const_tree expr, ptrset_t &types) { collect_types_from_op1(expr, types, MEM_REF); } static void collect_types_from_array_ref(const_tree expr, ptrset_t &types) { collect_types_from_op1(expr, types, ARRAY_REF); } static void collect_types_from_leaf_expr(const_tree expr, ptrset_t &types, const enum tree_code ex_code) { assert_is_type(expr, ex_code); const_tree type = TREE_TYPE(expr); gcc_assert(type); collect_types(type, types); } static void collect_types_from_ssa_name(const_tree expr, ptrset_t &types) { collect_types_from_leaf_expr(expr, types, SSA_NAME); } static void collect_types_from_var_decl(const_tree expr, ptrset_t &types) { collect_types_from_leaf_expr(expr, types, VAR_DECL); } static void collect_types_from_field_decl(const_tree expr, ptrset_t &types) { collect_types_from_leaf_expr(expr, types, FIELD_DECL); } static void collect_types_from_integer_cst(const_tree expr, ptrset_t &types) { collect_types_from_leaf_expr(expr, types, INTEGER_CST); } static void collect_types_from_constructor_array(const_tree expr, ptrset_t &types) { assert_is_type(expr, CONSTRUCTOR); const_tree type = TREE_TYPE(expr); assert_is_type(type, ARRAY_TYPE); //TODO: Collect types from here #ifdef FUZZ_MODE gcc_unreachable(); #endif } static void collect_types_from_constructor_record_or_union(const_tree expr, ptrset_t &types) { assert_is_type(expr, CONSTRUCTOR); const_tree type = TREE_TYPE(expr); const enum tree_code code = TREE_CODE(type); const bool is_record = RECORD_TYPE == code; const bool is_union = UNION_TYPE == code; const bool is_qual_union = QUAL_UNION_TYPE == code; const bool is_valid_input = is_record || is_union || is_qual_union; gcc_assert(is_valid_input); //TODO: Collect types from here #ifdef FUZZ_MODE gcc_unreachable(); #endif } static void collect_types_from_constructor(const_tree expr, ptrset_t &types) { // https://gcc.gnu.org/onlinedocs/gccint/Unary-and-Binary-Expressions.html#Unary-and-Binary-Expressions // These nodes represent the brace-enclosed initializers for a structure or an array. // They contain a sequence of component values made out of a vector of constructor_elt, which is a (INDEX, VALUE) pair. // If the TREE_TYPE of the CONSTRUCTOR is a RECORD_TYPE, UNION_TYPE or QUAL_UNION_TYPE then // the INDEX of each node in the sequence will be a FIELD_DECL and the VALUE will be the expression used to initialize that field. // If the TREE_TYPE of the CONSTRUCTOR is an ARRAY_TYPE, then the INDEX of // each node in the sequence will be an INTEGER_CST or a RANGE_EXPR of // two INTEGER_CSTs. A single INTEGER_CST indicates which element of // the array is being assigned to. A RANGE_EXPR indicates an inclusive range // of elements to initialize. In both cases the VALUE is the // corresponding initializer. It is re-evaluated for each element of // a RANGE_EXPR. If the INDEX is NULL_TREE, then the initializer // is for the next available array element. assert_is_type(expr, CONSTRUCTOR); const_tree type = TREE_TYPE(expr); gcc_assert(type); const enum tree_code code = TREE_CODE(type); switch (code) { case RECORD_TYPE: case UNION_TYPE: case QUAL_UNION_TYPE: case ARRAY_TYPE: break; default: // TODO: I should fuzz this, I got a constructor of an integer type? #ifdef FUZZ_MODE gcc_unreachable(); #endif return; break; } const bool is_array = ARRAY_TYPE == code; is_array ? collect_types_from_constructor_array(expr, types) : collect_types_from_constructor_record_or_union(expr, types); } static void collect_types_from_parm_decl(const_tree expr, ptrset_t &types) { collect_types_from_leaf_expr(expr, types, PARM_DECL); } static void collect_types_from_real_cst(const_tree expr, ptrset_t &types) { collect_types_from_leaf_expr(expr, types, REAL_CST); } static void collect_types_from_string_cst(const_tree expr, ptrset_t &types) { collect_types_from_leaf_expr(expr, types, STRING_CST); } static void collect_types_from_bit_field_ref(const_tree expr, ptrset_t &types) { // TODO: How to collect types from bit_field_ref? #ifdef FUZZ_MODE gcc_unreachable(); #endif } static void collect_types_from_view_convert_expr(const_tree expr, ptrset_t &types) { // VIEW_CONVERT_EXPR is used to interpret the bit representation // of a register as a register of a different type. // It is unspecified if this is allowed to change the // register size. If disallowed this case needs to go // through an integer mode and an intermediate BIT_FIELD_REF. // https://gcc.gnu.org/wiki/MemRef //TODO: How to collect types from VIEW_CONVERT_EXPR? #ifdef FUZZ_MODE gcc_unreachable(); #endif } static void collect_types_from_result_decl(const_tree expr, ptrset_t &types) { collect_types_from_leaf_expr(expr, types, RESULT_DECL); } static void collect_types_from_expr(const_tree expr, ptrset_t &types) { // These are the codes I saw using csmith to fuzz. gcc_assert(expr); const_tree type = TREE_TYPE(expr); gcc_assert(type); collect_types(type, types); const enum tree_code code = TREE_CODE(expr); switch (code) { case ADDR_EXPR: collect_types_from_addr_expr(expr, types); break; case BIT_FIELD_REF: collect_types_from_bit_field_ref(expr, types); break; case ARRAY_REF: collect_types_from_array_ref(expr, types); break; case MEM_REF: collect_types_from_mem_ref(expr, types); break; case COMPONENT_REF: collect_types_from_component_ref(expr, types); break; case SSA_NAME: collect_types_from_ssa_name(expr, types); break; case VAR_DECL: collect_types_from_var_decl(expr, types); break; case FIELD_DECL: collect_types_from_field_decl(expr, types); break; case VIEW_CONVERT_EXPR: collect_types_from_view_convert_expr(expr, types); break; case INTEGER_CST: collect_types_from_integer_cst(expr, types); break; case CONSTRUCTOR: collect_types_from_constructor(expr, types); break; case RESULT_DECL: collect_types_from_result_decl(expr, types); break; case PARM_DECL: collect_types_from_parm_decl(expr, types); break; case REAL_CST: collect_types_from_real_cst(expr, types); break; case STRING_CST: collect_types_from_string_cst(expr, types); break; default: log("tree_code: %s\n", get_tree_code_name(code)); gcc_unreachable(); break; } } static void collect_types_from_stmt_assign_lhs(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_ASSIGN); const_tree lhs = gimple_assign_lhs(stmt); gcc_assert(lhs); collect_types_from_expr(lhs, types); } static void collect_types_from_stmt_assign_rhs3(gimple *stmt, ptrset_t &types) { is_gimple_rhs_class(stmt, GIMPLE_TERNARY_RHS); const_tree rhs = gimple_assign_rhs3(stmt); gcc_assert(rhs); collect_types_from_expr(rhs, types); } static void collect_types_from_stmt_assign_rhs2(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_ASSIGN); const enum gimple_rhs_class gclass = gimple_assign_rhs_class(stmt); const bool is_ternary = GIMPLE_TERNARY_RHS == gclass; const bool is_binary = GIMPLE_BINARY_RHS == gclass; const bool is_valid_input = is_ternary || is_binary; gcc_assert(is_valid_input); const_tree rhs = gimple_assign_rhs2(stmt); gcc_assert(rhs); collect_types_from_expr(rhs, types); } static void collect_types_from_stmt_assign_rhs1(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_ASSIGN); const enum gimple_rhs_class gclass = gimple_assign_rhs_class(stmt); const bool is_ternary = GIMPLE_TERNARY_RHS == gclass; const bool is_binary = GIMPLE_BINARY_RHS == gclass; const bool is_unary = GIMPLE_UNARY_RHS == gclass; const bool is_single = GIMPLE_SINGLE_RHS == gclass; const bool is_valid_input = is_ternary || is_binary || is_unary || is_single; gcc_assert(is_valid_input); const_tree rhs = gimple_assign_rhs1(stmt); gcc_assert(rhs); collect_types_from_expr(rhs, types); } static void collect_types_from_stmt_assign_rhs(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_ASSIGN); const enum gimple_rhs_class gclass = gimple_assign_rhs_class(stmt); switch (gclass) { case GIMPLE_TERNARY_RHS: collect_types_from_stmt_assign_rhs3(stmt, types); /* fall-through */ case GIMPLE_BINARY_RHS: collect_types_from_stmt_assign_rhs2(stmt, types); /* fall-through */ case GIMPLE_UNARY_RHS: case GIMPLE_SINGLE_RHS: collect_types_from_stmt_assign_rhs1(stmt, types); break; default: gcc_unreachable(); break; } } static void collect_types_from_stmt_assign(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_ASSIGN); collect_types_from_stmt_assign_lhs(stmt, types); collect_types_from_stmt_assign_rhs(stmt, types); } static void collect_types_from_stmt_call_lhs(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_CALL); const_tree lhs = gimple_call_lhs(stmt); if (!lhs) return; collect_types_from_expr(lhs, types); } static void collect_types_from_stmt_call_rhs(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_CALL); unsigned num_args = gimple_call_num_args(stmt); for (unsigned i = 0; i < num_args; i++) { const_tree arg_i = gimple_call_arg(stmt, i); collect_types_from_expr(arg_i, types); } } static void collect_types_from_stmt_call(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_CALL); collect_types_from_stmt_call_lhs(stmt, types); collect_types_from_stmt_call_rhs(stmt, types); } static void collect_types_from_stmt_cond_lhs(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_COND); const_tree lhs = gimple_cond_lhs(stmt); gcc_assert(lhs); collect_types_from_expr(lhs, types); } static void collect_types_from_stmt_cond_rhs(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_COND); const_tree rhs = gimple_cond_rhs(stmt); gcc_assert(rhs); collect_types_from_expr(rhs, types); } static void collect_types_from_stmt_cond(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_COND); collect_types_from_stmt_cond_lhs(stmt, types); collect_types_from_stmt_cond_rhs(stmt, types); } static void collect_types_from_stmt_return(gimple *stmt, ptrset_t &types) { is_gimple_code(stmt, GIMPLE_RETURN); const_tree retval = gimple_return_retval(stmt); gcc_assert(retval); collect_types_from_expr(retval, types); } static void collect_types_from_stmt(gimple *stmt, ptrset_t &types) { gcc_assert(stmt); const enum gimple_code code = gimple_code(stmt); switch (code) { case GIMPLE_ASSIGN: collect_types_from_stmt_assign(stmt, types); break; case GIMPLE_CALL: collect_types_from_stmt_call(stmt, types); break; case GIMPLE_COND: collect_types_from_stmt_cond(stmt, types); break; case GIMPLE_RETURN: collect_types_from_stmt_return(stmt, types); break; case GIMPLE_LABEL: case GIMPLE_PREDICT: #ifdef FUZZ_MODE gcc_unreachable(); #endif break; default: { const char* name = gimple_code_name[code]; log("gimple code name %s\n", name); gcc_unreachable(); } break; } } static void collect_types_from_cnode_decl(cgraph_node *cnode, ptrset_t &types) { gcc_assert(cnode); const_tree decl = cnode->decl; gcc_assert(decl); const_tree decl_type = TREE_TYPE(decl); gcc_assert(decl_type); function *func = DECL_STRUCT_FUNCTION (decl); gcc_assert(func); push_cfun(func); // This will collect return, arguments and decl_type itself collect_types(decl_type, types); pop_cfun(); } static void collect_types_from_cnode_locals(cgraph_node *cnode, ptrset_t &types) { gcc_assert(cnode); const_tree decl = cnode->decl; gcc_assert(decl); function *func = DECL_STRUCT_FUNCTION (decl); gcc_assert(func); int i = 0; tree var_decl = NULL; FOR_EACH_LOCAL_DECL(func, i, var_decl) { gcc_assert(var_decl); const_tree var_decl_type = TREE_TYPE(var_decl); collect_types(var_decl_type, types); } } static void collect_types_from_cnode_ssa_names(cgraph_node *cnode, ptrset_t &types) { gcc_assert(cnode); const_tree decl = cnode->decl; gcc_assert(decl); function *func = DECL_STRUCT_FUNCTION (decl); gcc_assert(func); size_t i = 0; tree ssa_name = NULL; push_cfun(func); FOR_EACH_SSA_NAME(i, ssa_name, cfun) { gcc_assert(ssa_name); const_tree ssa_name_type = TREE_TYPE(ssa_name); collect_types(ssa_name_type, types); } pop_cfun(); } static void collect_types_from_bb(basic_block bb, ptrset_t &types) { gcc_assert(bb); for (auto gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) { gimple *stmt = gsi_stmt(gsi); gcc_assert(stmt); collect_types_from_stmt(stmt, types); } } static void collect_types_from_cnode_bb(cgraph_node *cnode, ptrset_t &types) { gcc_assert(cnode); cnode->get_untransformed_body(); tree decl = cnode->decl; gcc_assert(decl); function *func = DECL_STRUCT_FUNCTION(decl); gcc_assert(func); basic_block bb = NULL; push_cfun(func); FOR_EACH_BB_FN(bb, func) { gcc_assert(bb); collect_types_from_bb(bb, types); } pop_cfun(); } static void collect_types_from_global(varpool_node *vnode, ptrset_t &types) { gcc_assert(vnode); struct ipa_ref *ref = NULL; for (unsigned i = 0; vnode->iterate_referring(i, ref); i++) { tree decl = vnode->decl; gcc_assert(decl); collect_types_from_var_decl(decl, types); } } static void collect_types_from_globals(ptrset_t &types) { varpool_node *vnode = NULL; FOR_EACH_VARIABLE (vnode) { collect_types_from_global(vnode, types); } } static void collect_types_from_functions_with_gimple_body(cgraph_node *cnode, ptrset_t &types) { gcc_assert(cnode); collect_types_from_cnode_decl(cnode, types); collect_types_from_cnode_locals(cnode, types); collect_types_from_cnode_ssa_names(cnode, types); collect_types_from_cnode_bb(cnode, types); } static void print_types_in_set(ptrset_t &types) { for (auto it = types.points_to_record.cbegin(); it != types.points_to_record.cend(); ++it) { const_tree type = *it; std::string name = type_to_string(type); log("name: %s\n", name.c_str()); } } static const bool filter_comparisons_functions_char(const char* function_name) { // Everything should run if (!flag_tp_comparison_functions) return true; const size_t buffer_size = 1024; const size_t cli_length = strlen(flag_tp_comparison_functions); gcc_assert(buffer_size > cli_length); char whitelist[buffer_size]; strcpy(whitelist, flag_tp_comparison_functions); char* saveptr = whitelist; char* token = NULL; while (token = strtok_r(saveptr, ",", &saveptr)) { const bool name_allowed = strcmp(token, function_name) == 0; if (name_allowed) return true; } return false; } static const bool filter_comparisons_functions_int(unsigned function_id) { if (!flag_tp_comparison_functions) return true; const char* function_name = type_playground::names[function_id]; return filter_comparisons_functions_char(function_name); } static const bool filter_comparisons_functions(type_playground::type_comparison_func_t func) { if (!flag_tp_comparison_functions) return true; for (unsigned i = 0; i < type_playground::type_comparisons; i++) { if (func != type_playground::comparisons[i]) continue; return filter_comparisons_functions_int(i); } return false; } static void compare_types_in_set(ptrset_t &types) { for (auto i = types.points_to_record.cbegin(); i != types.points_to_record.cend(); ++i) { for (auto j = types.points_to_record.cbegin(); j != types.points_to_record.cend(); ++j) { const_tree a = *i; const_tree b = *j; log("%s x %s = ", type_to_string(a).c_str(), type_to_string(b).c_str()); for (unsigned k = 0; k < type_playground::type_comparisons; k++) { type_playground::type_comparison_func_t comparison = type_playground::comparisons[k]; const bool allowed_to_run = filter_comparisons_functions(comparison); if (!allowed_to_run) continue; const bool result = comparison(a, b); log("%s, ", result ? "t" : "f"); } log("\n"); } } } static void filter_out_types_in_set(ptrset_t &types) { // compare everything if (!flag_tp_types_compared) return; const size_t buffer_size = 1024; const size_t cli_length = strlen(flag_tp_comparison_functions); gcc_assert(buffer_size > cli_length); char whitelist[buffer_size]; strcpy(whitelist, flag_tp_types_compared); bool name_allowed = false; for (auto it = types.points_to_record.cbegin(); it != types.points_to_record.cend(); name_allowed ? ++it : it = types.points_to_record.erase(it)) { const_tree type = *it; std::string observed_name = get_type_identifier(type); char* saveptr = whitelist; char* expected_name = NULL; name_allowed = false; while (expected_name = strtok_r(saveptr, ",", &saveptr)) { name_allowed |= strcmp(expected_name, observed_name.c_str()) == 0; } } } static void collect_types(ptrset_t &types) { collect_types_from_globals(types); cgraph_node *node = NULL; FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) { node->get_untransformed_body(); collect_types_from_functions_with_gimple_body(node, types); // We still need to collect types from // the function signatures of functions without gimple bodies... } } static unsigned int iphw_execute() { //test_type_equality::run_tests(); //test_naming_types::run_tests(); ptrset_t types; collect_types(types); filter_out_types_in_set(types); compare_types_in_set(types); return 0; } namespace { const pass_data pass_data_ipa_escape_analysis = { SIMPLE_IPA_PASS, "escape-analysis", OPTGROUP_NONE, TV_NONE, (PROP_cfg | PROP_ssa), 0, 0, TODO_rebuild_alias, 0, }; class pass_ipa_escape_analysis : public simple_ipa_opt_pass { public: pass_ipa_escape_analysis (gcc::context *ctx) : simple_ipa_opt_pass(pass_data_ipa_escape_analysis, ctx) {} virtual bool gate(function*) { return flag_ipa_escape_analysis; } virtual unsigned execute (function*) { return iphw_execute(); } }; } // anon namespace simple_ipa_opt_pass* make_pass_ipa_escape_analysis (gcc::context *ctx) { return new pass_ipa_escape_analysis (ctx); }