Projects
Mega:23.03
gcc
_service:tar_scm:0027-Autoprefetch-Support-auto...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0027-Autoprefetch-Support-auto-feedback-prefetch.patch of Package gcc
From 6b944bed1158d3454b1db27aeab4ec1f2b8e5866 Mon Sep 17 00:00:00 2001 From: huangxiaoquan <huangxiaoquan1@huawei.com> Date: Thu, 27 Jan 2022 18:24:53 +0800 Subject: [PATCH 27/28] [Autoprefetch] Support auto feedback prefetch 1.Add option -fprefetch-loop-arrays=[value]. 2.A prefetch distance analysis algorithm based on branch weight is proposed to improve the accuracy of prefetch distance. 3.Propose automatic feedback prefetching: use the cache-miss profile information to guide the insertion of prefetching instructions. --- gcc/auto-profile.c | 5 +- gcc/common.opt | 5 + gcc/opts.c | 7 + gcc/params.opt | 16 + gcc/tree-ssa-loop-prefetch.c | 735 ++++++++++++++++++++++++++++++++++- 5 files changed, 748 insertions(+), 20 deletions(-) diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c index e6164b91b..f221978fc 100644 --- a/gcc/auto-profile.c +++ b/gcc/auto-profile.c @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see #include "config.h" #define INCLUDE_MAP #define INCLUDE_SET +#define INCLUDE_ALGORITHM +#define INCLUDE_VECTOR #include "system.h" #include "coretypes.h" #include "backend.h" @@ -49,9 +51,6 @@ along with GCC; see the file COPYING3. If not see #include "auto-profile.h" #include "tree-pretty-print.h" #include "gimple-pretty-print.h" -#include <map> -#include <vector> -#include <algorithm> /* The following routines implements AutoFDO optimization. diff --git a/gcc/common.opt b/gcc/common.opt index 37cbbd8c0..9488bd90f 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2201,6 +2201,11 @@ fprefetch-loop-arrays Common Report Var(flag_prefetch_loop_arrays) Init(-1) Optimization Generate prefetch instructions, if available, for arrays in loops. +fprefetch-loop-arrays= +Common Joined RejectNegative UInteger Var(prefetch_level) Init(0) IntegerRange(0, 3) +Generate prefetch instructions, if available, for arrays in loops. The prefetch +level can control the optimize level to array prefetch. + fprofile Common Report Var(profile_flag) Enable basic program profiling code. diff --git a/gcc/opts.c b/gcc/opts.c index 7a39f618b..f49f5ee58 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1747,6 +1747,8 @@ set_cache_misses_profile_params (struct gcc_options *opts, struct gcc_options *opts_set) { SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1); + SET_OPTION_IF_UNSET (opts, opts_set, prefetch_level, 2); + SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 100); } /* -f{,no-}sanitize{,-recover}= suboptions. */ @@ -2645,6 +2647,11 @@ common_handle_option (struct gcc_options *opts, SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_bit_cp, value); break; + case OPT_fprefetch_loop_arrays_: + opts->x_prefetch_level = value; + opts->x_flag_prefetch_loop_arrays = true; + break; + case OPT_fpatchable_function_entry_: { char *patch_area_arg = xstrdup (arg); diff --git a/gcc/params.opt b/gcc/params.opt index 2db69cc87..9d1faa7ab 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -968,4 +968,20 @@ Bound on number of runtime checks inserted by the vectorizer's loop versioning f Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check. +-param=param-prefetch-func-topn= +Common Joined UInteger Var(param_prefetch_func_topn) Init(3) Param Optimization +TopN functions of cache miss counts to be analyzed in prefetching. + +-param=param-prefetch-ref-topn= +Common Joined UInteger Var(param_prefetch_ref_topn) Init(5) Param Optimization +TopN ref of cache miss counts to be analyzed in prefetching. + +-param=param-high-loop-execution-rate= +Common Joined UInteger Var(param_high_loop_execution_rate) Init(95) IntegerRange(0, 100) Param Optimization +High execution rate loops to be analyzed in prefetch (in%). + +-param=param-prefetch-func-counts-threshold= +Common Joined UInteger Var(param_prefetch_func_counts_threshold) Init(100) Param Optimization +Threshold functions of cache miss counts to be analyzed in prefetching. + ; This comment is to ensure we retain the blank line above. diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index d19ece641..3a5aef0fc 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -18,6 +18,9 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ #include "config.h" +#define INCLUDE_ALGORITHM +#define INCLUDE_MAP +#define INCLUDE_VECTOR #include "system.h" #include "coretypes.h" #include "backend.h" @@ -48,6 +51,11 @@ along with GCC; see the file COPYING3. If not see #include "tree-data-ref.h" #include "diagnostic-core.h" #include "dbgcnt.h" +#include "gimple-pretty-print.h" +#include "tree-cfg.h" +#include "auto-profile.h" +#include "cgraph.h" +#include "print-tree.h" /* This pass inserts prefetch instructions to optimize cache usage during accesses to arrays in loops. It processes loops sequentially and: @@ -253,6 +261,22 @@ struct mem_ref_group #define PREFETCH_MAX_MEM_REFS_PER_LOOP 200 #endif +#ifndef PREFETCH_FUNC_TOPN +#define PREFETCH_FUNC_TOPN param_prefetch_func_topn +#endif + +#ifndef PREFETCH_FUNC_COUNTS_THRESHOLD +#define PREFETCH_FUNC_COUNTS_THRESHOLD param_prefetch_func_counts_threshold +#endif + +#ifndef PREFETCH_REF_TOPN +#define PREFETCH_REF_TOPN param_prefetch_ref_topn +#endif + +#ifndef LOOP_EXECUTION_RATE +#define LOOP_EXECUTION_RATE param_high_loop_execution_rate +#endif + /* The memory reference. */ struct mem_ref @@ -279,6 +303,131 @@ struct mem_ref nontemporal one. */ }; +/* Probability information of basic blocks and branches. */ +struct bb_bp +{ + basic_block bb; + basic_block true_edge_bb; + basic_block false_edge_bb; + float true_edge_prob; + float false_edge_prob; + float bb_prob; +}; + +typedef struct bb_bp bb_bp; + +enum PREFETCH_MODE +{ + ORIGINAL_MODE=0, /* Original prefetch method. */ + REFINE_BB_AHEAD, + /* Prefetch distance algorithm for removing + irrelevant bb. */ + BRANCH_WEIGHTED_AHEAD, + /* Branch weighted prefetch + distance algorithm. */ + INDIRECT_MODE /* Indirect array prefetch mode. */ +}; + +typedef std::map <unsigned int, unsigned int> uid_rank_map; +typedef std::map <location_t, unsigned int> loc_rank_map; +typedef std::vector <std::pair<location_t, gcov_type> > loc_gcov_type_vec; +typedef std::map <location_t, std::vector<gimple *> > loc_gimple_vec_map; + +static loc_rank_map ref_rank; + +/* Callback function for event_count comparison. */ + +static bool +event_count_cmp (std::pair<unsigned int, gcov_type> &a, + std::pair<unsigned int, gcov_type> &b) +{ + return a.second > b.second; +} + +/* Prepared mappings from location to counts and from location + to stmt list. */ + +static void +prepare_loc_count_info (function *fun, loc_gcov_type_vec &ref_sorted, + loc_gimple_vec_map &loc_stmt, event_type event) +{ + basic_block bb = NULL; + gimple_stmt_iterator bsi; + gimple *stmt; + tree lhs = NULL_TREE; + tree rhs = NULL_TREE; + + FOR_EACH_BB_FN (bb, fun) + { + for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) + { + stmt = gsi_stmt (bsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + { + continue; + } + if (!gimple_vuse (stmt)) + { + continue; + } + lhs = gimple_assign_lhs (stmt); + rhs = gimple_assign_rhs1 (stmt); + if (REFERENCE_CLASS_P (rhs) || REFERENCE_CLASS_P (lhs)) + { + gcov_type loc_count = + event_get_loc_count (gimple_location (stmt), event); + if (loc_count > 0) + { + /* There may be multiple gimple correspond to the same + location. */ + if (loc_stmt.count (gimple_location (stmt)) == 0) + { + ref_sorted.push_back (std::make_pair (gimple_location (stmt), + loc_count)); + } + loc_stmt[gimple_location (stmt)].push_back (stmt); + } + } + } + } +} + +/* Sort references by event_count and dump loc count information after + sorting. */ + +static void +sort_ref_by_event_count (function *fun, event_type event) +{ + loc_gcov_type_vec ref_sorted; + loc_gimple_vec_map loc_stmt; + + prepare_loc_count_info (fun, ref_sorted, loc_stmt, event); + sort (ref_sorted.begin (), ref_sorted.end (), event_count_cmp); + + for (unsigned i = 0; i < ref_sorted.size (); ++i) + { + ref_rank[ref_sorted[i].first] = i + 1; + /* Print the stmt and count of the topn ref. */ + if (i < PREFETCH_REF_TOPN && dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "stmt: \n"); + for (unsigned j = 0; j < loc_stmt[ref_sorted[i].first].size (); + ++j) + { + print_gimple_stmt (dump_file, + loc_stmt[ref_sorted[i].first][j], 0); + } + gcov_type loc_count = + event_get_loc_count (ref_sorted[i].first, event); + fprintf (dump_file, "stmt loc %u counts is %lu: " + "rank %d in top %d, (candidate analysis)\n\n", + ref_sorted[i].first, loc_count, + ref_rank[ref_sorted[i].first], PREFETCH_REF_TOPN); + } + } + return; +} + /* Dumps information about memory reference */ static void dump_mem_details (FILE *file, tree base, tree step, @@ -479,6 +628,30 @@ idx_analyze_ref (tree base, tree *index, void *data) return true; } +/* Dumps information about ar_data structure. */ + +static void +dump_ar_data_details (FILE *file, tree ref, struct ar_data &ar_data) +{ + print_generic_expr (file, ref, TDF_SLIM); + fprintf (file, "\n"); + if (*(ar_data.step)) + { + fprintf (file, " step "); + if (cst_and_fits_in_hwi (*(ar_data.step))) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + int_cst_value (*(ar_data.step))); + else + print_generic_expr (file, *(ar_data.step), TDF_SLIM); + } + fprintf (file, "\n"); + if (*(ar_data.delta)) + { + fprintf (file, " delta " HOST_WIDE_INT_PRINT_DEC "\n", + *(ar_data.delta)); + } +} + /* Tries to express REF_P in shape &BASE + STEP * iter + DELTA, where DELTA and STEP are integer constants and iter is number of iterations of LOOP. The reference occurs in statement STMT. Strips nonaddressable component @@ -526,7 +699,17 @@ analyze_ref (class loop *loop, tree *ref_p, tree *base, ar_data.stmt = stmt; ar_data.step = step; ar_data.delta = delta; - return for_each_index (base, idx_analyze_ref, &ar_data); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + dump_ar_data_details (dump_file, ref, ar_data); + } + bool idx_flag = for_each_index (base, idx_analyze_ref, &ar_data); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "idx_flag = %d \n\n", idx_flag); + } + return idx_flag; } /* Record a memory reference REF to the list REFS. The reference occurs in @@ -601,6 +784,55 @@ gather_memory_references_ref (class loop *loop, struct mem_ref_group **refs, return true; } +/* Determine whether to collect the memory references based on the + ranking of ref cache miss counts. */ + +static bool +should_gather_memory_references (gimple *stmt) +{ + if (!(profile_exist (CACHE_MISSES))) + { + return true; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "stmt:"); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "\n"); + } + if (ref_rank.count (gimple_location (stmt)) == 0) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "stmt location no found, skip prefetch " + "analysis\n"); + } + return false; + } + gcov_type loc_count = event_get_loc_count (gimple_location (stmt), CACHE_MISSES); + if (ref_rank[gimple_location (stmt)] > PREFETCH_REF_TOPN) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "stmt loc %u counts is %lu:" + "rank %d exceed topn %d, skip prefetch " + "analysis\n", + gimple_location (stmt), loc_count, + ref_rank[gimple_location (stmt)], PREFETCH_REF_TOPN); + } + return false; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "stmt loc %u counts is %lu: rank %d in top %d," + "continue prefetch analysis\n", + gimple_location (stmt), loc_count, + ref_rank[gimple_location (stmt)], PREFETCH_REF_TOPN); + } + return true; +} + /* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to true if there are no other memory references inside the loop. */ @@ -626,6 +858,13 @@ gather_memory_references (class loop *loop, bool *no_other_refs, unsigned *ref_c if (bb->loop_father != loop) continue; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "==== the %dth loop bb body ====\n", i); + gimple_dump_bb (dump_file, bb, 0, dump_flags); + fprintf (dump_file, "\n"); + } + for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) { stmt = gsi_stmt (bsi); @@ -642,20 +881,31 @@ gather_memory_references (class loop *loop, bool *no_other_refs, unsigned *ref_c if (! gimple_vuse (stmt)) continue; + if (!should_gather_memory_references (stmt)) + continue; + lhs = gimple_assign_lhs (stmt); rhs = gimple_assign_rhs1 (stmt); if (REFERENCE_CLASS_P (rhs)) { - *no_other_refs &= gather_memory_references_ref (loop, &refs, - rhs, false, stmt); - *ref_count += 1; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "====> the %dth ref \n", *ref_count); + } + *no_other_refs &= gather_memory_references_ref (loop, &refs, rhs, + false, stmt); + *ref_count += 1; } if (REFERENCE_CLASS_P (lhs)) { - *no_other_refs &= gather_memory_references_ref (loop, &refs, - lhs, true, stmt); - *ref_count += 1; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "====> the %dth ref \n", *ref_count); + } + *no_other_refs &= gather_memory_references_ref (loop, &refs, lhs, + true, stmt); + *ref_count += 1; } } } @@ -1168,9 +1418,9 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead) bool nontemporal = ref->reuse_distance >= L2_CACHE_SIZE_BYTES; if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n", - nontemporal ? " nontemporal" : "", - ref->group->uid, ref->uid); + fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n", + nontemporal ? " nontemporal" : "", + ref->group->uid, ref->uid); bsi = gsi_for_stmt (ref->stmt); @@ -1875,6 +2125,306 @@ insn_to_prefetch_ratio_too_small_p (unsigned ninsns, unsigned prefetch_count, return false; } +/* Obtain the edge probability information of each basic block in the loop. */ + +static float +get_edge_prob (edge e) +{ + /* Limit the minimum probability value. */ + const float MINNUM_PROB = 0.00001f; + float fvalue = 1; + + profile_probability probability = e->probability; + if (probability.initialized_p ()) + { + fvalue = probability.to_reg_br_prob_base () / float (REG_BR_PROB_BASE); + if (fvalue < MINNUM_PROB && probability.to_reg_br_prob_base ()) + { + fvalue = MINNUM_PROB; + } + } + return fvalue; +} + + +/* Dump the bb information in a loop. */ + +static void +dump_loop_bb (struct loop *loop) +{ + basic_block *body = get_loop_body_in_dom_order (loop); + basic_block bb = NULL; + + for (unsigned i = 0; i < loop->num_nodes; i++) + { + bb = body[i]; + if (bb->loop_father != loop) + { + continue; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "===== the %dth loop bb body ======= \n", i); + gimple_dump_bb (dump_file, bb, 0, dump_flags); + fprintf (dump_file, "\n"); + } + } + free (body); +} + + +/* Obtain the branch probability information of each basic block + in the loop. */ + +static void +get_bb_branch_prob (hash_map <basic_block, bb_bp> &bb_branch_prob, + struct loop *loop) +{ + basic_block *body = get_loop_body (loop); + basic_block bb = NULL; + for (unsigned i = 0; i < loop->num_nodes; i++) + { + bb = body[i]; + if (bb->loop_father != loop) + { + continue; + } + bb_bp &branch_prob = bb_branch_prob.get_or_insert (bb); + branch_prob.bb = bb; + branch_prob.true_edge_bb = NULL; + branch_prob.false_edge_bb = NULL; + branch_prob.true_edge_prob = 0; + branch_prob.false_edge_prob = 0; + branch_prob.bb_prob = 0; + + gimple *stmt = last_stmt (bb); + if (stmt && gimple_code (stmt) == GIMPLE_COND) + { + if (EDGE_COUNT (bb->succs) != 2) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "The number of successful edges of bb" + "is abnormal\n"); + continue; + } + edge true_edge = NULL; + edge false_edge = NULL; + extract_true_false_edges_from_block (bb, &true_edge, &false_edge); + + /* If it is exiting bb, and the destination bb of the edge does not + belong to the current loop, the information of the edge is not + recorded. */ + if (true_edge->dest->loop_father == loop) + { + branch_prob.true_edge_bb = true_edge->dest; + branch_prob.true_edge_prob = get_edge_prob (true_edge); + } + if (false_edge->dest->loop_father == loop) + { + branch_prob.false_edge_bb = false_edge->dest; + branch_prob.false_edge_prob = get_edge_prob (false_edge); + } + } + + edge e = find_fallthru_edge (bb->succs); + if (e) + { + branch_prob.true_edge_bb = e->dest; + branch_prob.true_edge_prob = get_edge_prob (e); + } + } +} + +/* Traverse each bb in the loop and prune fake loops. */ + +static bool +traverse_prune_bb_branch (hash_map <basic_block, bb_bp> &bb_branch_prob, + int& max_path, hash_set <basic_block> &path_node, + basic_block current_bb, basic_block latch_bb) +{ + /* Limit the maximum number of analysis paths. */ + if (max_path <= 0 || current_bb == NULL) + return false; + + /* Do not join edges that do not form a complete loop. */ + bb_bp *bb_bp_node = bb_branch_prob.get (current_bb); + if (bb_bp_node == NULL || (bb_bp_node->true_edge_bb == NULL + && bb_bp_node->false_edge_bb == NULL)) + return false; + + if (current_bb == latch_bb) + { + max_path--; + return true; + } + + /* Do not join edges that return to non-dominate nodes. */ + if (path_node.contains (bb_bp_node->true_edge_bb) + || path_node.contains (bb_bp_node->false_edge_bb)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "fake loop: in bb%d\n", current_bb->index); + return false; + } + + path_node.add (current_bb); + if (bb_bp_node->true_edge_bb) + { + if (traverse_prune_bb_branch (bb_branch_prob, max_path, + path_node, bb_bp_node->true_edge_bb, latch_bb) == false) + return false; + } + if (bb_bp_node->false_edge_bb) + { + if (traverse_prune_bb_branch (bb_branch_prob, max_path, + path_node, bb_bp_node->false_edge_bb, latch_bb) == false) + return false; + } + path_node.remove (current_bb); + + max_path--; + return true; +} + +/* Traverse and calculate the probability of basic block. */ + +static void +traverse_calculate_bb_prob (hash_map <basic_block, bb_bp> &bb_branch_prob, + basic_block current_bb, basic_block latch_bb, + float prob) +{ + /* Limit bb block access probability, the probability is + less than 100% and include delta. */ + const float MAX_BB_PROBABILITY = 1.001f; + + if (current_bb == NULL) + { + return; + } + bb_bp *bb_bp_node = bb_branch_prob.get (current_bb); + bb_bp_node->bb_prob += prob; + + gcc_assert (bb_bp_node->bb_prob <= MAX_BB_PROBABILITY); + + if (bb_bp_node == NULL || (bb_bp_node->true_edge_bb == NULL + && bb_bp_node->false_edge_bb == NULL)) + { + return; + } + if (current_bb == latch_bb) + { + return; + } + + bool assign = (bb_bp_node->true_edge_bb && bb_bp_node->false_edge_bb); + if (bb_bp_node->true_edge_bb) + { + float assign_prob = assign ? bb_bp_node->true_edge_prob * prob : prob; + traverse_calculate_bb_prob (bb_branch_prob, + bb_bp_node->true_edge_bb, latch_bb, assign_prob); + } + if (bb_bp_node->false_edge_bb) + { + float assign_prob = assign ? bb_bp_node->false_edge_prob * prob : prob; + traverse_calculate_bb_prob (bb_branch_prob, + bb_bp_node->false_edge_bb, latch_bb, assign_prob); + } + return; +} + +/* Obtain the probability of basic block. */ + +static bool +get_bb_prob (hash_map <basic_block, bb_bp> &bb_branch_prob, struct loop *loop) +{ + /* The upper limit of the branch path in the loop is 10000. */ + const int MAX_BB_BRANCH_PATH = 10000; + + if (loop->header == NULL || loop->latch == NULL + || loop->header == loop->latch) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "get_bb_prob failed: without the header bb or " + "latch bb\n"); + return false; + } + + bb_bp *latch_branch_prob = bb_branch_prob.get (loop->latch); + bb_bp *header_branch_prob = bb_branch_prob.get (loop->header); + if (header_branch_prob == NULL || latch_branch_prob == NULL + || (latch_branch_prob->true_edge_bb != header_branch_prob->bb + && latch_branch_prob->false_edge_bb != header_branch_prob->bb)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "get_bb_prob failed: loop data exception\n"); + return false; + } + + hash_set <basic_block> path_node; + int max_path = MAX_BB_BRANCH_PATH; + if (traverse_prune_bb_branch (bb_branch_prob, max_path, path_node, + header_branch_prob->bb, loop->latch) == false) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "traverse_prune_bb_branch false.\n"); + return false; + } + traverse_calculate_bb_prob (bb_branch_prob, + header_branch_prob->bb, loop->latch, 1); + + return true; +} + +/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS. */ + +static unsigned +estimate_num_loop_insns (struct loop *loop, eni_weights *weights) +{ + basic_block *body = get_loop_body_in_dom_order (loop); + gimple_stmt_iterator gsi; + float size = 0; + basic_block bb = NULL; + hash_map <basic_block, bb_bp> bb_branch_prob; + + if (prefetch_level >= BRANCH_WEIGHTED_AHEAD) + { + get_bb_branch_prob (bb_branch_prob, loop); + if (get_bb_prob (bb_branch_prob, loop) == false) + { + dump_loop_bb (loop); + return 0; + } + } + + for (unsigned i = 0; i < loop->num_nodes; i++) + { + bb = body[i]; + /* For nested loops, the bb of the inner loop is not calculated. */ + if (bb->loop_father != loop) + { + continue; + } + + float size_tmp = 0; + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + size_tmp += estimate_num_insns (gsi_stmt (gsi), weights); + } + + if (prefetch_level >= BRANCH_WEIGHTED_AHEAD) + { + float bb_prob = bb_branch_prob.get (bb)->bb_prob; + size += size_tmp * bb_prob; + } + else + { + size += size_tmp; + } + } + free (body); + + return unsigned (size); +} /* Issue prefetch instructions for array references in LOOP. Returns true if the LOOP was unrolled. */ @@ -1899,7 +2449,15 @@ loop_prefetch_arrays (class loop *loop) /* FIXME: the time should be weighted by the probabilities of the blocks in the loop body. */ - time = tree_num_loop_insns (loop, &eni_time_weights); + + if (prefetch_level >= REFINE_BB_AHEAD) + { + time = estimate_num_loop_insns (loop, &eni_time_weights); + } + else + { + time = tree_num_loop_insns (loop, &eni_time_weights); + } if (time == 0) return false; @@ -1913,7 +2471,14 @@ loop_prefetch_arrays (class loop *loop) if (trip_count_to_ahead_ratio_too_small_p (ahead, est_niter)) return false; - ninsns = tree_num_loop_insns (loop, &eni_size_weights); + if (prefetch_level >= REFINE_BB_AHEAD) + { + ninsns = estimate_num_loop_insns (loop, &eni_size_weights); + } + else + { + ninsns = tree_num_loop_insns (loop, &eni_size_weights); + } /* Step 1: gather the memory references. */ refs = gather_memory_references (loop, &no_other_refs, &mem_ref_count); @@ -1978,10 +2543,49 @@ fail: return unrolled; } +/* Determine if it is a high execution rate loop. */ + +static bool +is_high_exec_rate_loop (struct loop *loop) +{ + vec<edge> exit_edges = get_loop_exit_edges (loop); + if (exit_edges == vNULL) + { + return false; + } + + unsigned i = 0; + gcov_type exit_count = 0; + edge e = NULL; + float loop_exec_rate = 0; + gcov_type header_bb_count = loop->header->count.to_gcov_type (); + FOR_EACH_VEC_ELT (exit_edges, i, e) + { + gcov_type exiting_bb_count = e->src->count.to_gcov_type (); + float exit_edge_prob = get_edge_prob (e); + exit_count += exit_edge_prob * exiting_bb_count; + + loop_exec_rate = 1.0 - ((double) exit_count / header_bb_count); + + if (loop_exec_rate < (float) LOOP_EXECUTION_RATE / 100.0) + { + return false; + } + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "loop with high execution rate: %f >= %f\n\n", + loop_exec_rate, (float) LOOP_EXECUTION_RATE / 100.0); + dump_loop_bb (loop); + } + return true; +} + /* Issue prefetch instructions for array references in loops. */ unsigned int -tree_ssa_prefetch_arrays (void) +tree_ssa_prefetch_arrays (function *fun) { class loop *loop; bool unrolled = false; @@ -2012,6 +2616,12 @@ tree_ssa_prefetch_arrays (void) param_min_insn_to_prefetch_ratio); fprintf (dump_file, " min insn-to-mem ratio: %d \n", param_prefetch_min_insn_to_mem_ratio); + fprintf (dump_file, " prefetch_func_topn: %d \n", + param_prefetch_func_topn); + fprintf (dump_file, " prefetch_ref_topn: %d \n", + param_prefetch_ref_topn); + fprintf (dump_file, " high_loop_execution_rate: %d \n", + LOOP_EXECUTION_RATE); fprintf (dump_file, "\n"); } @@ -2028,13 +2638,42 @@ tree_ssa_prefetch_arrays (void) set_builtin_decl (BUILT_IN_PREFETCH, decl, false); } - FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) + enum li_flags LI = LI_FROM_INNERMOST; + + if (profile_exist (CACHE_MISSES)) + { + LI = LI_ONLY_INNERMOST; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Processing model %d:\n", LI); + } + + if (profile_exist (CACHE_MISSES)) + { + sort_ref_by_event_count (fun, CACHE_MISSES); + } + + FOR_EACH_LOOP (loop, LI) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Processing loop %d:\n", loop->num); + { + fprintf (dump_file, "======================================\n"); + fprintf (dump_file, "Processing loop %d:\n", loop->num); + fprintf (dump_file, "======================================\n"); + flow_loop_dump (loop, dump_file, NULL, 1); + fprintf (dump_file, "\n\n"); + } - unrolled |= loop_prefetch_arrays (loop); + if (profile_exist (CACHE_MISSES)) + { + if (!is_high_exec_rate_loop (loop)) + { + continue; + } + } + unrolled |= loop_prefetch_arrays (loop); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\n\n"); } @@ -2049,6 +2688,56 @@ tree_ssa_prefetch_arrays (void) return todo_flags; } +/* Determine whether to analyze the function according to + the sorting of the function containing cache-miss counts. */ + +static bool +should_analyze_func_p (void) +{ + gcov_type decl_uid = DECL_UID (current_function_decl); + struct rank_info func_rank_info = + event_get_func_rank (decl_uid, CACHE_MISSES); + if (func_rank_info.total == 0) + { + return false; + } + gcov_type func_count = event_get_func_count (decl_uid, CACHE_MISSES); + if (func_count == 0) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "function uid %d cannot find profile data " + "and skip prefetch analysis\n", + decl_uid); + } + return false; + } + if (func_rank_info.rank > PREFETCH_FUNC_TOPN + || func_count < PREFETCH_FUNC_COUNTS_THRESHOLD) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "function uid %d total counts is %lu: " + "rank %d > topn %d, counts %lu < threshold %lu " + "skip prefetch analysis\n", + decl_uid, func_count, + func_rank_info.rank, PREFETCH_FUNC_TOPN, + func_count, PREFETCH_FUNC_COUNTS_THRESHOLD); + } + return false; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "function uid %d total counts is %lu: " + "rank %d in topn %d, counts %lu > threshold %lu " + "continue prefetch analysis\n", + decl_uid, func_count, + func_rank_info.rank, PREFETCH_FUNC_TOPN, + func_count, PREFETCH_FUNC_COUNTS_THRESHOLD); + } + return true; +} + /* Prefetching. */ namespace { @@ -2085,6 +2774,18 @@ pass_loop_prefetch::execute (function *fun) if (number_of_loops (fun) <= 1) return 0; + /* Filter only when combined with cache-miss. When the should_analyze_func_p + analysis fails (for example, the function without cache-miss count), + in order to ensure the accuracy of the prefetch analysis, the function + does not perform native prefetch processing. */ + if (profile_exist (CACHE_MISSES)) + { + if (!should_analyze_func_p ()) + { + return 0; + } + } + if ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) != 0) { static bool warned = false; @@ -2099,7 +2800,7 @@ pass_loop_prefetch::execute (function *fun) return 0; } - return tree_ssa_prefetch_arrays (); + return tree_ssa_prefetch_arrays (fun); } } // anon namespace -- 2.27.0.windows.1
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2