Projects
Mega:23.03
gcc
_service:tar_scm:0078-Loop-distribution-Add-iso...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch of Package gcc
From d334ec1579fb0668da5e23ced3b782d7f6f35d77 Mon Sep 17 00:00:00 2001 From: benniaobufeijiushiji <linda7@huawei.com> Date: Mon, 17 Oct 2022 17:21:57 +0800 Subject: [PATCH 30/35] [Loop-distribution] Add isomorphic stmts analysis Use option -ftree-slp-transpose-vectorize Check if loop is vectorizable before analysis. For unvectorizable loops, try to find isomorphic stmts from grouped load as new seed stmts for distribution. --- gcc/tree-loop-distribution.c | 858 +++++++++++++++++++++++++++++++++++ gcc/tree-vect-loop.c | 37 +- gcc/tree-vectorizer.h | 3 +- 3 files changed, 894 insertions(+), 4 deletions(-) diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 888af4894..c08af6562 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -90,6 +90,8 @@ along with GCC; see the file COPYING3. If not see data reuse. */ #include "config.h" +#define INCLUDE_MAP +#define INCLUDE_ALGORITHM #include "system.h" #include "coretypes.h" #include "backend.h" @@ -115,6 +117,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-vectorizer.h" #include "tree-eh.h" #include "gimple-fold.h" +#include "optabs-tree.h" #define MAX_DATAREFS_NUM \ @@ -183,6 +186,52 @@ struct rdg_vertex #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I])) #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I])) +/* Results of isomorphic group analysis. */ +#define UNINITIALIZED (0) +#define ISOMORPHIC (1) +#define HETEROGENEOUS (1 << 1) +#define UNCERTAIN (1 << 2) + +/* Information of a stmt while analyzing isomorphic use in group. */ + +typedef struct _group_info +{ + gimple *stmt; + + /* True if stmt can be a cut point. */ + bool cut_point; + + /* For use_stmt with two rhses, one of which is the lhs of stmt. + If the other is unknown to be isomorphic, mark it uncertain. */ + bool uncertain; + + /* Searching of isomorphic stmt reaches heterogeneous groups or reaches + MEM stmts. 
*/ + bool done; + + _group_info () + { + stmt = NULL; + cut_point = false; + uncertain = false; + done = false; + } +} *group_info; + +/* PAIR of cut points and corresponding profit. */ +typedef std::pair<vec<gimple *> *, int> stmts_profit; + +/* MAP of vector factor VF and corresponding stmts_profit PAIR. */ +typedef std::map<unsigned, stmts_profit> vf_stmts_profit_map; + +/* PAIR of group_num and iteration_num. We consider rhses from the same + group and interation are isomorphic. */ +typedef std::pair<unsigned, unsigned> group_iteration; + +/* An isomorphic stmt is detetmined by lhs of use_stmt, group_num and + the iteration_num when we insert this stmt to this map. */ +typedef std::map<tree, group_iteration> isomer_stmt_lhs; + /* Data dependence type. */ enum rdg_dep_type @@ -640,6 +689,18 @@ class loop_distribution void finalize_partitions (class loop *loop, vec<struct partition *> *partitions, vec<ddr_p> *alias_ddrs); + /* Analyze loop form and if it's vectorizable to decide if we need to + insert temp arrays to distribute it. */ + bool may_insert_temp_arrays (loop_p loop, struct graph *&rdg, + control_dependences *cd); + + /* Reset gimple_uid of GIMPLE_DEBUG and GIMPLE_LABEL to -1. */ + void reset_gimple_uid (loop_p loop); + + bool check_loop_vectorizable (loop_p loop); + + inline void rebuild_rdg (loop_p loop, struct graph *&rdg, + control_dependences *cd); /* Distributes the code from LOOP in such a way that producer statements are placed before consumer statements. Tries to separate only the statements from STMTS into separate loops. Returns the number of @@ -2900,6 +2961,803 @@ loop_distribution::finalize_partitions (class loop *loop, fuse_memset_builtins (partitions); } +/* Gimple uids of GIMPLE_DEBUG and GIMPLE_LABEL were changed during function + vect_analyze_loop, reset them to -1. 
*/ + +void +loop_distribution::reset_gimple_uid (loop_p loop) +{ + basic_block *bbs = get_loop_body_in_custom_order (loop, this, + bb_top_order_cmp_r); + for (int i = 0; i < int (loop->num_nodes); i++) + { + basic_block bb = bbs[i]; + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_debug (stmt) || gimple_code (stmt) == GIMPLE_LABEL) + gimple_set_uid (stmt, -1); + } + } + free (bbs); +} + +bool +loop_distribution::check_loop_vectorizable (loop_p loop) +{ + vec_info_shared shared; + vect_analyze_loop (loop, &shared, true); + loop_vec_info vinfo = loop_vec_info_for_loop (loop); + reset_gimple_uid (loop); + if (vinfo == NULL) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Loop %d no temp array insertion: bad data access pattern," + " unable to generate loop_vinfo.\n", loop->num); + return false; + } + if (vinfo->vectorizable) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d no temp array insertion: original loop" + " can be vectorized without distribution.\n", + loop->num); + delete vinfo; + loop->aux = NULL; + return false; + } + if (vinfo->grouped_loads.length () == 0) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d no temp array insertion: original loop" + " has no grouped loads.\n" , loop->num); + delete vinfo; + loop->aux = NULL; + return false; + } + return true; +} + +inline void +loop_distribution::rebuild_rdg (loop_p loop, struct graph *&rdg, + control_dependences *cd) +{ + free_rdg (rdg); + rdg = build_rdg (loop, cd); + gcc_checking_assert (rdg != NULL); +} + +bool +loop_distribution::may_insert_temp_arrays (loop_p loop, struct graph *&rdg, + control_dependences *cd) +{ + if (!(flag_tree_slp_transpose_vectorize && flag_tree_loop_vectorize)) + return false; + + /* Only loops with two basic blocks HEADER and LATCH are supported. 
HEADER + is the main body of a LOOP and LATCH is the basic block that controls the + LOOP execution. Size of temp array is determined by loop execution time, + so it must be a const. */ + tree loop_extent = number_of_latch_executions (loop); + if (loop->inner != NULL || loop->num_nodes > 2 + || rdg->n_vertices > param_slp_max_insns_in_bb + || TREE_CODE (loop_extent) != INTEGER_CST) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d: no temp array insertion: bad loop" + " form.\n", loop->num); + return false; + } + + if (loop->dont_vectorize) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d: no temp array insertion: this loop" + " should never be vectorized.\n", + loop->num); + return false; + } + + /* Do not distribute a LOOP that is able to be vectorized without + distribution. */ + if (!check_loop_vectorizable (loop)) + { + rebuild_rdg (loop, rdg, cd); + return false; + } + + rebuild_rdg (loop, rdg, cd); + return true; +} + +/* Return max grouped loads' length if all groupes length satisfy len = 2 ^ n. + Otherwise, return 0. */ + +static unsigned +get_max_vf (loop_vec_info vinfo) +{ + unsigned size = 0; + unsigned max = 0; + stmt_vec_info stmt_info; + unsigned i = 0; + FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) + { + size = stmt_info->size; + if (!pow2p_hwi (size)) + return 0; + max = size > max ? size : max; + } + return max; +} + +/* Convert grouped_loads from linked list to vector with length vf. Init + group_info of each stmt in the same group and put then into a vector. And + these vectors consist WORKLISTS. We will re-analyze a group if it is + uncertain, so we regard WORKLISTS as a circular queue. 
*/ + +static unsigned +build_queue (loop_vec_info vinfo, unsigned vf, + vec<vec<group_info> *> &worklists) +{ + stmt_vec_info stmt_info; + unsigned i = 0; + group_info ginfo = NULL; + vec<group_info> *worklist = NULL; + FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) + { + unsigned group_size = stmt_info->size; + stmt_vec_info c_stmt_info = stmt_info; + while (group_size >= vf) + { + vec_alloc (worklist, vf); + for (unsigned j = 0; j < vf; ++j) + { + ginfo = new _group_info (); + ginfo->stmt = c_stmt_info->stmt; + worklist->safe_push (ginfo); + c_stmt_info = c_stmt_info->next_element; + } + worklists.safe_push (worklist); + group_size -= vf; + } + } + return worklists.length (); +} + +static bool +check_same_oprand_type (tree op1, tree op2) +{ + tree type1 = TREE_TYPE (op1); + tree type2 = TREE_TYPE (op2); + if (TREE_CODE (type1) != INTEGER_TYPE && TREE_CODE (type1) != REAL_TYPE) + return false; + + return (TREE_CODE (type1) == TREE_CODE (type2) + && TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2) + && TYPE_PRECISION (type1) == TYPE_PRECISION (type2)); +} + +static bool +bit_field_p (gimple *stmt) +{ + unsigned i = 0; + auto_vec<data_reference_p, 2> datarefs_vec; + data_reference_p dr; + if (!find_data_references_in_stmt (NULL, stmt, &datarefs_vec)) + return true; + + FOR_EACH_VEC_ELT (datarefs_vec, i, dr) + { + if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF + && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) + return true; + } + return false; +} + +static inline bool +shift_operation (enum tree_code op) +{ + return op == LSHIFT_EXPR || op == RSHIFT_EXPR || op == LROTATE_EXPR + || op == RROTATE_EXPR; +} + +/* Return relationship between USE_STMT and the first use_stmt of the group. + RHS1 is the lhs of stmt recorded in group_info. If another rhs of use_stmt + is not a constant, return UNCERTAIN and re-check it later. */ + +static unsigned +check_isomorphic (gimple *use_stmt, gimple *first, + tree rhs1, vec<tree> &hetero_lhs) +{ + /* Check same operation. 
*/ + enum tree_code rhs_code_first = gimple_assign_rhs_code (first); + enum tree_code rhs_code_current = gimple_assign_rhs_code (use_stmt); + if (rhs_code_first != rhs_code_current) + return HETEROGENEOUS; + + /* For shift operations, operands should be equal. */ + if (shift_operation (rhs_code_current)) + { + tree shift_op_first = gimple_assign_rhs2 (first); + tree shift_op_current = gimple_assign_rhs2 (use_stmt); + if (!operand_equal_p (shift_op_first, shift_op_current, 0) + || !TREE_CONSTANT (shift_op_first)) + return HETEROGENEOUS; + + return ISOMORPHIC; + } + /* Type conversion expr or assignment. */ + if (gimple_num_ops (first) == 2) + return (rhs_code_first == NOP_EXPR || rhs_code_first == CONVERT_EXPR + || rhs_code_first == SSA_NAME) ? ISOMORPHIC : HETEROGENEOUS; + + /* We find USE_STMT from lhs of a stmt, denote it as rhs1 of USE_STMT and + the other one as rhs2. Check if define-stmt of current rhs2 is isomorphic + with define-stmt of rhs2 in the first USE_STMT at this group. */ + tree rhs2_first = gimple_assign_rhs1 (use_stmt) == rhs1 + ? gimple_assign_rhs2 (first) : gimple_assign_rhs1 (first); + tree rhs2_curr = gimple_assign_rhs1 (use_stmt) == rhs1 + ? gimple_assign_rhs2 (use_stmt) : gimple_assign_rhs1 (use_stmt); + + if (check_same_oprand_type (rhs2_first, rhs2_curr)) + { + if (TREE_CONSTANT (rhs2_curr)) + return ISOMORPHIC; + else if (hetero_lhs.contains (rhs2_curr)) + return HETEROGENEOUS; + + /* Provisionally set the stmt as uncertain and analyze the whole group + in function CHECK_UNCERTAIN later if all use_stmts are uncertain. */ + return UNCERTAIN; + } + return HETEROGENEOUS; +} + +static bool +unsupported_operations (gimple *stmt) +{ + enum tree_code code = gimple_assign_rhs_code (stmt); + return code == COND_EXPR; +} + +/* Check if the single use_stmt of STMT is isomorphic with the first one's + use_stmt in current group. 
*/ + +static unsigned +check_use_stmt (group_info elmt, gimple *&first, + vec<gimple *> &tmp_stmts, vec<tree> &hetero_lhs) +{ + if (gimple_code (elmt->stmt) != GIMPLE_ASSIGN) + return HETEROGENEOUS; + use_operand_p dummy; + tree lhs = gimple_assign_lhs (elmt->stmt); + gimple *use_stmt = NULL; + single_imm_use (lhs, &dummy, &use_stmt); + /* STMTs with three rhs are not supported, e.g., GIMPLE_COND. */ + if (use_stmt == NULL || gimple_code (use_stmt) != GIMPLE_ASSIGN + || unsupported_operations (use_stmt) || bit_field_p (use_stmt)) + return HETEROGENEOUS; + tmp_stmts.safe_push (use_stmt); + if (first == NULL) + { + first = use_stmt; + return UNINITIALIZED; + } + /* Check if current use_stmt and the first member's use_stmt in the group + are of the same type. */ + tree first_lhs = gimple_assign_lhs (first); + tree curr_lhs = gimple_assign_lhs (use_stmt); + if (!check_same_oprand_type (first_lhs, curr_lhs)) + return HETEROGENEOUS; + return check_isomorphic (use_stmt, first, lhs, hetero_lhs); +} + +/* Replace stmt field in group with stmts in TMP_STMTS, and insert their + lhs_info to ISOMER_LHS. */ + +static void +update_isomer_lhs (vec<group_info> *group, unsigned group_num, + unsigned iteration, isomer_stmt_lhs &isomer_lhs, + vec<gimple *> tmp_stmts, int &profit, + vec<unsigned> &merged_groups) +{ + group_info elmt = NULL; + /* Do not insert temp array if isomorphic stmts from grouped load have + only casting operations. Once isomorphic calculation has 3 operands, + such as plus operation, this group can be regarded as cut point. */ + bool operated = (gimple_num_ops (tmp_stmts[0]) == 3); + /* Do not insert temp arrays if search of isomorphic stmts reaches + MEM stmts. */ + bool has_vdef = gimple_vdef (tmp_stmts[0]) != NULL; + bool merge = false; + for (unsigned i = 0; i < group->length (); i++) + { + elmt = (*group)[i]; + elmt->stmt = has_vdef ? NULL : tmp_stmts[i]; + elmt->cut_point = has_vdef ? 
false : (elmt->cut_point || operated); + elmt->uncertain = false; + elmt->done = has_vdef; + tree lhs = gimple_assign_lhs (tmp_stmts[i]); + if (isomer_lhs.find (lhs) != isomer_lhs.end ()) + { + merge = true; + continue; + } + isomer_lhs[lhs] = std::make_pair (group_num, iteration); + } + if (merge) + { + merged_groups.safe_push (group_num); + profit = 0; + return; + } + enum vect_cost_for_stmt kind = scalar_stmt; + int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); + profit = (tmp_stmts.length () - 1) * scalar_cost; +} + +/* Try to find rhs2 in ISOMER_LHS, if all rhs2 were found and their group_num + and iteration are same, GROUP is isomorphic. */ + +static unsigned +check_isomorphic_rhs (vec<group_info> *group, vec<gimple *> &tmp_stmts, + isomer_stmt_lhs &isomer_lhs) +{ + group_info elmt = NULL; + gimple *stmt = NULL; + unsigned j = 0; + unsigned group_num = -1u; + unsigned iteration = -1u; + tree rhs1 = NULL; + tree rhs2 = NULL; + unsigned status = UNINITIALIZED; + FOR_EACH_VEC_ELT (*group, j, elmt) + { + rhs1 = gimple_assign_lhs (elmt->stmt); + stmt = tmp_stmts[j]; + rhs2 = (rhs1 == gimple_assign_rhs1 (stmt)) + ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt); + isomer_stmt_lhs::iterator iter = isomer_lhs.find (rhs2); + if (iter != isomer_lhs.end ()) + { + if (group_num == -1u) + { + group_num = iter->second.first; + iteration = iter->second.second; + status |= ISOMORPHIC; + continue; + } + if (iter->second.first == group_num + && iter->second.second == iteration) + { + status |= ISOMORPHIC; + continue; + } + return HETEROGENEOUS; + } + else + status |= UNCERTAIN; + } + return status; +} + +/* Update group_info for uncertain groups. 
*/ + +static void +update_uncertain_stmts (vec<group_info> *group, unsigned group_num, + unsigned iteration, vec<gimple *> &tmp_stmts) +{ + unsigned j = 0; + group_info elmt = NULL; + FOR_EACH_VEC_ELT (*group, j, elmt) + { + elmt->uncertain = true; + elmt->done = false; + } +} + +/* Push stmts in TMP_STMTS into HETERO_LHS. */ + +static void +set_hetero (vec<group_info> *group, vec<tree> &hetero_lhs, + vec<gimple *> &tmp_stmts) +{ + group_info elmt = NULL; + unsigned i = 0; + for (i = 0; i < group->length (); i++) + { + elmt = (*group)[i]; + elmt->uncertain = false; + elmt->done = true; + } + gimple *stmt = NULL; + FOR_EACH_VEC_ELT (tmp_stmts, i, stmt) + if (stmt != NULL) + hetero_lhs.safe_push (gimple_assign_lhs (stmt)); +} + +/* Given an uncertain group, TMP_STMTS are use_stmts of stmts in GROUP. + Rhs1 is the lhs of stmt in GROUP, rhs2 is the other rhs of USE_STMT. + + Try to find rhs2 in ISOMER_LHS, if all found rhs2 have same group_num + and iteration, this uncertain group is isomorphic. + + If no rhs matched, this GROUP remains uncertain and update group_info. + + Otherwise, this GROUP is heterogeneous and return true to end analysis + for this group. */ + +static bool +check_uncertain (vec<group_info> *group, unsigned group_num, + unsigned iteration, int &profit, + vec<gimple *> &tmp_stmts, isomer_stmt_lhs &isomer_lhs, + vec<tree> &hetero_lhs, vec<unsigned> &merged_groups) +{ + unsigned status = check_isomorphic_rhs (group, tmp_stmts, isomer_lhs); + bool done = false; + switch (status) + { + case UNCERTAIN: + update_uncertain_stmts (group, group_num, iteration, tmp_stmts); + break; + case ISOMORPHIC: + update_isomer_lhs (group, group_num, iteration, isomer_lhs, + tmp_stmts, profit, merged_groups); + break; + default: + set_hetero (group, hetero_lhs, tmp_stmts); + done = true; + } + return done; +} + +/* Return false if analysis of this group is not finished, e.g., isomorphic or + uncertain. Calculate the profit if vectorized. 
*/ + +static bool +check_group (vec<group_info> *group, unsigned group_num, unsigned iteration, + int &profit, vec<unsigned> &merged_groups, + isomer_stmt_lhs &isomer_lhs, vec<tree> &hetero_lhs) +{ + unsigned j = 0; + group_info elmt = NULL; + gimple *first = NULL; + unsigned res = 0; + /* Record single use stmts in TMP_STMTS and decide whether to replace stmts + in ginfo in succeeding processes. */ + auto_vec<gimple *> tmp_stmts; + FOR_EACH_VEC_ELT (*group, j, elmt) + { + if (merged_groups.contains (group_num)) + return true; + res |= check_use_stmt (elmt, first, tmp_stmts, hetero_lhs); + } + + /* Update each group member according to RES. */ + switch (res) + { + case ISOMORPHIC: + update_isomer_lhs (group, group_num, iteration, isomer_lhs, + tmp_stmts, profit, merged_groups); + return false; + case UNCERTAIN: + return check_uncertain (group, group_num, iteration, profit, + tmp_stmts, isomer_lhs, hetero_lhs, + merged_groups); + default: + set_hetero (group, hetero_lhs, tmp_stmts); + return true; + } +} + +/* Return true if all analyses are done except uncertain groups. */ + +static bool +end_of_search (vec<vec<group_info> *> &circular_queue, + vec<unsigned> &merged_groups) +{ + unsigned i = 0; + vec<group_info> *group = NULL; + group_info elmt = NULL; + FOR_EACH_VEC_ELT (circular_queue, i, group) + { + if (merged_groups.contains (i)) + continue; + elmt = (*group)[0]; + /* If there are any isomorphic use_stmts, continue analysis of isomorphic + use_stmts. */ + if (!elmt->done && !elmt->uncertain) + return false; + } + return true; +} + +/* Push valid stmts to STMTS as cutpoints. 
*/ + +static bool +check_any_cutpoints (vec<vec<group_info> *> &circular_queue, + vec<gimple *> *&stmts, vec<unsigned> &merged_groups) +{ + unsigned front = 0; + vec<group_info> *group = NULL; + group_info elmt = NULL; + unsigned max = circular_queue.length () * circular_queue[0]->length (); + vec_alloc (stmts, max); + while (front < circular_queue.length ()) + { + unsigned i = 0; + if (merged_groups.contains (front)) + { + front++; + continue; + } + group = circular_queue[front++]; + FOR_EACH_VEC_ELT (*group, i, elmt) + if (elmt->stmt != NULL && elmt->done && elmt->cut_point) + stmts->safe_push (elmt->stmt); + } + return stmts->length () != 0; +} + +/* Grouped loads are isomorphic. Make pair for group number and iteration, + map load stmt to this pair. We set iteration 0 here. */ + +static void +init_isomer_lhs (vec<vec<group_info> *> &groups, isomer_stmt_lhs &isomer_lhs) +{ + vec<group_info> *group = NULL; + group_info elmt = NULL; + unsigned i = 0; + FOR_EACH_VEC_ELT (groups, i, group) + { + unsigned j = 0; + FOR_EACH_VEC_ELT (*group, j, elmt) + isomer_lhs[gimple_assign_lhs (elmt->stmt)] = std::make_pair (i, 0); + } +} + +/* It's not a strict analysis of load/store profit. Assume scalar and vector + load/store are of the same cost. The result PROFIT equals profit form + vectorizing of scalar loads/stores minus cost of a vectorized load/store. 
*/ + +static int +load_store_profit (unsigned scalar_mem_ops, unsigned vf, unsigned new_mem_ops) +{ + int profit = 0; + enum vect_cost_for_stmt kind = scalar_load; + int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); + profit += (scalar_mem_ops - (scalar_mem_ops / vf)) * scalar_cost; + profit -= new_mem_ops / vf * scalar_cost; + kind = scalar_store; + scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); + profit -= new_mem_ops / vf * scalar_cost; + return profit; +} + +/* Breadth first search the graph consisting of define-use chain starting from + the circular queue initialized by function BUILD_QUEUE. Find single use of + each stmt in group and check if they are isomorphic. Isomorphic is defined + as same rhs type, same operator, and isomorphic calculation of each rhs + starting from load. If another rhs is uncertain to be isomorphic, put it + at the end of circular queue and re-analyze it during the next iteration. + If a group shares the same use_stmt with another group, skip one of them in + succeeding processes as merged. Iterate the circular queue until all + remaining groups are heterogeneous or the search reaches MEM stmts. If all + other groups have finished the analysis, and the remaining groups are + uncertain, return false to avoid endless loop. */ + +bool +bfs_find_isomer_stmts (vec<vec<group_info> *> &circular_queue, + stmts_profit &profit_pair, unsigned vf, + bool &reach_vdef) +{ + isomer_stmt_lhs isomer_lhs; + auto_vec<tree> hetero_lhs; + auto_vec<unsigned> merged_groups; + vec<group_info> *group = NULL; + /* True if analysis finishes. */ + bool done = false; + int profit_sum = 0; + vec<gimple *> *stmts = NULL; + init_isomer_lhs (circular_queue, isomer_lhs); + for (unsigned i = 1; !done; ++i) + { + unsigned front = 0; + /* Re-initialize DONE to TRUE while a new iteration begins. 
*/ + done = true; + while (front < circular_queue.length ()) + { + int profit = 0; + group = circular_queue[front]; + done &= check_group (group, front, i, profit, merged_groups, + isomer_lhs, hetero_lhs); + profit_sum += profit; + if (profit != 0 && (*group)[0]->stmt == NULL) + { + reach_vdef = true; + return false; + } + ++front; + } + /* Uncertain result, return. */ + if (!done && end_of_search (circular_queue, merged_groups)) + return false; + } + if (check_any_cutpoints (circular_queue, stmts, merged_groups)) + { + profit_pair.first = stmts; + unsigned loads = circular_queue.length () * circular_queue[0]->length (); + profit_pair.second = profit_sum + load_store_profit (loads, vf, + stmts->length ()); + if (profit_pair.second > 0) + return true; + } + return false; +} + +/* Free memory allocated by ginfo. */ + +static void +free_ginfos (vec<vec<group_info> *> &worklists) +{ + vec<group_info> *worklist; + unsigned i = 0; + while (i < worklists.length ()) + { + worklist = worklists[i++]; + group_info ginfo; + unsigned j = 0; + FOR_EACH_VEC_ELT (*worklist, j, ginfo) + delete ginfo; + } +} + +static void +release_tmp_stmts (vf_stmts_profit_map &candi_stmts) +{ + vf_stmts_profit_map::iterator iter; + for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter) + iter->second.first->release (); +} + +/* Choose the group of stmt with maximun profit. 
*/ + +static bool +decide_stmts_by_profit (vf_stmts_profit_map &candi_stmts, vec<gimple *> &stmts) +{ + vf_stmts_profit_map::iterator iter; + int profit = 0; + int max = 0; + vec<gimple *> *tmp = NULL; + for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter) + { + profit = iter->second.second; + if (profit > max) + { + tmp = iter->second.first; + max = profit; + } + } + if (max == 0) + { + release_tmp_stmts (candi_stmts); + return false; + } + unsigned i = 0; + gimple *stmt = NULL; + FOR_EACH_VEC_ELT (*tmp, i, stmt) + stmts.safe_push (stmt); + release_tmp_stmts (candi_stmts); + return stmts.length () != 0; +} + +/* Find isomorphic stmts from grouped loads with vector factor VF. + + Given source code as follows and ignore casting. + + a0 = (a[0] + b[0]) + ((a[4] - b[4]) << 16); + a1 = (a[1] + b[1]) + ((a[5] - b[5]) << 16); + a2 = (a[2] + b[2]) + ((a[6] - b[6]) << 16); + a3 = (a[3] + b[3]) + ((a[7] - b[7]) << 16); + + We get grouped loads in VINFO as + + GROUP_1 GROUP_2 + _1 = *a _11 = *b + _2 = *(a + 1) _12 = *(b + 1) + _3 = *(a + 2) _13 = *(b + 2) + _4 = *(a + 3) _14 = *(b + 3) + _5 = *(a + 4) _15 = *(b + 4) + _6 = *(a + 5) _16 = *(b + 5) + _7 = *(a + 6) _17 = *(b + 6) + _8 = *(a + 7) _18 = *(b + 7) + + First we try VF = 8, we get two worklists + + WORKLIST_1 WORKLIST_2 + _1 = *a _11 = *b + _2 = *(a + 1) _12 = *(b + 1) + _3 = *(a + 2) _13 = *(b + 2) + _4 = *(a + 3) _14 = *(b + 3) + _5 = *(a + 4) _15 = *(b + 4) + _6 = *(a + 5) _16 = *(b + 5) + _7 = *(a + 6) _17 = *(b + 6) + _8 = *(a + 7) _18 = *(b + 7) + + We find _111 = _1 + _11 and _115 = _5 - _15 are not isomorphic, + so we try VF = VF / 2. 
+ + GROUP_1 GROUP_2 + _1 = *a _5 = *(a + 4) + _2 = *(a + 1) _6 = *(a + 5) + _3 = *(a + 2) _7 = *(a + 6) + _4 = *(a + 3) _8 = *(a + 7) + + GROUP_3 GROUP_4 + _11 = *b _15 = *(b + 4) + _12 = *(b + 1) _16 = *(b + 5) + _13 = *(b + 2) _17 = *(b + 6) + _14 = *(b + 3) _18 = *(b + 7) + + We first analyze group_1, and find all operations are isomorphic, then + replace stmts in group_1 with their use_stmts. Group_2 as well. + + GROUP_1 GROUP_2 + _111 = _1 + _11 _115 = _5 - _15 + _112 = _2 + _12 _116 = _6 - _16 + _113 = _3 + _13 _117 = _7 - _17 + _114 = _4 + _14 _118 = _8 - _18 + + When analyzing group_3 and group_4, we find their use_stmts are the same + as group_1 and group_2. So group_3 is regarded as being merged to group_1 + and group_4 being merged to group_2. In future procedures, we will skip + group_3 and group_4. + + We repeat such processing until opreations are not isomorphic or searching + reaches MEM stmts. In our given case, searching end up at a0, a1, a2 and + a3. */ + +static bool +find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts) +{ + unsigned vf = get_max_vf (vinfo); + if (vf == 0) + return false; + auto_vec<vec<group_info> *> circular_queue; + /* Map of vector factor and corresponding vectorizing profit. */ + stmts_profit profit_map; + /* Map of cut_points and vector factor. 
*/ + vf_stmts_profit_map candi_stmts; + bool reach_vdef = false; + while (vf > 2) + { + if (build_queue (vinfo, vf, circular_queue) == 0) + return false; + if (!bfs_find_isomer_stmts (circular_queue, profit_map, vf, reach_vdef)) + { + if (reach_vdef) + { + release_tmp_stmts (candi_stmts); + free_ginfos (circular_queue); + circular_queue.release (); + return false; + } + vf /= 2; + free_ginfos (circular_queue); + circular_queue.release (); + continue; + } + candi_stmts[vf] = profit_map; + free_ginfos (circular_queue); + vf /= 2; + circular_queue.release (); + } + return decide_stmts_by_profit (candi_stmts, stmts); +} + /* Distributes the code from LOOP in such a way that producer statements are placed before consumer statements. Tries to separate only the statements from STMTS into separate loops. Returns the number of diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 7990e31de..1e332d3c5 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2516,9 +2516,11 @@ vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int *n_stmts) Apply a set of analyses on LOOP, and create a loop_vec_info struct for it. The different analyses will record information in the - loop_vec_info struct. */ + loop_vec_info struct. When RESULT_ONLY_P is true, quit analysis + if loop is vectorizable, otherwise, do not delete vinfo.*/ opt_loop_vec_info -vect_analyze_loop (class loop *loop, vec_info_shared *shared) +vect_analyze_loop (class loop *loop, vec_info_shared *shared, + bool result_only_p) { auto_vector_modes vector_modes; @@ -2545,6 +2547,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) unsigned n_stmts = 0; machine_mode autodetected_vector_mode = VOIDmode; opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); + /* Loop_vinfo for loop-distribution pass. 
*/ + opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL); machine_mode next_vector_mode = VOIDmode; poly_uint64 lowest_th = 0; unsigned vectorized_loops = 0; @@ -2633,6 +2637,13 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) if (res) { LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; + /* In loop-distribution pass, we only need to get loop_vinfo, do not + conduct further operations. */ + if (result_only_p) + { + loop->aux = (loop_vec_info) loop_vinfo; + return loop_vinfo; + } vectorized_loops++; /* Once we hit the desired simdlen for the first time, @@ -2724,7 +2735,19 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) } else { - delete loop_vinfo; + /* If current analysis shows LOOP is unable to vectorize, loop_vinfo + will be deleted. If LOOP is under ldist analysis, backup it before + it is deleted and return it if all modes are analyzed and still + fail to vectorize. */ + if (result_only_p && (mode_i == vector_modes.length () + || autodetected_vector_mode == VOIDmode)) + { + fail_loop_vinfo = loop_vinfo; + } + else + { + delete loop_vinfo; + } if (fatal) { gcc_checking_assert (first_loop_vinfo == NULL); @@ -2773,6 +2796,14 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) return first_loop_vinfo; } + /* Return loop_vinfo for ldist if loop is unvectorizable. */ + if (result_only_p && (mode_i == vector_modes.length () + || autodetected_vector_mode == VOIDmode)) + { + loop->aux = (loop_vec_info) fail_loop_vinfo; + return fail_loop_vinfo; + } + return opt_loop_vec_info::propagate_failure (res); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 1c4a6c421..dc8175f00 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1896,7 +1896,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, enum tree_code); extern bool needs_fold_left_reduction_p (tree, tree_code); /* Drive for loop analysis stage. 
*/ -extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *); +extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *, + bool result_only_p = false); extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, tree *, bool); -- 2.27.0.windows.1
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2