File _service:tar_scm:0050-Port-IPA-prefetch-to-GCC-12.patch of Package gcc
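Before the patch text, a brief orientation with a hedged example may help. The patch ports the IPA prefetch pass to GCC 12. The pass is interprocedural: it propagates loop information over the call graph, finds memory references whose base object is loop-invariant while one of its fields is incremented on every iteration, and inserts __builtin_prefetch calls for the address the loop will touch several iterations ahead. The following minimal C sketch illustrates the kind of source pattern targeted; every name in it (struct stream, next_byte, checksum) is a hypothetical illustration, not taken from the patch:

/* Hypothetical pattern that ipa-prefetch.cc looks for: an "indirect"
   load and an "incremental" store sharing the same invariant base.  */

struct stream
{
  const unsigned char *buf;   /* loop-invariant base address      */
  unsigned long pos;          /* field incremented on every call  */
};

static unsigned
next_byte (struct stream *s)  /* possibly reached via indirect call */
{
  unsigned v = s->buf[s->pos];   /* indirect memref                */
  s->pos += 2;                   /* incremental memref, step 2     */
  return v;
}

unsigned
checksum (struct stream *s, unsigned long n)
{
  unsigned sum = 0;
  for (unsigned long i = 0; i < n; i++)
    {
      /* Conceptually, the pass inserts something like
           __builtin_prefetch (&s->buf[s->pos + 2 * 4], 0, 3);
         ahead of the access, where 2 is the detected step, 4 the
         default --param ipa-prefetch-distance-factor, and 3 the
         default --param ipa-prefetch-locality.  */
      sum += next_byte (s);
    }
  return sum;
}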
From 7ee50ce44c652e21ca8ad33dc4e175f02b51b072 Mon Sep 17 00:00:00 2001 From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> Date: Fri, 8 Mar 2024 06:50:39 +0800 Subject: [PATCH 18/18] Port IPA prefetch to GCC 12 --- gcc/Makefile.in | 1 + gcc/cgraph.cc | 1 + gcc/cgraph.h | 2 + gcc/common.opt | 8 + gcc/ipa-devirt.cc | 54 +- gcc/ipa-prefetch.cc | 1819 +++++++++++++++++++++++++++++++++++++++++++ gcc/ipa-sra.cc | 8 + gcc/params.opt | 8 + gcc/passes.def | 1 + gcc/timevar.def | 1 + gcc/tree-pass.h | 1 + 11 files changed, 1902 insertions(+), 2 deletions(-) create mode 100644 gcc/ipa-prefetch.cc diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 876000bda..10544e4a9 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1468,6 +1468,7 @@ OBJS = \ ipa-modref.o \ ipa-modref-tree.o \ ipa-predicate.o \ + ipa-prefetch.o \ ipa-profile.o \ ipa-prop.o \ ipa-param-manipulation.o \ diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc index 3734c85db..7d738b891 100644 --- a/gcc/cgraph.cc +++ b/gcc/cgraph.cc @@ -998,6 +998,7 @@ cgraph_node::create_indirect_edge (gcall *call_stmt, int ecf_flags, edge->indirect_info = cgraph_allocate_init_indirect_info (); edge->indirect_info->ecf_flags = ecf_flags; edge->indirect_info->vptr_changed = true; + edge->indirect_info->targets = NULL; /* Record polymorphic call info. */ if (!cloning_p diff --git a/gcc/cgraph.h b/gcc/cgraph.h index d96690326..b84ff2f98 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1659,6 +1659,8 @@ public: int param_index; /* ECF flags determined from the caller. */ int ecf_flags; + /* Vector of potential call targets determined by analysis. */ + vec<cgraph_node *, va_gc_atomic> *targets; /* Number of speculative call targets, it's less than GCOV_TOPN_VALUES. */ unsigned num_speculative_call_targets : 16; diff --git a/gcc/common.opt b/gcc/common.opt index 1eb62ada5..e65a06af9 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1328,6 +1328,10 @@ fdevirtualize Common Var(flag_devirtualize) Optimization Try to convert virtual calls to direct ones. +fipa-ic +Common Var(flag_ipa_ic) Optimization Init(0) +Perform interprocedural analysis of indirect calls. + ficp Common Var(flag_icp) Optimization Init(0) Try to promote indirect calls to direct ones. @@ -2367,6 +2371,10 @@ fprefetch-loop-arrays Common Var(flag_prefetch_loop_arrays) Init(-1) Optimization Generate prefetch instructions, if available, for arrays in loops. +fipa-prefetch +Common Var(flag_ipa_prefetch) Init(0) Optimization +Generate prefetch instructions, if available, using IPA info. + fprofile Common Var(profile_flag) Enable basic program profiling code. diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc index 318535d06..dd3562d56 100644 --- a/gcc/ipa-devirt.cc +++ b/gcc/ipa-devirt.cc @@ -5758,6 +5758,54 @@ merge_fs_map_for_ftype_aliases () } } +/* Save results of indirect call analysis for the next passes. */ + +static void +save_analysis_results () +{ + if (dump_file) + fprintf (dump_file, "\n\nSave results of indirect call analysis.\n"); + + struct cgraph_node *n; + FOR_EACH_FUNCTION (n) + { + cgraph_edge *e, *next; + for (e = n->indirect_calls; e; e = next) + { + next = e->next_callee; + if (e->indirect_info->polymorphic) + continue; + gcall *stmt = e->call_stmt; + gcc_assert (stmt != NULL); + tree call_fn = gimple_call_fn (stmt); + tree call_fn_ty = TREE_TYPE (call_fn); + if (!POINTER_TYPE_P (call_fn_ty)) + continue; + + tree ctype = TYPE_CANONICAL (TREE_TYPE (call_fn_ty)); + unsigned ctype_uid = ctype ? 
TYPE_UID (ctype) : 0; + if (!ctype_uid || unsafe_types->count (ctype_uid) + || !fs_map->count (ctype_uid)) + continue; + /* TODO: cleanup noninterposable aliases. */ + decl_set *decls = (*fs_map)[ctype_uid]; + if (dump_file) + { + fprintf (dump_file, "For call "); + print_gimple_stmt (dump_file, stmt, 0); + } + vec_alloc (e->indirect_info->targets, decls->size ()); + for (decl_set::const_iterator it = decls->begin (); + it != decls->end (); it++) + { + struct cgraph_node *target = cgraph_node::get (*it); + /* TODO: maybe discard some targets. */ + e->indirect_info->targets->quick_push (target); + } + } + } +} + /* Dump function types with set of functions corresponding to it. */ static void @@ -5822,6 +5870,8 @@ collect_function_signatures () } } merge_fs_map_for_ftype_aliases (); + if (flag_ipa_ic) + save_analysis_results (); if (dump_file) dump_function_signature_sets (); } @@ -6217,7 +6267,7 @@ ipa_icp (void) optimize indirect calls. */ collect_function_type_aliases (); collect_function_signatures (); - bool optimized = optimize_indirect_calls (); + bool optimized = flag_icp ? optimize_indirect_calls () : false; remove_type_alias_map (ta_map); remove_type_alias_map (fta_map); @@ -6264,7 +6314,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return (optimize && flag_icp && !seen_error () + return (optimize && (flag_icp || flag_ipa_ic) && !seen_error () && (in_lto_p || flag_whole_program)); } diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc new file mode 100644 index 000000000..aeea51105 --- /dev/null +++ b/gcc/ipa-prefetch.cc @@ -0,0 +1,1819 @@ +/* IPA prefetch optimizations. + Copyright (C) 2023 Free Software Foundation, Inc. + Contributed by Ilia Diachkov. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* IPA prefetch is an interprocedural pass that detects cases of indirect + memory access potentially in loops and inserts prefetch instructions + to optimize cache usage during these indirect memory accesses. 
*/ + +#include "config.h" +#define INCLUDE_SET +#define INCLUDE_MAP +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "tm.h" +#include "tree.h" +#include "tree-pass.h" +#include "cgraph.h" +#include "diagnostic-core.h" +#include "function.h" +#include "basic-block.h" +#include "gimple.h" +#include "vec.h" +#include "tree-pretty-print.h" +#include "gimple-pretty-print.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" +#include "cfg.h" +#include "cfghooks.h" +#include "ssa.h" +#include "tree-dfa.h" +#include "fold-const.h" +#include "tree-inline.h" +#include "stor-layout.h" +#include "tree-into-ssa.h" +#include "tree-cfg.h" +#include "alloc-pool.h" +#include "symbol-summary.h" +#include "ipa-prop.h" +#include "tree-eh.h" +#include "bitmap.h" +#include "cfgloop.h" +#include "langhooks.h" +#include "ipa-param-manipulation.h" +#include "ipa-fnsummary.h" +#include "tree-ssa-loop.h" +#include "tree-ssa-loop-ivopts.h" +#include "gimple-fold.h" +#include "gimplify.h" + +namespace { + +/* Call graph analysis. */ + +typedef std::set<cgraph_edge *> edge_set; +typedef std::set<cgraph_node *> node_set; +typedef std::map<cgraph_node *, edge_set *> node_to_iedge_map; +typedef std::map<cgraph_node *, node_set *> node_to_node_map; +typedef std::map<cgraph_edge *, double> edge_in_loop; +typedef std::map<cgraph_node *, double> node_in_loop; + +static edge_in_loop *el_map = NULL; +static node_in_loop *nl_map = NULL; +static node_to_iedge_map *icn_map = NULL; +/* Contains nodes which reachable from a given node. */ +static node_to_node_map *nn_map = NULL; + +static bool +can_be_optimized (cgraph_node *n) +{ + /* TODO: maybe check also inlined_to. */ + return opt_for_fn (n->decl, flag_ipa_prefetch) && n->has_gimple_body_p (); +} + +static void +analyze_cgraph_edge (cgraph_edge *e) +{ + gcall *stmt = e->call_stmt; + gcc_checking_assert (e && stmt); + basic_block bb = gimple_bb (stmt); + gcc_checking_assert (bb); + /* TODO: add the same check for indirect calls. */ + if (e->callee && !can_be_optimized (e->callee)) + return; + + if (dump_file) + { + if (e->callee) + fprintf (dump_file, "\t%*s%s %s%*s ", 1, "", + e->callee->dump_name (), !e->inline_failed ? "inlined" : + cgraph_inline_failed_string (e->inline_failed), 1, ""); + else + fprintf (dump_file, "\t%*s%s %s%*s ", 1, "", "(indirect)", + "n/a", 1, ""); + fprintf (dump_file, "freq:%4.2f", e->sreal_frequency ().to_double ()); + + if (e->callee && cross_module_call_p (e)) + fprintf (dump_file, " cross module"); + + class ipa_call_summary *es = ipa_call_summaries->get (e); + if (es) + fprintf (dump_file, " loop depth:%2i size:%2i time: %2i", + es->loop_depth, es->call_stmt_size, es->call_stmt_time); + + fprintf (dump_file, "\n"); + } + if (e->indirect_info && dump_file) + { + fprintf (dump_file, "II: %p\n", (void *) e->indirect_info->targets); + unsigned i = 0; + cgraph_node *n; + if (e->indirect_info->targets) + for (i = 0; e->indirect_info->targets->iterate (i, &n); ++i) + fprintf (dump_file, "\t%s\n", n->dump_name ()); + } + + if (bb_loop_depth (bb) == 0) + return; + + if (dump_file) + { + if (e->callee) + fprintf (dump_file, "\tCall in loop (%d): ", bb_loop_depth (bb)); + else + fprintf (dump_file, "\tICall in loop (%d): ", bb_loop_depth (bb)); + print_gimple_stmt (dump_file, stmt, 0); + } + (*el_map)[e] = e->sreal_frequency ().to_double (); +} + +/* Walk optimizible cgraph nodes and collect info for edges. 
*/ + +static void +analyse_cgraph () +{ + cgraph_node *n; + cgraph_edge *e; + FOR_EACH_DEFINED_FUNCTION (n) + { + if (dump_file) + { + fprintf (dump_file, "\n\nProcesing function %s\n", n->dump_name ()); + print_generic_expr (dump_file, n->decl); + fprintf (dump_file, "\n"); + } + if (!can_be_optimized (n)) + { + if (dump_file) + fprintf (dump_file, "Skip the function\n"); + continue; + } + + /* TODO: maybe remove loop info here. */ + push_cfun (DECL_STRUCT_FUNCTION (n->decl)); + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (LOOPS_NORMAL); + + for (e = n->callees; e; e = e->next_callee) + analyze_cgraph_edge (e); + for (e = n->indirect_calls; e; e = e->next_callee) + analyze_cgraph_edge (e); + + free_dominance_info (CDI_DOMINATORS); + loop_optimizer_finalize (); + + pop_cfun (); + } +} + +/* Save indirect call info to node:icall_target map. */ + +static void +prepare_indirect_call_info () +{ + cgraph_node *n, *n2; + cgraph_edge *e; + FOR_EACH_DEFINED_FUNCTION (n) + for (e = n->indirect_calls; e; e = e->next_callee) + { + if (!e->indirect_info->targets) + continue; + for (unsigned i = 0; e->indirect_info->targets->iterate (i, &n2); ++i) + { + if (icn_map->count (n2) == 0) + (*icn_map)[n2] = new edge_set; + (*icn_map)[n2]->insert (e); + } + } +} + +static void +collect_nn_info (struct cgraph_edge *e, struct cgraph_node *n) +{ + struct cgraph_node *n2 = e->caller; + if (nn_map->count (n2) == 0) + (*nn_map)[n2] = new node_set; + (*nn_map)[n2]->insert (n); + if (nn_map->count (n) != 0) + { + node_set *set = (*nn_map)[n]; + for (node_set::const_iterator it = set->begin (); + it != set->end (); it++) + (*nn_map)[n2]->insert (*it); + } +} + +static bool +check_loop_info_for_cgraph_edge (struct cgraph_edge *e, struct cgraph_node *n, + bool &all_in_loop, double &rate) +{ + collect_nn_info (e, n); + if (el_map->count (e) == 0) + { + if (dump_file) + fprintf (dump_file, "not all: %s->%s\n", + e->caller->dump_name (), n->dump_name ()); + all_in_loop = false; + return false; + } + rate += (*el_map)[e]; + return true; +} + +static bool +update_loop_info_for_cgraph_node (struct cgraph_node *n) +{ + bool changed = false, all_in_loop = true; + double rate = 0.0; + struct cgraph_edge *e; + + /* Iterate all direct callers. */ + if (n->callers) + for (e = n->callers; e; e = e->next_caller) + if (!check_loop_info_for_cgraph_edge (e, n, all_in_loop, rate)) + break; + + /* Iterate all possible indirect callers. */ + edge_set *set = (*icn_map)[n]; + if (set) + for (edge_set::const_iterator it = set->begin (); it != set->end (); it++) + if (!check_loop_info_for_cgraph_edge (*it, n, all_in_loop, rate)) + break; + + /* The node had 0 loop count but the rate is > 0, + so something is changed. */ + if (dump_file) + fprintf (dump_file, "%s: all=%d, nl->c=%lu, r=%4.2f\n", n->dump_name (), + all_in_loop, nl_map->count (n), rate); + + if (all_in_loop && nl_map->count (n) == 0 && rate > 0.0) + { + if (dump_file) + fprintf (dump_file, "%s: new rate %4.2f\n", n->dump_name (), rate); + changed = true; + } + if (all_in_loop) + { + (*nl_map)[n] = nl_map->count (n) ? (*nl_map)[n] + rate : rate; + for (e = n->callees; e; e = e->next_callee) + (*el_map)[e] = el_map->count (e) ? (*el_map)[e] + rate : rate; + for (e = n->indirect_calls; e; e = e->next_callee) + { + (*el_map)[e] = el_map->count (e) ? 
(*el_map)[e] + rate : rate; + if (dump_file) + fprintf (dump_file, "%s: reset indirect e=%p to %4.2f\n", + n->dump_name (), (void *) e, (*el_map)[e]); + } + } + return changed; +} + +/* Propagate in_loop info over the call graph. */ + +static void +propagate_loop_info_in_cgraph () +{ + struct cgraph_node *n; + bool changed; + unsigned iteration = 0; + do + { + changed = false; + if (dump_file) + fprintf (dump_file, "\nIteration %u\n", iteration++); + FOR_EACH_DEFINED_FUNCTION (n) + { + if (!n->callers && !(*icn_map)[n]) + continue; + if (update_loop_info_for_cgraph_node (n)) + changed = true; + } + } while (changed); + + if (dump_file) + { + fprintf (dump_file, "\nList of nodes in loops:\n"); + FOR_EACH_DEFINED_FUNCTION (n) + if (nl_map->count (n) != 0) + fprintf (dump_file, "%s: %4.2f\n", n->dump_name (), (*nl_map)[n]); + fprintf (dump_file, "\nList of callable nodes:\n"); + FOR_EACH_DEFINED_FUNCTION (n) + if (nn_map->count (n) != 0) + { + node_set *set = (*nn_map)[n]; + fprintf (dump_file, "%s: ", n->dump_name ()); + for (node_set::const_iterator it = set->begin (); + it != set->end (); it++) + fprintf (dump_file, "%s ", (*it)->dump_name ()); + fprintf (dump_file, "\n"); + } + } +} + +/* Analysis of memory references. */ + +typedef enum +{ + MR_NONE, + MR_SIMPLE, + MR_POLYNOMIAL, + MR_INDIRECT, + MR_UNSUPPORTED +} mr_type; +const char *mr_type_str[] = + {"none", "simple", "poly", "indirect", "unsuppoted"}; + +struct memref_type; +typedef std::set<memref_type *> memref_set; + +static unsigned max_mr_id = 0; +typedef struct memref_type +{ + unsigned mr_id = 0; + mr_type type = MR_NONE; + tree mem = NULL_TREE; + tree base = NULL_TREE; + tree offset = NULL_TREE; + vec<gimple *, va_heap, vl_ptr> stmts = vNULL; + memref_set used_mrs; + bool is_store = false; + bool is_incr = false; + tree step = NULL_TREE; +} memref_t; + +typedef std::map<tree, memref_t *> tree_memref_map; +typedef std::map<function *, vec<memref_t *> > function_mrs_map; +typedef std::map<function *, memref_set *> funct_mrs_map; +typedef std::map<memref_t *, memref_t *> memref_map; +typedef std::map<memref_t *, tree> memref_tree_map; + +typedef std::set<gimple *> stmt_set; +typedef std::map<tree, tree> tree_map; + +tree_memref_map *tm_map; +funct_mrs_map *fmrs_map; +funct_mrs_map *optimize_mrs_map; +memref_map *mr_candidate_map; +tree_map *decl_map; + +static void analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr); + +static memref_t* +get_memref (gimple *stmt, tree mem, bool is_store) +{ + if (tm_map->count (mem)) + { + if (dump_file) + fprintf (dump_file, "Found mr %d for %p.\n", + (*tm_map)[mem]->mr_id, (void *) mem); + return (*tm_map)[mem]; + } + + memref_t *mr = new memref_t; + mr->mr_id = ++max_mr_id; + mr->is_store = is_store; + mr->mem = mem; + (*tm_map)[mem] = mr; + if (dump_file) + fprintf (dump_file, "Create mr %d for %p.\n", + mr->mr_id, (void *) mem); + analyse_mem_ref (stmt, mem, mr); + return mr; +} + +static void +print_mrs_ids (memref_set &mrs, const char *start) +{ + if (start) + fprintf (dump_file, "%s", start); + for (memref_set::const_iterator it = mrs.begin (); it != mrs.end (); it++) + fprintf (dump_file, "%d ", (*it)->mr_id); + fprintf (dump_file, "\n"); +} + +static void +print_memref (memref_t *mr) +{ + fprintf (dump_file, "MR (%d) type: %s (%s) mem: ", mr->mr_id, + mr_type_str[mr->type], mr->is_store ? 
"st" : "ld"); + print_generic_expr (dump_file, mr->mem); + fprintf (dump_file, "\nbase: "); + if (mr->base) + print_generic_expr (dump_file, mr->base); + else + fprintf (dump_file, "null"); + fprintf (dump_file, "\noffset: "); + if (mr->offset) + print_generic_expr (dump_file, mr->offset); + else + fprintf (dump_file, "null"); + fprintf (dump_file, "\nstmts:\n"); + for (unsigned int i = 0; i < mr->stmts.length (); i++) + print_gimple_stmt (dump_file, mr->stmts[i], 0); + print_mrs_ids (mr->used_mrs, "\tused memrefs: "); + if (mr->is_incr) + { + fprintf (dump_file, "\tis incremental with step: "); + print_generic_expr (dump_file, mr->step); + } + fprintf (dump_file, "\n"); +} + +/* If there is a simple load or store to a memory reference in STMT, returns + the location of the memory reference, and sets IS_STORE according to whether + it is a store or load. Otherwise, returns NULL. + TODO: from gcc/tree-ssa-loop-im.c, maybe make it global. */ + +static tree * +simple_mem_ref_in_stmt (gimple *stmt, bool *is_store) +{ + tree *lhs, *rhs; + + /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */ + if (!gimple_assign_single_p (stmt)) + return NULL; + + lhs = gimple_assign_lhs_ptr (stmt); + rhs = gimple_assign_rhs1_ptr (stmt); + + if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt)) + { + *is_store = false; + return rhs; + } + else if (gimple_vdef (stmt) + && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs))) + { + *is_store = true; + return lhs; + } + else + return NULL; +} + +static void +analyse_incremental (gimple *stmt, memref_t* mr) +{ + if (!gimple_assign_single_p (stmt)) + return; + tree rhs1, rhs2; + /* TODO: maybe support other types of stmts. */ + while (stmt && is_gimple_assign (stmt)) + { + enum tree_code def_code = gimple_assign_rhs_code (stmt); + gimple_rhs_class rhs_class = gimple_assign_rhs_class (stmt); + if (dump_file) + { + fprintf (dump_file, "Incr: in assign (%s)\n", + get_tree_code_name (def_code)); + print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); + } + gcc_assert (def_code != ERROR_MARK); + switch (rhs_class) + { + case GIMPLE_TERNARY_RHS: + if (dump_file) + fprintf (dump_file, "Incr: unsupported trinary rhs\n"); + stmt = NULL; + break; + case GIMPLE_UNARY_RHS: + case GIMPLE_SINGLE_RHS: + rhs1 = gimple_assign_rhs1 (stmt); + if (dump_file) + { + fprintf (dump_file, "Incr: (%s)", + get_tree_code_name (TREE_CODE (rhs1))); + print_generic_expr (dump_file, rhs1); + fprintf (dump_file, "\n"); + } + if (def_code == SSA_NAME) + stmt = SSA_NAME_DEF_STMT (rhs1); + else if (def_code == MEM_REF || def_code == COMPONENT_REF + || def_code == ARRAY_REF) + { + /* If we have dereference in address evaluation, + it's indirect memory access. 
*/ + if (dump_file) + { + if (operand_equal_p (mr->mem, rhs1)) + fprintf (dump_file, "Incr: the same MEM\n"); + else + fprintf (dump_file, "Incr: diff MEM\n"); + print_generic_expr (dump_file, rhs1); + fprintf (dump_file, " "); + print_generic_expr (dump_file, mr->mem); + fprintf (dump_file, "\n"); + } + if (operand_equal_p (mr->mem, rhs1) && mr->step) + mr->is_incr = true; + stmt = NULL; + } + else + { + if (dump_file) + fprintf (dump_file, "Incr: unsupported unary/single\n"); + stmt = NULL; + } + break; + case GIMPLE_BINARY_RHS: + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + if (dump_file) + { + fprintf (dump_file, "(%s) (%s)", + get_tree_code_name (TREE_CODE (rhs1)), + get_tree_code_name (TREE_CODE (rhs2))); + print_generic_expr (dump_file, rhs1); + fprintf (dump_file, " "); + print_generic_expr (dump_file, rhs2); + fprintf (dump_file, "\n"); + } + /* TODO: extend for other types of incrementation. */ + if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST) + { + stmt = SSA_NAME_DEF_STMT (rhs1); + mr->step = rhs2; + if (dump_file) + { + fprintf (dump_file, "Incr: const increment stmt: "); + print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); + } + } + else + stmt = NULL; + break; + default: + gcc_unreachable (); + } + } + if ((mr->step && !mr->is_incr) || (!mr->step && mr->is_incr)) + { + mr->step = NULL_TREE; + mr->is_incr = false; + } +} + +static mr_type +get_memref_type (memref_t *base, memref_t *used, enum tree_code code) +{ + /* TODO: improve memref type detection. */ + enum tree_code base_code = TREE_CODE (base->mem); + if (dump_file) + fprintf (dump_file, "get_memref_type: base=%d,%d used=%d,%d code=%s " + "base_code=%s\n", base->mr_id, base->type, + used ? used->mr_id : -1, used ? used->type : -1, + get_tree_code_name (code), get_tree_code_name (base_code)); + if (used) + { + if (base->type > used->type) + return base->type; + if (used->type == MR_SIMPLE) + return MR_POLYNOMIAL; + if (used->type == MR_POLYNOMIAL) + return base_code == ARRAY_REF ? MR_POLYNOMIAL : MR_INDIRECT; + if (used->type == MR_INDIRECT) + return MR_INDIRECT; + return MR_UNSUPPORTED; + } + if (code == MEM_REF || code == ARRAY_REF || code == COMPONENT_REF) + return base->type; + if (code == POINTER_PLUS_EXPR || code == PLUS_EXPR + || code == MINUS_EXPR || code == MULT_EXPR) + return base->type <= MR_POLYNOMIAL ? MR_POLYNOMIAL : base->type; + return base->type >= MR_INDIRECT ? base->type : MR_INDIRECT; +} + +/* Recursively walk defs of src expression and record used stmts and other mrs. + Return a base address candidate if it's found. 
*/ + +static tree +analyse_addr_eval (tree src, memref_t* mr) +{ + if (TREE_CODE (src) != SSA_NAME) + return NULL_TREE; + gimple *stmt = SSA_NAME_DEF_STMT (src); + if (dump_file) + { + fprintf (dump_file, "Src_stmt: "); + print_gimple_stmt (dump_file, stmt, 0); + } + if (!is_gimple_assign (stmt)) + { + if (dump_file) + { + fprintf (dump_file, "Is not assign, stop analysis: "); + print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); + } + mr->type = MR_UNSUPPORTED; + mr->stmts.safe_push (stmt); + return NULL_TREE; + } + enum tree_code def_code = gimple_assign_rhs_code (stmt); + if (def_code != MEM_REF && def_code != COMPONENT_REF + && def_code != ARRAY_REF) + mr->stmts.safe_push (stmt); + gimple_rhs_class rhs_class = gimple_assign_rhs_class (stmt); + tree rhs1, rhs2, base; + if (dump_file) + fprintf (dump_file, "In assign (%s): ", get_tree_code_name (def_code)); + + switch (rhs_class) + { + case GIMPLE_TERNARY_RHS: + if (dump_file) + fprintf (dump_file, "Unsupported trinary rhs\n"); + mr->type = MR_UNSUPPORTED; + return NULL_TREE; + case GIMPLE_UNARY_RHS: + case GIMPLE_SINGLE_RHS: + rhs1 = gimple_assign_rhs1 (stmt); + if (dump_file) + { + fprintf (dump_file, "(%s)", + get_tree_code_name (TREE_CODE (rhs1))); + print_generic_expr (dump_file, rhs1); + fprintf (dump_file, "\n"); + } + if (def_code == NOP_EXPR) + return analyse_addr_eval (rhs1, mr); + else if (def_code == MEM_REF || def_code == COMPONENT_REF + || def_code == ARRAY_REF) + { + memref_t *mr2 = get_memref (stmt, rhs1, false); + mr->type = get_memref_type (mr, mr2, def_code); + for (memref_set::const_iterator it = mr2->used_mrs.begin (); + it != mr2->used_mrs.end (); it++) + mr->used_mrs.insert (*it); + mr->used_mrs.insert (mr2); + return mr2->base; + } + else + { + if (dump_file) + fprintf (dump_file, "Unsupported unary/single\n"); + mr->type = MR_UNSUPPORTED; + } + return NULL_TREE; + case GIMPLE_BINARY_RHS: + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + if (dump_file) + { + fprintf (dump_file, "(%s) (%s)", + get_tree_code_name (TREE_CODE (rhs1)), + get_tree_code_name (TREE_CODE (rhs2))); + print_generic_expr (dump_file, rhs1); + fprintf (dump_file, " "); + print_generic_expr (dump_file, rhs2); + fprintf (dump_file, "\n"); + } + base = analyse_addr_eval (rhs1, mr); + analyse_addr_eval (rhs2, mr); + mr->type = get_memref_type (mr, NULL, def_code); + return base; + default: + gcc_unreachable (); + } + return NULL_TREE; +} + +static tree +get_mem_ref_address_ssa_name (tree mem, tree base) +{ + gcc_assert (TREE_CODE (mem) == MEM_REF); + if (base == NULL_TREE) + base = get_base_address (mem); + tree base_addr = NULL_TREE; + if (TREE_CODE (base) == MEM_REF) + base_addr = TREE_OPERAND (base, 0); + if (base_addr != NULL_TREE && TREE_CODE (base_addr) == SSA_NAME) + return base_addr; + return NULL_TREE; +} + +static void +analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr) +{ + tree base = get_base_address (mem); + if (dump_file) + fprintf (dump_file, "Codes: base = %s, mem = %s\n", + base ? get_tree_code_name (TREE_CODE (base)) : "null", + mem ? 
get_tree_code_name (TREE_CODE (mem)) : "null"); + + mr->stmts.safe_push (stmt); + mr->base = base; + switch (TREE_CODE (mem)) + { + case COMPONENT_REF: + if (mr->is_store) + analyse_incremental (stmt, mr); + mr->type = MR_SIMPLE; + mr->offset = TREE_OPERAND (mem, 1); + return; + case ARRAY_REF: + analyse_addr_eval (TREE_OPERAND (mem, 1), mr); + return; + case MEM_REF: + { + tree base_addr = get_mem_ref_address_ssa_name (mem, base); + if (dump_file) + { + fprintf (dump_file, "Base addr (%s): ", + base_addr ? get_tree_code_name (TREE_CODE (base_addr)) + : "null"); + if (base_addr) + print_generic_expr (dump_file, base_addr); + fprintf (dump_file, "\n"); + } + if (base_addr) + { + mr->base = analyse_addr_eval (base_addr, mr); + return; + } + break; + } + default: + break; + } + mr->type = MR_UNSUPPORTED; + mr->base = NULL_TREE; +} + +static void +analyse_stmt (gimple *stmt) +{ + bool is_store; + tree *mem = simple_mem_ref_in_stmt (stmt, &is_store); + if (!mem) + return; + if (dump_file) + { + fprintf (dump_file, "\n%s: mr is found in stmt (%s): ", + function_name (cfun), is_store ? "store" : "load"); + print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); + } + memref_t *mr = get_memref (stmt, *mem, is_store); + (*fmrs_map)[cfun]->insert (mr); + if (dump_file) + print_memref (mr); +} + +/* Scan stmts for indirect stores/loads with bases passed as function args. */ + +static void +collect_memrefs_for_cgraph_node (struct cgraph_node *n) +{ + if (dump_file) + fprintf (dump_file, "\nCollect indirect ptr info in %s\n", n->dump_name ()); + n->get_body (); + function *fn = DECL_STRUCT_FUNCTION (n->decl); + gcc_assert (fn && n->has_gimple_body_p ()); + + push_cfun (fn); + basic_block bb; + gimple_stmt_iterator si; + (*fmrs_map)[fn] = new memref_set; + FOR_EACH_BB_FN (bb, fn) + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { + gimple *stmt = gsi_stmt (si); + analyse_stmt (stmt); + } + pop_cfun (); +} + +/* Walk cgraph nodes and collect memory references info. */ + +static void +collect_memory_references () +{ + struct cgraph_node *n; + /* TODO: collect info only for loops and functions in loops. */ + FOR_EACH_DEFINED_FUNCTION (n) + if (nl_map->count (n) != 0 && n->has_gimple_body_p ()) + collect_memrefs_for_cgraph_node (n); + + if (dump_file) + { + fprintf (dump_file, "\n\nDump mem references:\n"); + FOR_EACH_DEFINED_FUNCTION (n) + if (nl_map->count (n) != 0 && n->has_gimple_body_p ()) + { + function *fn = DECL_STRUCT_FUNCTION (n->decl); + fprintf (dump_file, "\nIn function %s (%s):\n", function_name (fn), + nl_map->count (n) != 0 ? "in loop" : ""); + for (memref_set::const_iterator it = (*fmrs_map)[fn]->begin (); + it != (*fmrs_map)[fn]->end (); it++) + print_memref (*it); + } + } +} + +/* Analysis of loops. */ + +memref_set *current_incr_mrs; +memref_set *current_indirect_mrs; + +static void +collect_memref (memref_t *mr, class loop *loop, bool check_loop) +{ + gimple *stmt = mr->stmts[0]; + gcc_assert (stmt); + if (check_loop && !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) + return; + + /* TODO: Improve base invariant analysis for memrefs which are not local + (located in called functions). */ + bool is_base_inv = false; + if (mr->base) + is_base_inv = expr_invariant_in_loop_p (loop, mr->base); + + if (dump_file && (mr->type == MR_INDIRECT || mr->is_incr)) + { + fprintf (dump_file, "%s MR (%d): ", mr->is_incr ? 
"INCR" : "INDIRECT", + mr->mr_id); + print_generic_expr (dump_file, mr->mem); + fprintf (dump_file, "\twith base: "); + if (mr->base) + print_generic_expr (dump_file, mr->base); + else + fprintf (dump_file, "null"); + fprintf (dump_file, " (is_inv=%d)\n", is_base_inv); + } + + if (!is_base_inv) + return; + if (mr->type == MR_INDIRECT) + current_indirect_mrs->insert (mr); + if (mr->is_incr) + current_incr_mrs->insert (mr); +} + +static void +analyse_callable_function (struct cgraph_node *n, class loop *loop) +{ + if (dump_file) + fprintf (dump_file, "Callable (%s):\n", n->dump_name ()); + + function *fn = DECL_STRUCT_FUNCTION (n->decl); + if (fmrs_map->count (fn)) + for (memref_set::const_iterator it = (*fmrs_map)[fn]->begin (); + it != (*fmrs_map)[fn]->end (); it++) + collect_memref (*it, loop, false); +} + +static void +insert_node_with_callable_nodes (node_set &s, struct cgraph_node *n) +{ + s.insert (n); + if (nn_map->count (n) == 0) + return; + node_set *set = (*nn_map)[n]; + for (node_set::const_iterator it = set->begin (); it != set->end (); it++) + s.insert ((*it)); +} + +static bool +compatible_memrefs_p (memref_t *mr1, memref_t *mr2, bool &compatible_offset) +{ + if (!mr1->base || !mr2->base || !mr2->offset) + return false; + tree base_type1 = TYPE_MAIN_VARIANT (TREE_TYPE (mr1->base)); + tree base_type2 = TYPE_MAIN_VARIANT (TREE_TYPE (mr2->base)); + if (base_type1 != base_type2) + return false; + if (mr1->offset && mr1->offset == mr2->offset) + compatible_offset = true; + else + compatible_offset = false; + return true; +} + +static void +compare_memrefs (memref_t* mr, memref_t* mr2) +{ + /* TODO: improve analysis of memrefs from different functions: take into + account data flow and context. */ + bool compatible_offset = false; + if (!compatible_memrefs_p (mr, mr2, compatible_offset)) + return; + if (!compatible_offset) + { + for (memref_set::const_iterator it = mr->used_mrs.begin (); + it != mr->used_mrs.end (); it++) + if ((*it)->offset && (*it)->offset == mr2->offset) + { + compatible_offset = true; + if (dump_file) + fprintf (dump_file, "Used MR (%d) and INC MR have " + "the same offset\n", (*it)->mr_id); + break; + } + } + if (!compatible_offset) + return; + if (dump_file) + { + fprintf (dump_file, "MR (%d) is optimization candidate with offset: ", + mr->mr_id); + print_generic_expr (dump_file, mr2->offset); + fprintf (dump_file, "\n"); + } + + if (!mr_candidate_map->count (mr)) + { + (*mr_candidate_map)[mr] = mr2; + return; + } + /* TODO: support analysis with incrementation of different fields. */ + if ((*mr_candidate_map)[mr]->offset != mr2->offset) + { + if (dump_file) + { + fprintf (dump_file, "It conflicts with previously found MR (%d) " + "with offset ", (*mr_candidate_map)[mr]->mr_id); + if ((*mr_candidate_map)[mr] != NULL) + print_generic_expr (dump_file, (*mr_candidate_map)[mr]->offset); + fprintf (dump_file, ", disable the optimization\n"); + } + (*mr_candidate_map)[mr] = NULL; + } +} + +/* In the given loop and all functions called from the loop, collect + indirect/incremental memrefs with invariant base address and inductive + offset. 
*/ + +static void +collect_memrefs_for_loop (class loop *loop, struct cgraph_node *n, + function *fn) +{ + current_incr_mrs = new memref_set; + current_indirect_mrs = new memref_set; + + if (dump_file) + fprintf (dump_file, "Loop %d\n", loop->num); + if (fmrs_map->count (fn)) + for (memref_set::const_iterator it = (*fmrs_map)[fn]->begin (); + it != (*fmrs_map)[fn]->end (); it++) + collect_memref (*it, loop, true); + + /* Collect vector of functions called in the loop. */ + node_set set; + struct cgraph_edge *e; + struct cgraph_node *n2; + for (e = n->callees; e; e = e->next_callee) + { + gcall *stmt = e->call_stmt; + if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) + continue; + insert_node_with_callable_nodes (set, e->callee); + } + for (e = n->indirect_calls; e; e = e->next_callee) + { + gcall *stmt = e->call_stmt; + if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)) + || !e->indirect_info->targets) + continue; + for (unsigned i = 0; e->indirect_info->targets->iterate (i, &n2); ++i) + insert_node_with_callable_nodes (set, n2); + } + if (set.empty ()) + return; + if (dump_file) + fprintf (dump_file, "Go inside all callables of %s\n", n->dump_name ()); + + for (node_set::const_iterator it = set.begin (); it != set.end (); it++) + analyse_callable_function (*it, loop); + + if (!current_incr_mrs->empty () && !current_indirect_mrs->empty ()) + { + if (dump_file) + { + fprintf (dump_file, "Loop has both incr and indirect memrefs\n" + "Incr: "); + for (memref_set::const_iterator it = current_incr_mrs->begin (); + it != current_incr_mrs->end (); it++) + fprintf (dump_file, "%d ", (*it)->mr_id); + fprintf (dump_file, "\nIndirect: "); + for (memref_set::const_iterator it = current_indirect_mrs->begin (); + it != current_indirect_mrs->end (); it++) + fprintf (dump_file, "%d ", (*it)->mr_id); + fprintf (dump_file, "\n"); + } + /* Check if indirect memref has a base address similar to one of + incremental memref. */ + for (memref_set::const_iterator it = current_indirect_mrs->begin (); + it != current_indirect_mrs->end (); it++) + for (memref_set::const_iterator it2 = current_incr_mrs->begin (); + it2 != current_incr_mrs->end (); it2++) + compare_memrefs (*it, *it2); + } + + delete current_incr_mrs; + delete current_indirect_mrs; +} + +static void +analyse_loops_in_cgraph_node (struct cgraph_node *n) +{ + if (dump_file) + fprintf (dump_file, "\nAnalyse loops in %s\n", n->dump_name ()); + + n->get_body (); + function *fn = DECL_STRUCT_FUNCTION (n->decl); + gcc_assert (fn && n->has_gimple_body_p ()); + + push_cfun (fn); + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (LOOPS_NORMAL); + + for (auto loop : loops_list (cfun, 0)) + { + class loop *outer = loop_outer (loop); + /* Walk only outermost loops. 
*/ + if (outer->num != 0) + continue; + collect_memrefs_for_loop (loop, n, fn); + } + + free_dominance_info (CDI_DOMINATORS); + loop_optimizer_finalize (); + pop_cfun (); +} + +static void +analyse_loops () +{ + if (dump_file) + fprintf (dump_file, "\n\nLoops: procesing functions\n"); + cgraph_node *n; + FOR_EACH_DEFINED_FUNCTION (n) + { + if (!can_be_optimized (n)) + { + if (dump_file) + fprintf (dump_file, "Skip the function\n"); + continue; + } + analyse_loops_in_cgraph_node (n); + } + + if (dump_file) + fprintf (dump_file, "\n\nList of optimization candidates:\n"); + + FOR_EACH_DEFINED_FUNCTION (n) + { + function *fn = DECL_STRUCT_FUNCTION (n->decl); + if (!can_be_optimized (n) || !fmrs_map->count (fn)) + continue; + for (memref_map::iterator it = mr_candidate_map->begin (); + it != mr_candidate_map->end (); ++it) + { + memref_t *mr = it->first, *mr2 = it->second; + if (mr2 == NULL || !(*fmrs_map)[fn]->count (mr)) + continue; + if (!optimize_mrs_map->count (fn)) + (*optimize_mrs_map)[fn] = new memref_set; + (*optimize_mrs_map)[fn]->insert (mr); + } + if (dump_file && optimize_mrs_map->count (fn)) + { + fprintf (dump_file, "Function %s\n", n->dump_name ()); + for (memref_set::const_iterator it + = (*optimize_mrs_map)[fn]->begin (); + it != (*optimize_mrs_map)[fn]->end (); it++) + { + memref_t *mr = *it, *mr2 = (*mr_candidate_map)[mr]; + fprintf (dump_file, "MRs %d,%d with incremental offset ", + mr->mr_id, mr2->mr_id); + print_generic_expr (dump_file, mr2->offset); + fprintf (dump_file, "\n"); + } + } + } +} + +/* Reduce the set filtering out memrefs with the same memory references, + return the result vector of memrefs. */ + +static void +reduce_memref_set (memref_set *set, vec<memref_t *> &vec) +{ + for (memref_set::const_iterator it = set->begin (); + it != set->end (); it++) + { + memref_t *mr1 = *it; + if (!vec.length ()) + vec.safe_push (mr1); + else + { + bool inserted = false; + for (unsigned int i = 0; i < vec.length (); i++) + { + /* mr2 is less than current mr1. */ + memref_t *mr2 = vec[i]; + if (operand_equal_p (mr1->mem, mr2->mem)) + { + if (dump_file) + fprintf (dump_file, "The same mems in MRs %d and %d\n", + mr1->mr_id, mr2->mr_id); + /* TODO: maybe build new memref which include stmts of both + mr1 and mr2. */ + if ((mr1->is_store && !mr2->is_store) + || mr1->stmts.length () > mr2->stmts.length ()) + { + inserted = true; + vec[i] = mr1; + } + } + } + if (!inserted) + vec.safe_push (mr1); + } + } + if (dump_file) + { + fprintf (dump_file, "MRs (%d) after filtering: ", vec.length ()); + for (unsigned int i = 0; i < vec.length (); i++) + fprintf (dump_file, "%d ", vec[i]->mr_id); + fprintf (dump_file, "\n"); + } +} + +static void +find_nearest_common_dominator (memref_t *mr, basic_block &dom) +{ + for (unsigned int i = 0; i < mr->stmts.length (); i++) + { + basic_block bb = gimple_bb (mr->stmts[i]); + gcc_assert (bb); + if (dom == bb) + continue; + if (dom) + dom = nearest_common_dominator (CDI_DOMINATORS, dom, bb); + else + dom = bb; + } +} + +/* Return true if DECL is a parameter or a SSA_NAME for a parameter. + TODO: from gcc/tree-inline.c, maybe make it global. */ + +static bool +is_parm (tree decl) +{ + if (TREE_CODE (decl) == SSA_NAME) + { + decl = SSA_NAME_VAR (decl); + if (!decl) + return false; + } + + return (TREE_CODE (decl) == PARM_DECL); +} + +/* TODO: the following functions are inspired by remap in gcc/tree-inline.c, + maybe we can share some functionality. 
*/ + +static tree +remap_name (tree name, gimple *stmt, bool is_lhs) +{ + tree new_tree = NULL_TREE; + if (decl_map->count (name)) + { + if (dump_file) + { + fprintf (dump_file, "Find map: "); + print_generic_expr (dump_file, name); + fprintf (dump_file, " "); + print_generic_expr (dump_file, (*decl_map)[name]); + fprintf (dump_file, "\n"); + } + return unshare_expr ((*decl_map)[name]); + } + if (!is_lhs) + return name; + if (TREE_CODE (name) == SSA_NAME) + { + /* Remap anonymous SSA names or SSA names of anonymous decls. */ + tree var = SSA_NAME_VAR (name); + if (!var + || (!SSA_NAME_IS_DEFAULT_DEF (name) + && VAR_P (var) && !VAR_DECL_IS_VIRTUAL_OPERAND (var) + && DECL_ARTIFICIAL (var) && DECL_IGNORED_P (var) + && !DECL_NAME (var))) + { + new_tree = make_ssa_name (TREE_TYPE (name), stmt); + if (!var && SSA_NAME_IDENTIFIER (name)) + SET_SSA_NAME_VAR_OR_IDENTIFIER (new_tree, + SSA_NAME_IDENTIFIER (name)); + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree) + = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name); + /* So can range-info. */ + if (!POINTER_TYPE_P (TREE_TYPE (name)) + && SSA_NAME_RANGE_INFO (name)) + duplicate_ssa_name_range_info (new_tree, + SSA_NAME_RANGE_TYPE (name), + SSA_NAME_RANGE_INFO (name)); + /* TODO: maybe correct the insertion. */ + (*decl_map)[name] = new_tree; + if (dump_file) + { + fprintf (dump_file, "New map (no var): "); + print_generic_expr (dump_file, name); + fprintf (dump_file, " "); + print_generic_expr (dump_file, new_tree); + fprintf (dump_file, "\n"); + } + return new_tree; + } + /* TODO: maybe remap_name or do the same as before for SSA_NAME_VAR. */ + new_tree = make_ssa_name (TREE_TYPE (name), stmt); + (*decl_map)[name] = new_tree; + if (dump_file) + { + fprintf (dump_file, "New map: "); + print_generic_expr (dump_file, name); + fprintf (dump_file, " "); + print_generic_expr (dump_file, new_tree); + fprintf (dump_file, "\n"); + } + } + else if (VAR_P (name) || TREE_CODE (name) == PARM_DECL) + { + if (dump_file) + { + fprintf (dump_file, "VAR/PARM: "); + print_generic_expr (dump_file, name); + fprintf (dump_file, "\n"); + } + return name; + } + else + { + if (dump_file) + { + fprintf (dump_file, "Unsupported: "); + print_generic_expr (dump_file, name); + fprintf (dump_file, "\n"); + } + //gcc_unreachable (); + return name; + } + return new_tree; +} + +/* Passed to walk_tree. Copies the node pointed to, if appropriate. */ + +static tree +ipa_copy_tree_r (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) +{ + enum tree_code code = TREE_CODE (*tp); + enum tree_code_class cl = TREE_CODE_CLASS (code); + + /* We make copies of most nodes. */ + if (IS_EXPR_CODE_CLASS (cl) + || code == TREE_LIST + || code == TREE_VEC + || code == TYPE_DECL + || code == OMP_CLAUSE) + { + /* Because the chain gets clobbered when we make a copy, we save it + here. */ + tree chain = NULL_TREE, new_tree; + + if (CODE_CONTAINS_STRUCT (code, TS_COMMON)) + chain = TREE_CHAIN (*tp); + + /* Copy the node. */ + new_tree = copy_node (*tp); + + *tp = new_tree; + + /* Now, restore the chain, if appropriate. That will cause + walk_tree to walk into the chain as well. */ + if (code == PARM_DECL + || code == TREE_LIST + || code == OMP_CLAUSE) + TREE_CHAIN (*tp) = chain; + + /* For now, we don't update BLOCKs when we make copies. So, we + have to nullify all BIND_EXPRs. 
*/ + if (TREE_CODE (*tp) == BIND_EXPR) + BIND_EXPR_BLOCK (*tp) = NULL_TREE; + } + else if (code == CONSTRUCTOR || code == STATEMENT_LIST) + gcc_unreachable (); + else if (TREE_CODE_CLASS (code) == tcc_type + || TREE_CODE_CLASS (code) == tcc_declaration + || TREE_CODE_CLASS (code) == tcc_constant) + *walk_subtrees = 0; + return NULL_TREE; +} + +/* Remap the GIMPLE operand pointed to by *TP. DATA is really a + 'struct walk_stmt_info *'. DATA->INFO is a 'gimple *'. + WALK_SUBTREES is used to indicate walk_gimple_op whether to keep + recursing into the children nodes of *TP. */ + +static tree +remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data) +{ + struct walk_stmt_info *wi_p = (struct walk_stmt_info *) data; + gimple *stmt = (gimple *) wi_p->info; + + /* For recursive invocations this is no longer the LHS itself. */ + bool is_lhs = wi_p->is_lhs; + wi_p->is_lhs = false; + + if (TREE_CODE (*tp) == SSA_NAME) + { + *tp = remap_name (*tp, stmt, is_lhs); + *walk_subtrees = 0; + if (is_lhs) + SSA_NAME_DEF_STMT (*tp) = wi_p->stmt; + return NULL; + } + else if (auto_var_in_fn_p (*tp, cfun->decl)) + { + /* Local variables and labels need to be replaced by equivalent + variables. We don't want to copy static variables; there's + only one of those, no matter how many times we inline the + containing function. Similarly for globals from an outer + function. */ + tree new_decl; + + /* Remap the declaration. */ + new_decl = remap_name (*tp, stmt, is_lhs); + gcc_assert (new_decl); + /* Replace this variable with the copy. */ + STRIP_TYPE_NOPS (new_decl); + /* ??? The C++ frontend uses void * pointer zero to initialize + any other type. This confuses the middle-end type verification. + As cloned bodies do not go through gimplification again the fixup + there doesn't trigger. */ + if (TREE_CODE (new_decl) == INTEGER_CST + && !useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (new_decl))) + new_decl = fold_convert (TREE_TYPE (*tp), new_decl); + *tp = new_decl; + *walk_subtrees = 0; + } + else if (TREE_CODE (*tp) == STATEMENT_LIST || TREE_CODE (*tp) == SAVE_EXPR) + { + if (dump_file) + { + fprintf (dump_file, "Unexpected tree: "); + print_generic_expr (dump_file, *tp); + fprintf (dump_file, "\n"); + } + gcc_unreachable (); + } + else + { + /* Otherwise, just copy the node. Note that copy_tree_r already + knows not to copy VAR_DECLs, etc., so this is safe. */ + + if (TREE_CODE (*tp) == MEM_REF) + { + /* We need to re-canonicalize MEM_REFs from inline substitutions + that can happen when a pointer argument is an ADDR_EXPR. + Recurse here manually to allow that. */ + tree ptr = TREE_OPERAND (*tp, 0); + tree type = TREE_TYPE (*tp); + tree old = *tp; + walk_tree (&ptr, remap_gimple_op_r, data, NULL); + *tp = fold_build2 (MEM_REF, type, ptr, TREE_OPERAND (*tp, 1)); + TREE_THIS_VOLATILE (*tp) = TREE_THIS_VOLATILE (old); + TREE_SIDE_EFFECTS (*tp) = TREE_SIDE_EFFECTS (old); + TREE_NO_WARNING (*tp) = TREE_NO_WARNING (old); + /* TODO: maybe support this case. */ + gcc_assert (MR_DEPENDENCE_CLIQUE (old) == 0); + /* We cannot propagate the TREE_THIS_NOTRAP flag if we have + remapped a parameter as the property might be valid only + for the parameter itself. */ + if (TREE_THIS_NOTRAP (old) && (!is_parm (TREE_OPERAND (old, 0)))) + TREE_THIS_NOTRAP (*tp) = 1; + REF_REVERSE_STORAGE_ORDER (*tp) = REF_REVERSE_STORAGE_ORDER (old); + *walk_subtrees = 0; + return NULL; + } + + /* Here is the "usual case". Copy this tree node, and then + tweak some special cases. 
*/ + ipa_copy_tree_r (tp, walk_subtrees, NULL); + gcc_assert (!(TREE_CODE (*tp) == TARGET_EXPR && TREE_OPERAND (*tp, 3))); + if (TREE_CODE (*tp) == ADDR_EXPR) + { + /* TODO: If this used to be invariant, but is not any longer, + then regimplification is probably needed. */ + walk_tree (&TREE_OPERAND (*tp, 0), remap_gimple_op_r, data, NULL); + recompute_tree_invariant_for_addr_expr (*tp); + *walk_subtrees = 0; + } + } + /* TODO: maybe we need to update TREE_BLOCK (*tp). */ + + /* Keep iterating. */ + return NULL_TREE; +} + +static void +create_cgraph_edge (cgraph_node *n, gimple *stmt) +{ + gcall *call_stmt = dyn_cast <gcall *> (stmt); + basic_block bb = gimple_bb (stmt); + tree decl = gimple_call_fndecl (call_stmt); + if (!decl) + return; + struct cgraph_edge *e = n->create_edge (cgraph_node::get_create (decl), + call_stmt, bb->count); + /* TODO: maybe we need to store ipa_call_summary result. */ + ipa_call_summaries->get_create (e); +} + +/* Insert prefetch intrinsics in this function, return nonzero on success. */ + +static int +optimize_function (cgraph_node *n, function *fn) +{ + /* In a given function, optimize only indirect memrefs with + the same incremental memref. + TODO: implement the optimization for other cases. */ + bool different_incrementals = false; + memref_t *first_mr = NULL; + memref_set used_mrs; + for (memref_set::const_iterator it = (*optimize_mrs_map)[fn]->begin (); + it != (*optimize_mrs_map)[fn]->end (); it++) + { + memref_t *mr = *it; + if (!first_mr) + first_mr = mr; + else if ((*mr_candidate_map)[first_mr] != (*mr_candidate_map)[mr]) + { + different_incrementals = true; + break; + } + for (memref_set::const_iterator it2 = mr->used_mrs.begin (); + it2 != mr->used_mrs.end (); it2++) + used_mrs.insert (*it2); + } + if (different_incrementals) + { + if (dump_file) + fprintf (dump_file, "It contains memrefs with different " + "incrementals. Skip the case.\n"); + return 0; + } + memref_t *inc_mr = (*mr_candidate_map)[first_mr]; + if (!inc_mr->stmts[0] || !gimple_assign_single_p (inc_mr->stmts[0])) + { + if (dump_file) + fprintf (dump_file, "Incremental MR with unexpected stmt. " + "Skip the case.\n"); + return 0; + } + if (dump_file && !used_mrs.empty ()) + print_mrs_ids (used_mrs, "Common list of used mrs:\n"); + + /* Find a memref in used mrs which corresponds to the found incremental + memref. */ + memref_t *comp_mr = NULL; + for (memref_set::const_iterator it = used_mrs.begin (); + it != used_mrs.end (); it++) + { + bool c_offset; + if ((*it)->type != MR_SIMPLE || inc_mr->type != MR_SIMPLE + || !compatible_memrefs_p (*it, inc_mr, c_offset)) + continue; + if (c_offset) + { + if (dump_file) + fprintf (dump_file, "Found compatible used MR (%d) and " + "incr MR (%d)\n", (*it)->mr_id, inc_mr->mr_id); + comp_mr = (*it); + } + } + if (!comp_mr || !comp_mr->stmts[0] + || !gimple_assign_single_p (comp_mr->stmts[0])) + { + if (dump_file) + fprintf (dump_file, "Compatible MR in this function is not found " + " or it has unexpected stmt. Skip the case.\n"); + return 0; + } + + /* Filter out memrefs with the same memory references. + TODO: maybe do the same with used mrs. */ + vec<memref_t *> vmrs = vNULL; + reduce_memref_set ((*optimize_mrs_map)[fn], vmrs); + + /* Find insertion place. Create new BB. */ + /* TODO: maybe it is useful to process also used_mrs. 
*/ + basic_block dom_bb = NULL; + for (unsigned int i = 0; i < vmrs.length (); i++) + find_nearest_common_dominator (vmrs[i], dom_bb); + + if (!dom_bb) + { + if (dump_file) + fprintf (dump_file, "Dominator bb for MRs is not found. " + "Skip the case.\n"); + return 0; + } + else if (dump_file) + fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index); + + split_block (dom_bb, (gimple *) NULL); + gimple_stmt_iterator gsi = gsi_last_bb (dom_bb); + + /* Create new inc var. Insert new_var = old_var + step * factor. */ + decl_map = new tree_map; + gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0])); + tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]); + gimple_seq stmts = NULL; + tree var_type = TREE_TYPE (inc_var); + enum tree_code inc_code; + if (TREE_CODE (var_type) == POINTER_TYPE) + inc_code = POINTER_PLUS_EXPR; + else + inc_code = PLUS_EXPR; + tree step = inc_mr->step; + unsigned dist_val = tree_to_uhwi (step) * param_ipa_prefetch_distance_factor; + tree dist = build_int_cst (TREE_TYPE (step), dist_val); + tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist); + (*decl_map)[inc_var] = new_inc_var; + + /* Create other new vars. Insert new stmts. */ + struct walk_stmt_info wi; + stmt_set processed_stmts; + memref_tree_map mr_new_trees; + for (memref_set::const_iterator it = used_mrs.begin (); + it != used_mrs.end (); it++) + { + memref_t *mr = *it; + gimple *last_stmt = NULL; + if (mr == comp_mr) + continue; + for (int i = mr->stmts.length () - 1; i >= 0 ; i--) + { + if (processed_stmts.count (mr->stmts[i])) + continue; + processed_stmts.insert (mr->stmts[i]); + if (dump_file) + { + fprintf (dump_file, "Copy stmt %d from used MR (%d):\n", + i, mr->mr_id); + print_gimple_stmt (dump_file, mr->stmts[i], 0); + } + /* Create a new copy of STMT and duplicate STMT's virtual + operands. */ + gimple *copy = gimple_copy (mr->stmts[i]); + gcc_checking_assert (!is_gimple_debug (copy)); + + /* Remap all the operands in COPY. */ + memset (&wi, 0, sizeof (wi)); + last_stmt = copy; + wi.info = copy; + walk_gimple_op (copy, remap_gimple_op_r, &wi); + if (dump_file) + { + fprintf (dump_file, "Stmt %d after remap:\n",i); + print_gimple_stmt (dump_file, copy, 0); + } + gimple_seq_add_stmt (&stmts, copy); + } + gcc_assert (last_stmt); + mr_new_trees[mr] = gimple_assign_lhs (last_stmt); + if (dump_file) + { + fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id); + print_generic_expr (dump_file, gimple_assign_lhs (last_stmt)); + fprintf (dump_file, "\n"); + } + } + /* On new load check page fault. */ + /* Insert prefetch instructions. */ + if (dump_file) + fprintf (dump_file, "Evaluate addresses and insert prefetch insn.\n"); + + vec<gimple *> pcalls = vNULL; + tree local; + switch (param_ipa_prefetch_locality) + { + case 0: + local = integer_zero_node; + break; + case 1: + local = integer_one_node; + break; + case 2: + local = build_int_cst (integer_type_node, 2); + break; + default: + case 3: + local = integer_three_node; + break; + } + for (unsigned int j = 0; j < vmrs.length (); j++) + { + memref_t *mr = vmrs[j]; + /* Don't need to copy the last stmt, since we insert prefetch insn + instead of it. */ + for (int i = mr->stmts.length () - 1; i >= 1 ; i--) + { + if (processed_stmts.count (mr->stmts[i])) + continue; + processed_stmts.insert (mr->stmts[i]); + + gimple *copy = gimple_copy (mr->stmts[i]); + gcc_checking_assert (!is_gimple_debug (copy)); + + /* Remap all the operands in COPY. 
*/ + memset (&wi, 0, sizeof (wi)); + wi.info = copy; + walk_gimple_op (copy, remap_gimple_op_r, &wi); + if (dump_file) + { + fprintf (dump_file, "Stmt %d after remap:\n",i); + print_gimple_stmt (dump_file, copy, 0); + } + gimple_seq_add_stmt (&stmts, copy); + } + gimple *last_stmt = mr->stmts[0]; + gcc_assert (last_stmt); + mr_new_trees[mr] = gimple_assign_lhs (last_stmt); + tree write_p = mr->is_store ? integer_one_node : integer_zero_node; + tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE); + if (decl_map->count (addr)) + addr = (*decl_map)[addr]; + last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), + 3, addr, write_p, local); + pcalls.safe_push (last_stmt); + gimple_seq_add_stmt (&stmts, last_stmt); + } + + gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); + delete decl_map; + + /* Modify cgraph inserting calls to prefetch intrinsics. */ + for (unsigned i = 0; i < pcalls.length (); i++) + create_cgraph_edge (n, pcalls[i]); + ipa_update_overall_fn_summary (n); + + return 1; +} + +static int +insert_prefetch () +{ + int res = 0; + cgraph_node *n; + FOR_EACH_DEFINED_FUNCTION (n) + { + function *fn = DECL_STRUCT_FUNCTION (n->decl); + if (!optimize_mrs_map->count (fn)) + continue; + if (dump_file) + fprintf (dump_file, "Optimize function %s\n", n->dump_name ()); + push_cfun (DECL_STRUCT_FUNCTION (n->decl)); + calculate_dominance_info (CDI_DOMINATORS); + res |= optimize_function (n, fn); + free_dominance_info (CDI_DOMINATORS); + pop_cfun (); + } + return res; +} + +static unsigned int +ipa_prefetch (void) +{ + if (!targetm.have_prefetch ()) + { + if (dump_file) + fprintf (dump_file, "Prefetch is not supported by the target.\n"); + return 0; + } + + unsigned int ret = 0; + el_map = new edge_in_loop; + nl_map = new node_in_loop; + icn_map = new node_to_iedge_map; + nn_map = new node_to_node_map; + tm_map = new tree_memref_map; + fmrs_map = new funct_mrs_map; + mr_candidate_map = new memref_map; + optimize_mrs_map = new funct_mrs_map; + + max_mr_id = 0; + /* TODO: check if we really need this init. */ + if (!builtin_decl_explicit_p (BUILT_IN_PREFETCH)) + { + tree type = build_function_type_list (void_type_node, + const_ptr_type_node, NULL_TREE); + tree decl = add_builtin_function ("__builtin_prefetch", type, + BUILT_IN_PREFETCH, BUILT_IN_NORMAL, + NULL, NULL_TREE); + DECL_IS_NOVOPS (decl) = true; + set_builtin_decl (BUILT_IN_PREFETCH, decl, false); + } + + analyse_cgraph (); + prepare_indirect_call_info (); + propagate_loop_info_in_cgraph (); + collect_memory_references (); + analyse_loops (); + + /* TODO: implement some specific heuristics. */ + if (!optimize_mrs_map->empty ()) + ret = insert_prefetch (); + + delete el_map; + delete nl_map; + for (node_to_iedge_map::iterator it = icn_map->begin (); + it != icn_map->end (); ++it) + delete it->second; + delete icn_map; + for (node_to_node_map::iterator it = nn_map->begin (); + it != nn_map->end (); ++it) + delete it->second; + delete nn_map; + for (tree_memref_map::iterator it = tm_map->begin (); + it != tm_map->end (); ++it) + delete it->second; + delete tm_map; + for (funct_mrs_map::iterator it = fmrs_map->begin (); + it != fmrs_map->end (); ++it) + delete it->second; + delete fmrs_map; + delete mr_candidate_map; + delete optimize_mrs_map; + + /* TODO: maybe add other todos. 
*/ + return ret | TODO_verify_all; +} + +const pass_data pass_data_ipa_prefetch = +{ + SIMPLE_IPA_PASS, // type + "ipa_prefetch", // name + OPTGROUP_NONE, // optinfo_flags + TV_IPA_PREFETCH, // tv_id + 0, // properties_required + 0, // properties_provided + 0, // properties_destroyed + 0, // todo_flags_start + 0, // todo_flags_finish +}; + +class pass_ipa_prefetch : public simple_ipa_opt_pass +{ +public: + pass_ipa_prefetch (gcc::context *ctxt) + : simple_ipa_opt_pass (pass_data_ipa_prefetch, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *); + virtual unsigned int execute (function *) + { + return ipa_prefetch (); + } +}; // class pass_ipa_prefetch + +bool +pass_ipa_prefetch::gate (function *) +{ + return (optimize >= 3 + && flag_ipa_prefetch + /* Don't bother doing anything if the program has errors. */ + && !seen_error () + && flag_lto_partition == LTO_PARTITION_ONE + /* Only enable struct optimizations in lto or whole_program. */ + && (in_lto_p || flag_whole_program)); +} + +} // anon namespace + +simple_ipa_opt_pass * +make_pass_ipa_prefetch (gcc::context *ctxt) +{ + return new pass_ipa_prefetch (ctxt); +} diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc index 261a72085..5355cf2f4 100644 --- a/gcc/ipa-sra.cc +++ b/gcc/ipa-sra.cc @@ -3033,6 +3033,14 @@ process_edge_to_unknown_caller (cgraph_edge *cs) gcc_checking_assert (from_ifs); isra_call_summary *csum = call_sums->get (cs); + /* TODO: implement better support for call edges inserted after summary + collection but before sra wpa invocation. */ + if (!csum) + { + csum = call_sums->get_create (cs); + csum->m_return_ignored = true; + } + if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n", cs->caller->dump_name ()); diff --git a/gcc/params.opt b/gcc/params.opt index 7e5c119cf..5c07e3986 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -309,6 +309,14 @@ Maximum pieces that IPA-SRA tracks per formal parameter, as a consequence, also Common Joined UInteger Var(param_ipa_sra_ptr_growth_factor) Init(2) Param Optimization Maximum allowed growth of number and total size of new parameters that ipa-sra replaces a pointer to an aggregate with. +-param=ipa-prefetch-distance-factor= +Common Joined UInteger Var(param_ipa_prefetch_distance_factor) Init(4) Param Optimization +The factor represents the number of inductive variable incrementations to evaluate an indirect memory address for IPA prefetch. + +-param=ipa-prefetch-locality= +Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) Param Optimization +The flag represents temporal locality values in the following way: 0:pstl1strm, 1:pstl3keep, 2:pstl2keep, 3:pstl1keep. + -param=ira-loop-reserved-regs= Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization The number of registers in each class kept unused by loop invariant motion. diff --git a/gcc/passes.def b/gcc/passes.def index b7d4f7b4e..4c1436766 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -158,6 +158,7 @@ along with GCC; see the file COPYING3. 
If not see NEXT_PASS (pass_ipa_icf); NEXT_PASS (pass_ipa_devirt); NEXT_PASS (pass_ipa_icp); + NEXT_PASS (pass_ipa_prefetch); NEXT_PASS (pass_ipa_cp); NEXT_PASS (pass_ipa_sra); NEXT_PASS (pass_ipa_cdtor_merge); diff --git a/gcc/timevar.def b/gcc/timevar.def index 18a9f62cc..810ae20fd 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") +DEFTIMEVAR (TV_IPA_PREFETCH , "ipa prefetch") DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 1733931c3..63f1192ae 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -529,6 +529,7 @@ extern ipa_opt_pass_d *make_pass_ipa_icp (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); +extern simple_ipa_opt_pass *make_pass_ipa_prefetch (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); -- 2.33.0
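As the gate functions above show, the new pass is conservative about when it runs: it requires -O3, -fipa-prefetch, a single LTO partition (-flto-partition=one), and link-time or whole-program compilation; the indirect-call target vectors it consumes are produced by the icp analysis only when -fipa-ic is given. A plausible way to exercise the pass is therefore the following (the -fdump-ipa-ipa_prefetch spelling is an assumption derived from the pass name, not something stated in the patch):

  gcc -O3 -flto -flto-partition=one -fipa-ic -fipa-prefetch \
      -fdump-ipa-ipa_prefetch main.c util.c -o app

The prefetch distance can be tuned with --param ipa-prefetch-distance-factor=N (default 4, multiplied by the detected induction step) and the temporal-locality hint with --param ipa-prefetch-locality (default 3, the strongest keep hint).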