Projects
openEuler:24.03:SP1:Everything
gcc
_service:tar_scm:0309-CSPGO-Add-context-sensiti...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0309-CSPGO-Add-context-sensitive-PGO.patch of Package gcc
From 45a424e51c4c5de46062f2d7f355da8a99604d71 Mon Sep 17 00:00:00 2001 From: liyancheng <412998149@qq.com> Date: Tue, 19 Nov 2024 22:06:48 +0800 Subject: [PATCH] [CSPGO] Add context sensitive PGO Same as LLVM, GCC PGO profile counts are not context sensitive. Therefore, CSPGO is added to collect the profile again after PGO to obtain accurate execution information after inline for better performance. --- gcc/auto-profile.cc | 2 +- gcc/cgraph.cc | 47 ++++++++++ gcc/cgraph.h | 8 +- gcc/cgraphunit.cc | 63 ++++++++++++- gcc/common.opt | 20 ++++ gcc/coverage.cc | 162 +++++++++++++++++++++++--------- gcc/coverage.h | 5 +- gcc/gcc.cc | 5 +- gcc/ipa-profile.cc | 4 +- gcc/lto-cgraph.cc | 7 ++ gcc/opts.cc | 18 ++++ gcc/passes.cc | 71 ++++++++++++++ gcc/passes.def | 1 + gcc/profile.cc | 27 +++--- gcc/profile.h | 2 +- gcc/timevar.def | 1 + gcc/tree-pass.h | 2 + gcc/tree-profile.cc | 223 +++++++++++++++++++++++++++++++++++++++++++- gcc/value-prof.cc | 9 +- gcc/value-prof.h | 4 +- 20 files changed, 611 insertions(+), 70 deletions(-) diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index f45f0ec66..5e85381ce 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -1775,7 +1775,7 @@ auto_profile (void) if (symtab->state == FINISHED) return 0; - init_node_map (true); + init_node_map (true, false); profile_info = autofdo::afdo_profile_info; FOR_EACH_FUNCTION (node) diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc index 7d738b891..95619aefa 100644 --- a/gcc/cgraph.cc +++ b/gcc/cgraph.cc @@ -4076,6 +4076,53 @@ cgraph_node::get_body (void) return updated; } +/* Prepare function body. When doing LTO, read cgraph_node's body from disk + if it is not already present. When some IPA transformations are scheduled, + apply them. + Flag is used to control only skipping or enabling cspgo. 
*/ + +bool +cgraph_node::ipa_transform_for_cspgo (bool is_cspgo) +{ + bool updated; + + bitmap_obstack_initialize (NULL); + updated = get_untransformed_body (); + + /* Getting transformed body makes no sense for inline clones; + we should never use this on real clones because they are materialized + early. + TODO: Materializing clones here will likely lead to smaller LTRANS + footprint. */ + gcc_assert (!inlined_to && !clone_of); + if (ipa_transforms_to_apply.exists ()) + { + opt_pass *saved_current_pass = current_pass; + FILE *saved_dump_file = dump_file; + const char *saved_dump_file_name = dump_file_name; + dump_flags_t saved_dump_flags = dump_flags; + dump_file_name = NULL; + set_dump_file (NULL); + + push_cfun (DECL_STRUCT_FUNCTION (decl)); + + update_ssa (TODO_update_ssa_only_virtuals); + execute_all_ipa_transforms_for_cspgo (is_cspgo); + cgraph_edge::rebuild_edges (); + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + pop_cfun (); + updated = true; + + current_pass = saved_current_pass; + set_dump_file (saved_dump_file); + dump_file_name = saved_dump_file_name; + dump_flags = saved_dump_flags; + } + bitmap_obstack_release (NULL); + return updated; +} + /* Return the DECL_STRUCT_FUNCTION of the function. */ struct function * diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 2332539e5..3fdf36769 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1097,11 +1097,17 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node present. */ bool get_untransformed_body (); - /* Prepare function body. When doing LTO, read cgraph_node's body from disk + /* Prepare function body. When doing LTO, read cgraph_node's body from disk if it is not already present. When some IPA transformations are scheduled, apply them. */ bool get_body (); + /* Prepare function body. When doing LTO, read cgraph_node's body from disk + if it is not already present. When some IPA transformations are scheduled, + apply them. 
+ Flag is used to control only skipping or enabling cspgo. */ + bool ipa_transform_for_cspgo (bool); + void materialize_clone (void); /* Release memory used to represent body of function. diff --git a/gcc/cgraphunit.cc b/gcc/cgraphunit.cc index 5aa7b57c9..37cc83eab 100644 --- a/gcc/cgraphunit.cc +++ b/gcc/cgraphunit.cc @@ -208,6 +208,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-inline.h" #include "omp-offload.h" #include "symtab-thunks.h" +#include "profile.h" // for del_node_map /* Queue of cgraph nodes scheduled to be added into cgraph. This is a secondary queue used during optimization to accommodate passes that @@ -1928,6 +1929,29 @@ tp_first_run_node_cmp (const void *pa, const void *pb) return tp_first_run_a - tp_first_run_b; } +static bool +expand_node_with_cspgo (cgraph_node *node, bool is_cspgo) +{ + gcc_assert (node); + /* Nodes in other partition, inline to, and clone of are not + interesting in cspgo. */ + if (!node->has_gimple_body_p () + || node->in_other_partition + || node->inlined_to + || node->clone_of) + { + if (dump_file) + fprintf (dump_file, "[cspgo] node %s will not do" + " transform\n", node->dump_name ()); + return false; + } + + if (node->process) + node->ipa_transform_for_cspgo (is_cspgo); + return true; +} + + /* Expand all functions that must be output. Attempt to topologically sort the nodes so function is output when @@ -1968,6 +1992,39 @@ expand_all_functions (void) /* First output functions with time profile in specified order. */ qsort (tp_first_run_order, tp_first_run_order_pos, sizeof (cgraph_node *), tp_first_run_node_cmp); + + if (flag_csprofile_generate || flag_csprofile_use) + { + bool is_cspgo = false; + + /* We need to execute loop twice. The first performs all transforms + except cspgo, and the second performs cspgo transform. 
*/ + for (int idx = 0; idx < 2; idx++) + { + for (i = 0; i < tp_first_run_order_pos; i++) + { + node = tp_first_run_order[i]; + if (!expand_node_with_cspgo (node, is_cspgo)) + continue; + } + + for (i = new_order_pos - 1; i >= 0; i--) + { + node = order[i]; + if (!expand_node_with_cspgo (node, is_cspgo)) + continue; + } + + is_cspgo = true; + } + + if (flag_csprofile_use) + handle_missing_profiles (); + + if (coverage_node_map_initialized_p ()) + del_node_map (); + } + for (i = 0; i < tp_first_run_order_pos; i++) { node = tp_first_run_order[i]; @@ -2009,6 +2066,10 @@ expand_all_functions (void) fprintf (symtab->dump_file, "Expanded functions with time profile:%u/%u\n", profiled_func_count, expanded_func_count); + /* Generate coverage variables and constructor for cspgo. */ + if (flag_csprofile_generate) + coverage_finish (true); + symtab->process_new_functions (); free_gimplify_stack (); delete ipa_saved_clone_sources; @@ -2176,7 +2237,7 @@ ipa_passes (void) if (!in_lto_p) { /* Generate coverage variables and constructors. */ - coverage_finish (); + coverage_finish (false); /* Process new functions added. */ set_cfun (NULL); diff --git a/gcc/common.opt b/gcc/common.opt index be5fcc681..fc2920cee 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2397,6 +2397,10 @@ Common Joined RejectNegative Var(profile_data_prefix) Set the top-level directory for storing the profile data. The default is 'pwd'. +fcfgo-csprofile-dir= +Common Joined RejectNegative Var(csprofile_data_prefix) +Set the top-level directory for storing the cs profile data. + fprofile-note= Common Joined RejectNegative Var(profile_note_location) Select the name for storing the profile note file. @@ -2461,6 +2465,14 @@ fprofile-generate= Common Joined RejectNegative Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir=. 
+fcfgo-csprofile-generate +Common Var(flag_csprofile_generate) +Enable common options for generating context sensitive profile info for profile feedback directed optimizations. + +fcfgo-csprofile-generate= +Common Joined RejectNegative +Enable common options for generating context sensitive profile info for profile feedback directed optimizations, and set -fcfgo-csprofile-dir=. + fkernel-pgo Common Var(flag_kernel_pgo) Optimization Init(0) Disable TLS setting of instrumentation variables to support PGO kernel compilation in -fprofile-generate, as kernel does not support TLS. @@ -2485,6 +2497,14 @@ fprofile-use= Common Joined RejectNegative Enable common options for performing profile feedback directed optimizations, and set -fprofile-dir=. +fcfgo-csprofile-use +Common Var(flag_csprofile_use) +Enable common options for performing context sensitive profile feedback directed optimizations. + +fcfgo-csprofile-use= +Common Joined RejectNegative +Enable common options for performing context sensitive profile feedback directed optimizations, and set -fcfgo-csprofile-dir=. + fprofile-values Common Var(flag_profile_values) Insert code to profile values of expressions. diff --git a/gcc/coverage.cc b/gcc/coverage.cc index 8ece5db68..1a7fcb5df 100644 --- a/gcc/coverage.cc +++ b/gcc/coverage.cc @@ -86,7 +86,6 @@ struct counts_entry : pointer_hash <counts_entry> static GTY(()) struct coverage_data *functions_head = 0; static struct coverage_data **functions_tail = &functions_head; -static unsigned no_coverage = 0; /* Cumulative counter information for whole program. */ static unsigned prg_ctr_mask; /* Mask of counter types generated. */ @@ -114,6 +113,9 @@ static unsigned bbg_file_stamp; /* Name of the count data (gcda) file. */ static char *da_file_name; +/* Name of the cs count data (gcda) file. */ +static char *cs_da_file_name; + /* The names of merge functions for counters. 
*/ #define STR(str) #str #define DEF_GCOV_COUNTER(COUNTER, NAME, FN_TYPE) STR(__gcov_merge ## FN_TYPE), @@ -173,23 +175,28 @@ counts_entry::remove (counts_entry *entry) /* Hash table of count data. */ static hash_table<counts_entry> *counts_hash; +/* Hash table of cs count data. */ +static hash_table<counts_entry> *cs_counts_hash; + /* Read in the counts file, if available. */ static void -read_counts_file (void) +read_counts_file (bool is_cspgo) { gcov_unsigned_t fn_ident = 0; gcov_unsigned_t tag; int is_error = 0; unsigned lineno_checksum = 0; unsigned cfg_checksum = 0; + char *gcda_file_name = (is_cspgo ? cs_da_file_name : da_file_name); - if (!gcov_open (da_file_name, 1)) + if (!gcov_open (gcda_file_name, 1)) return; if (!gcov_magic (gcov_read_unsigned (), GCOV_DATA_MAGIC)) { - warning (0, "%qs is not a gcov data file", da_file_name); + warning (0, "%qs is not a %s data file", gcda_file_name, + (is_cspgo ? "cs gcov" : "gcov")); gcov_close (); return; } @@ -201,7 +208,7 @@ read_counts_file (void) GCOV_UNSIGNED2STRING (e, GCOV_VERSION); warning (0, "%qs is version %q.*s, expected version %q.*s", - da_file_name, 4, v, 4, e); + gcda_file_name, 4, v, 4, e); gcov_close (); return; } @@ -213,7 +220,7 @@ read_counts_file (void) /* Read checksum. */ gcov_read_unsigned (); - counts_hash = new hash_table<counts_entry> (10); + (is_cspgo ? cs_counts_hash : counts_hash) = new hash_table<counts_entry> (10); while ((tag = gcov_read_unsigned ())) { gcov_unsigned_t length; @@ -234,9 +241,18 @@ read_counts_file (void) } else if (tag == GCOV_TAG_OBJECT_SUMMARY) { - profile_info = XCNEW (gcov_summary); - profile_info->runs = gcov_read_unsigned (); - profile_info->sum_max = gcov_read_unsigned (); + if (is_cspgo) + { + /* TODO: runs and sum_max need better handling for cspgo. 
*/ + gcov_unsigned_t runs = gcov_read_unsigned (); + gcov_unsigned_t sum_max = gcov_read_unsigned (); + } + else + { + profile_info = XCNEW (gcov_summary); + profile_info->runs = gcov_read_unsigned (); + profile_info->sum_max = gcov_read_unsigned (); + } } else if (GCOV_TAG_IS_COUNTER (tag) && fn_ident) { @@ -249,7 +265,9 @@ read_counts_file (void) elt.ident = fn_ident; elt.ctr = GCOV_COUNTER_FOR_TAG (tag); - slot = counts_hash->find_slot (&elt, INSERT); + slot = (is_cspgo ? cs_counts_hash->find_slot (&elt, INSERT) : + counts_hash->find_slot (&elt, INSERT)); + entry = *slot; if (!entry) { @@ -264,12 +282,21 @@ read_counts_file (void) else if (entry->lineno_checksum != lineno_checksum || entry->cfg_checksum != cfg_checksum) { - error ("profile data for function %u is corrupted", fn_ident); + error ("%s data for function %u is corrupted", + (is_cspgo ? "cs profile" : "profile"), fn_ident); error ("checksum is (%x,%x) instead of (%x,%x)", entry->lineno_checksum, entry->cfg_checksum, lineno_checksum, cfg_checksum); - delete counts_hash; - counts_hash = NULL; + if (is_cspgo) + { + delete cs_counts_hash; + cs_counts_hash = NULL; + } + else + { + delete counts_hash; + counts_hash = NULL; + } break; } if (read_length > 0) @@ -282,9 +309,17 @@ read_counts_file (void) error (is_error < 0 ? G_("%qs has overflowed") : G_("%qs is corrupted"), - da_file_name); - delete counts_hash; - counts_hash = NULL; + gcda_file_name); + if (is_cspgo) + { + delete cs_counts_hash; + cs_counts_hash = NULL; + } + else + { + delete counts_hash; + counts_hash = NULL; + } break; } } @@ -296,26 +331,30 @@ read_counts_file (void) gcov_type * get_coverage_counts (unsigned counter, unsigned cfg_checksum, - unsigned lineno_checksum, unsigned int n_counts) + unsigned lineno_checksum, unsigned int n_counts, + bool is_cspgo) { counts_entry *entry, elt; + char *gcda_file_name = (is_cspgo ? cs_da_file_name : da_file_name); /* No hash table, no counts. */ - if (!counts_hash) + if ((is_cspgo ? 
(!cs_counts_hash) : (!counts_hash))) { static int warned = 0; if (!warned++) { warning (OPT_Wmissing_profile, - "%qs profile count data file not found", - da_file_name); + "%qs %s count data file not found", + gcda_file_name, (is_cspgo ? "cs profile" : "profile")); if (dump_enabled_p ()) { dump_user_location_t loc = dump_user_location_t::from_location_t (input_location); dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc, - "file %s not found, %s\n", da_file_name, + "%s file %s not found, %s\n", + (is_cspgo ? "cs profile" : "profile"), + gcda_file_name, (flag_guess_branch_prob ? "execution counts estimated" : "execution counts assumed to be zero")); @@ -331,13 +370,14 @@ get_coverage_counts (unsigned counter, unsigned cfg_checksum, elt.ident = cgraph_node::get (current_function_decl)->profile_id; } elt.ctr = counter; - entry = counts_hash->find (&elt); + entry = (is_cspgo ? cs_counts_hash->find (&elt) : counts_hash->find (&elt)); if (!entry) { if (counter == GCOV_COUNTER_ARCS) warning_at (DECL_SOURCE_LOCATION (current_function_decl), OPT_Wmissing_profile, - "profile for function %qD not found in profile data", + "%s for function %qD not found in profile data", + (is_cspgo ? "cs profile" : "profile"), current_function_decl); /* The function was not emitted, or is weak and not chosen in the final executable. Silently fail, because there's nothing we @@ -357,9 +397,10 @@ get_coverage_counts (unsigned counter, unsigned cfg_checksum, warning_printed = warning_at (DECL_SOURCE_LOCATION (current_function_decl), OPT_Wcoverage_mismatch, - "number of counters in profile data for function %qD " + "number of counters in %s data for function %qD " "does not match " "its profile data (counter %qs, expected %i and have %i)", + (is_cspgo ? 
"cs profile" : "profile"), current_function_decl, ctr_names[counter], entry->n_counts, n_counts); else @@ -367,7 +408,8 @@ get_coverage_counts (unsigned counter, unsigned cfg_checksum, warning_at (DECL_SOURCE_LOCATION (current_function_decl), OPT_Wcoverage_mismatch, "the control flow of function %qD does not match " - "its profile data (counter %qs)", current_function_decl, + "its %s data (counter %qs)", current_function_decl, + (is_cspgo ? "cs profile" : "profile"), ctr_names[counter]); if (warning_printed && dump_enabled_p ()) { @@ -413,9 +455,6 @@ get_coverage_counts (unsigned counter, unsigned cfg_checksum, int coverage_counter_alloc (unsigned counter, unsigned num) { - if (no_coverage) - return 0; - if (!num) return 1; @@ -623,7 +662,7 @@ coverage_begin_function (unsigned lineno_checksum, unsigned cfg_checksum) { /* We don't need to output .gcno file unless we're under -ftest-coverage (e.g. -fprofile-arcs/generate/use don't need .gcno to work). */ - if (no_coverage || !bbg_file_name) + if (!bbg_file_name) return 0; expanded_location startloc @@ -981,7 +1020,8 @@ build_info_type (tree type, tree fn_info_ptr_type) function info objects. 
*/ static tree -build_info (tree info_type, tree fn_ary, unsigned object_checksum) +build_info (tree info_type, tree fn_ary, unsigned object_checksum, + bool is_cspgo) { tree info_fields = TYPE_FIELDS (info_type); tree merge_fn_type, n_funcs; @@ -1014,8 +1054,16 @@ build_info (tree info_type, tree fn_ary, unsigned object_checksum) info_fields = DECL_CHAIN (info_fields); /* Filename */ - da_file_name_len = strlen (da_file_name); - filename_string = build_string (da_file_name_len + 1, da_file_name); + if (is_cspgo) + { + da_file_name_len = strlen (cs_da_file_name); + filename_string = build_string (da_file_name_len + 1, cs_da_file_name); + } + else + { + da_file_name_len = strlen (da_file_name); + filename_string = build_string (da_file_name_len + 1, da_file_name); + } TREE_TYPE (filename_string) = build_array_type (char_type_node, build_index_type (size_int (da_file_name_len))); CONSTRUCTOR_APPEND_ELT (v1, info_fields, @@ -1142,7 +1190,7 @@ build_gcov_info_var_registration (tree gcov_info_type) for the object. Returns TRUE if coverage data is being emitted. */ static bool -coverage_obj_init (void) +coverage_obj_init (bool is_cspgo) { tree gcov_info_type; unsigned n_counters = 0; @@ -1151,8 +1199,6 @@ coverage_obj_init (void) struct coverage_data **fn_prev; char name_buf[32]; - no_coverage = 1; /* Disable any further coverage. */ - if (!prg_ctr_mask) return false; @@ -1161,7 +1207,9 @@ coverage_obj_init (void) /* Prune functions. */ for (fn_prev = &functions_head; (fn = *fn_prev);) - if (DECL_STRUCT_FUNCTION (fn->fn_decl)) + /* In cspgo, the DECL_STRUCT_FUNCTION attribute has been checked in + csprofile_transform. */ + if (is_cspgo || DECL_STRUCT_FUNCTION (fn->fn_decl)) fn_prev = &fn->next; else /* The function is not being emitted, remove from list. 
*/ @@ -1225,7 +1273,7 @@ coverage_obj_fn (vec<constructor_elt, va_gc> *ctor, tree fn, static void coverage_obj_finish (vec<constructor_elt, va_gc> *ctor, - unsigned object_checksum) + unsigned object_checksum, bool is_cspgo) { unsigned n_functions = vec_safe_length (ctor); tree fn_info_ary_type = build_array_type @@ -1242,7 +1290,8 @@ coverage_obj_finish (vec<constructor_elt, va_gc> *ctor, varpool_node::finalize_decl (fn_info_ary); DECL_INITIAL (gcov_info_var) - = build_info (TREE_TYPE (gcov_info_var), fn_info_ary, object_checksum); + = build_info (TREE_TYPE (gcov_info_var), fn_info_ary, object_checksum, + is_cspgo); varpool_node::finalize_decl (gcov_info_var); } @@ -1310,11 +1359,32 @@ coverage_init (const char *filename) memcpy (da_file_name + prefix_len, filename, len); strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX); + /* Name of cspgo da file. */ + if (flag_csprofile_generate || flag_csprofile_use) + { + if (csprofile_data_prefix) + prefix_len = strlen (csprofile_data_prefix); + + cs_da_file_name = XNEWVEC (char, len + strlen (GCOV_DATA_SUFFIX) + + prefix_len + 2); + + if (csprofile_data_prefix) + { + memcpy (cs_da_file_name, csprofile_data_prefix, prefix_len); + cs_da_file_name[prefix_len++] = *separator; + } + memcpy (cs_da_file_name + prefix_len, filename, len); + strcpy (cs_da_file_name + prefix_len + len, GCOV_DATA_SUFFIX); + } + bbg_file_stamp = local_tick; if (flag_auto_profile) read_autofdo_file (); else if (flag_branch_probabilities) - read_counts_file (); + read_counts_file (false); + + if (flag_csprofile_use) + read_counts_file (true); /* Name of bbg file. */ if (flag_test_coverage && !flag_compare_debug) @@ -1354,7 +1424,7 @@ coverage_init (const char *filename) variables and constructor. */ void -coverage_finish (void) +coverage_finish (bool is_cspgo) { if (bbg_file_name && gcov_close ()) unlink (bbg_file_name); @@ -1368,7 +1438,7 @@ coverage_finish (void) /* Global GCDA checksum that aggregates all functions. 
*/ unsigned object_checksum = 0; - if (coverage_obj_init ()) + if (coverage_obj_init (is_cspgo)) { vec<constructor_elt, va_gc> *fn_ctor = NULL; struct coverage_data *fn; @@ -1382,11 +1452,17 @@ coverage_finish (void) fn->lineno_checksum); object_checksum = crc32_unsigned (object_checksum, fn->cfg_checksum); } - coverage_obj_finish (fn_ctor, object_checksum); + coverage_obj_finish (fn_ctor, object_checksum, is_cspgo); } - XDELETEVEC (da_file_name); + if (da_file_name) + XDELETEVEC (da_file_name); da_file_name = NULL; + if (is_cspgo) + { + XDELETEVEC (cs_da_file_name); + cs_da_file_name = NULL; + } } #include "gt-coverage.h" diff --git a/gcc/coverage.h b/gcc/coverage.h index 0ac046c88..a4e90e8bd 100644 --- a/gcc/coverage.h +++ b/gcc/coverage.h @@ -23,7 +23,7 @@ along with GCC; see the file COPYING3. If not see #include "gcov-io.h" extern void coverage_init (const char *); -extern void coverage_finish (void); +extern void coverage_finish (bool); extern void coverage_remove_note_file (void); /* Start outputting coverage information for the current @@ -53,7 +53,8 @@ extern tree tree_coverage_counter_addr (unsigned /*counter*/, unsigned/*num*/); extern gcov_type *get_coverage_counts (unsigned /*counter*/, unsigned /*cfg_checksum*/, unsigned /*lineno_checksum*/, - unsigned /*n_counts*/); + unsigned /*n_counts*/, + bool /*is_cspgo*/); extern tree get_gcov_type (void); extern bool coverage_node_map_initialized_p (void); diff --git a/gcc/gcc.cc b/gcc/gcc.cc index 32e45adc2..b37b50be2 100644 --- a/gcc/gcc.cc +++ b/gcc/gcc.cc @@ -1147,7 +1147,8 @@ proper position among the other output files. 
*/ %:include(libgomp.spec)%(link_gomp)}\ %{fgnu-tm:%:include(libitm.spec)%(link_itm)}\ %(mflib) " STACK_SPLIT_SPEC "\ - %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} " SANITIZER_SPEC " \ + %{fprofile-arcs|fprofile-generate*|fcfgo-csprofile-generate*|coverage:-lgcov} \ + " SANITIZER_SPEC " \ %{!nostdlib:%{!r:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}}\ %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*} \n%(post_link) }}}}}}" #endif @@ -1265,7 +1266,7 @@ static const char *cc1_options = %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %w%b.s}}}\ %{fsyntax-only:-o %j} %{-param*}\ %{coverage:-fprofile-arcs -ftest-coverage}\ - %{fprofile-arcs|fprofile-generate*|coverage:\ + %{fprofile-arcs|fprofile-generate*|fcfgo-csprofile-generate*|coverage:\ %{!fprofile-update=single:\ %{pthread:-fprofile-update=prefer-atomic}}}"; diff --git a/gcc/ipa-profile.cc b/gcc/ipa-profile.cc index ffdcb4476..27554e507 100644 --- a/gcc/ipa-profile.cc +++ b/gcc/ipa-profile.cc @@ -827,7 +827,7 @@ ipa_profile (void) if (dump_file) { if (!node_map_initialized) - init_node_map (false); + init_node_map (false, false); node_map_initialized = true; ipa_profile_dump_all_summaries (dump_file); @@ -850,7 +850,7 @@ ipa_profile (void) if (spec_count) { if (!node_map_initialized) - init_node_map (false); + init_node_map (false, false); node_map_initialized = true; ncommon++; diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index 237743ef0..fd41941d1 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -1677,6 +1677,13 @@ merge_profile_summaries (struct lto_file_decl_data **file_data_vec) if (flag_ltrans) return; + /* TODO: The different max_run values obtained during the cspgo GEN and USE + stages with unknown bug resulted in different scaling results, which led + to different optimization decisions and finally led to coverage mismatch. + Therefore, skip the following processing steps when doing cspgo. 
*/ + if (flag_csprofile_generate || flag_csprofile_use) + return; + /* Now compute count_materialization_scale of each node. During LTRANS we already have values of count_materialization_scale computed, so just update them. */ diff --git a/gcc/opts.cc b/gcc/opts.cc index d97f6079f..7900a658f 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -3016,6 +3016,15 @@ common_handle_option (struct gcc_options *opts, false); break; + case OPT_fcfgo_csprofile_use_: + opts->x_csprofile_data_prefix = xstrdup (arg); + value = true; + /* No break here - do -fcfgo-csprofile-use processing. */ + /* FALLTHRU */ + case OPT_fcfgo_csprofile_use: + SET_OPTION_IF_UNSET (opts, opts_set, flag_csprofile_use, value); + break; + case OPT_fauto_profile_: opts->x_auto_profile_file = xstrdup (arg); opts->x_flag_auto_profile = true; @@ -3059,6 +3068,15 @@ common_handle_option (struct gcc_options *opts, SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_bit_cp, value); break; + case OPT_fcfgo_csprofile_generate_: + opts->x_csprofile_data_prefix = xstrdup (arg); + value = true; + /* No break here - do -fcfgo-csprofile-generate processing. */ + /* FALLTHRU */ + case OPT_fcfgo_csprofile_generate: + SET_OPTION_IF_UNSET (opts, opts_set, flag_csprofile_generate, value); + break; + case OPT_fprofile_info_section: opts->x_profile_info_section = ".gcov_info"; break; diff --git a/gcc/passes.cc b/gcc/passes.cc index 36e5b4ac4..154690d02 100644 --- a/gcc/passes.cc +++ b/gcc/passes.cc @@ -2410,6 +2410,77 @@ execute_all_ipa_transforms (bool do_not_collect) node->ipa_transforms_to_apply.release (); } +/* When is_cspgo is false, execute all passes except cspgo and save the pointer + for the next execution. 
*/ + +void +execute_all_ipa_transforms_for_cspgo (bool is_cspgo) +{ + struct cgraph_node *node; + ipa_opt_pass_d *cspgo_pass = NULL; + node = cgraph_node::get (current_function_decl); + + cgraph_node *next_clone; + for (cgraph_node *n = node->clones; n; n = next_clone) + { + next_clone = n->next_sibling_clone; + if (n->decl != node->decl) + n->materialize_clone (); + } + + int j = 0; + gcc::pass_manager *passes = g->get_passes (); + bool report = profile_report && (cfun->curr_properties & PROP_gimple) != 0; + + if (report) + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + + for (auto p : node->ipa_transforms_to_apply) + { + /* Execute all passes except cspgo, and save the pointer of cspgo pass + for the next execution. */ + if (!is_cspgo && strstr (p->name, "csprofile") != NULL) + { + cspgo_pass = p; + continue; + } + /* To get consistent statistics, we need to account each function + to each IPA pass. */ + if (report) + { + for (;j < p->static_pass_number; j++) + if (passes->get_pass_for_id (j) + && passes->get_pass_for_id (j)->type == IPA_PASS + && ((ipa_opt_pass_d *)passes->get_pass_for_id (j)) + ->function_transform) + { + check_profile_consistency (j, true); + account_profile (j, true); + } + gcc_checking_assert (passes->get_pass_for_id (j) == p); + } + execute_one_ipa_transform_pass (node, p, true); + } + /* Account remaining IPA passes. */ + if (report) + { + for (;!passes->get_pass_for_id (j) + || passes->get_pass_for_id (j)->type != RTL_PASS; j++) + if (passes->get_pass_for_id (j) + && passes->get_pass_for_id (j)->type == IPA_PASS + && ((ipa_opt_pass_d *)passes->get_pass_for_id (j)) + ->function_transform) + { + check_profile_consistency (j, true); + account_profile (j, true); + } + pop_cfun (); + } + node->ipa_transforms_to_apply.release (); + if (!is_cspgo) + node->ipa_transforms_to_apply.safe_push (cspgo_pass); +} + /* Check if PASS is explicitly disabled or enabled and return the gate status. 
FUNC is the function to be processed, and GATE_STATUS is the gate status determined by pass manager by diff --git a/gcc/passes.def b/gcc/passes.def index e945af96a..862ef0d8f 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -164,6 +164,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ipa_cdtor_merge); NEXT_PASS (pass_ipa_fn_summary); NEXT_PASS (pass_ipa_inline); + NEXT_PASS (pass_ipa_csprofile); NEXT_PASS (pass_ipa_pure_const); NEXT_PASS (pass_ipa_modref); NEXT_PASS (pass_ipa_free_fn_summary, false /* small_p */); diff --git a/gcc/profile.cc b/gcc/profile.cc index 40e105258..0ffc1ba4f 100644 --- a/gcc/profile.cc +++ b/gcc/profile.cc @@ -201,7 +201,7 @@ instrument_values (histogram_values values) CFG_CHECKSUM is the precomputed checksum for the CFG. */ static gcov_type * -get_exec_counts (unsigned cfg_checksum, unsigned lineno_checksum) +get_exec_counts (unsigned cfg_checksum, unsigned lineno_checksum, bool is_cspgo) { unsigned num_edges = 0; basic_block bb; @@ -219,7 +219,7 @@ get_exec_counts (unsigned cfg_checksum, unsigned lineno_checksum) } counts = get_coverage_counts (GCOV_COUNTER_ARCS, cfg_checksum, - lineno_checksum, num_edges); + lineno_checksum, num_edges, is_cspgo); if (!counts) return NULL; @@ -418,7 +418,8 @@ cmp_stats (const void *ptr1, const void *ptr2) CFG_CHECKSUM is the precomputed checksum for the CFG. */ static void -compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum) +compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum, + bool is_cspgo) { basic_block bb; int i; @@ -427,7 +428,8 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum) int passes; int hist_br_prob[20]; int num_branches; - gcov_type *exec_counts = get_exec_counts (cfg_checksum, lineno_checksum); + gcov_type *exec_counts = get_exec_counts (cfg_checksum, lineno_checksum, + is_cspgo); int inconsistent = 0; /* Very simple sanity checks so we catch bugs in our profiling code. 
*/ @@ -868,7 +870,7 @@ sort_hist_values (histogram_value hist) static void compute_value_histograms (histogram_values values, unsigned cfg_checksum, - unsigned lineno_checksum) + unsigned lineno_checksum, bool is_cspgo) { unsigned i, j, t, any; unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS]; @@ -898,7 +900,8 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum, histogram_counts[t] = get_coverage_counts (COUNTER_FOR_HIST_TYPE (t), cfg_checksum, lineno_checksum, - n_histogram_counters[t]); + n_histogram_counters[t], + is_cspgo); if (histogram_counts[t]) any = 1; act_count[t] = histogram_counts[t]; @@ -1128,11 +1131,12 @@ compare_freqs (const void *p1, const void *p2) /* Only read execution count for thunks. */ void -read_thunk_profile (struct cgraph_node *node) +read_thunk_profile (struct cgraph_node *node, bool is_cspgo) { tree old = current_function_decl; current_function_decl = node->decl; - gcov_type *counts = get_coverage_counts (GCOV_COUNTER_ARCS, 0, 0, 1); + gcov_type *counts = get_coverage_counts (GCOV_COUNTER_ARCS, 0, 0, 1, + is_cspgo); if (counts) { node->callees->count = node->count @@ -1164,7 +1168,7 @@ read_thunk_profile (struct cgraph_node *node) Main entry point of this file. 
*/ void -branch_prob (bool thunk) +branch_prob (bool thunk, bool is_cspgo) { basic_block bb; unsigned i; @@ -1507,9 +1511,10 @@ branch_prob (bool thunk) if (flag_branch_probabilities) { - compute_branch_probabilities (cfg_checksum, lineno_checksum); + compute_branch_probabilities (cfg_checksum, lineno_checksum, is_cspgo); if (flag_profile_values) - compute_value_histograms (values, cfg_checksum, lineno_checksum); + compute_value_histograms (values, cfg_checksum, lineno_checksum, + is_cspgo); } remove_fake_edges (); diff --git a/gcc/profile.h b/gcc/profile.h index c5b6f4889..e92d6154c 100644 --- a/gcc/profile.h +++ b/gcc/profile.h @@ -68,7 +68,7 @@ extern void mcf_smooth_cfg (void); extern gcov_type sum_edge_counts (vec<edge, va_gc> *edges); -extern void init_node_map (bool); +extern void init_node_map (bool, bool); extern void del_node_map (void); extern void get_working_sets (void); diff --git a/gcc/timevar.def b/gcc/timevar.def index fc2b1e1e7..6fdb2c767 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -104,6 +104,7 @@ DEFTIMEVAR (TV_WHOPR_PARTITIONING , "whopr partitioning") DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltrans") DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference") DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile") +DEFTIMEVAR (TV_IPA_CSPROFILE , "ipa csprofile") DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile") DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const") DEFTIMEVAR (TV_IPA_ICF , "ipa icf") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 18b0f8022..f9c2eed8b 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -519,6 +519,7 @@ extern simple_ipa_opt_pass *make_pass_ipa_increase_alignment (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_fn_summary (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt); +extern ipa_opt_pass_d *make_pass_ipa_csprofile (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary 
(gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt); @@ -667,6 +668,7 @@ extern void execute_pass_list (function *, opt_pass *); extern void execute_ipa_pass_list (opt_pass *); extern void execute_ipa_summary_passes (ipa_opt_pass_d *); extern void execute_all_ipa_transforms (bool); +extern void execute_all_ipa_transforms_for_cspgo (bool); extern void execute_all_ipa_stmt_fixups (struct cgraph_node *, gimple **); extern bool pass_init_dump_file (opt_pass *); extern void pass_fini_dump_file (opt_pass *); diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc index e7646f1a1..aa3a2b3a9 100644 --- a/gcc/tree-profile.cc +++ b/gcc/tree-profile.cc @@ -725,7 +725,7 @@ tree_profiling (void) cgraphunit.cc:ipa_passes(). */ gcc_assert (symtab->state == IPA_SSA); - init_node_map (true); + init_node_map (true, false); parse_profile_file_filtering (); FOR_EACH_DEFINED_FUNCTION (node) @@ -766,7 +766,7 @@ tree_profiling (void) time. */ else { - read_thunk_profile (node); + read_thunk_profile (node, false); continue; } } @@ -781,7 +781,7 @@ tree_profiling (void) && (execute_fixup_cfg () & TODO_cleanup_cfg)) cleanup_tree_cfg (); - branch_prob (thunk); + branch_prob (thunk, false); if (! flag_branch_probabilities && flag_profile_values) @@ -863,6 +863,170 @@ tree_profiling (void) return 0; } +/* Profile all functions in the callgraph with cs profile. 
*/ + +static unsigned int +csprofile_transform (struct cgraph_node *node) +{ + basic_block bb; + bool thunk = false; + + parse_profile_file_filtering (); + + if (dump_file) + { + fprintf (dump_file, "[cspgo] trying cspgo on function:\n"); + dump_function_header (dump_file, cfun->decl, dump_flags); + } + + if (!DECL_STRUCT_FUNCTION (current_function_decl)) + { + if (dump_file) + fprintf (dump_file, "[cspgo] %s without function decl, skip.\n", + node->dump_name ()); + return 0; + } + + if (!gimple_has_body_p (node->decl) && !node->thunk) + { + if (dump_file) + fprintf (dump_file, "[cspgo] %s without gimple body, skip.\n", + node->dump_name ()); + return 0; + } + + /* Don't profile functions produced for builtin stuff. */ + if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) + { + if (dump_file) + fprintf (dump_file, "[cspgo] %s with BUILTINS_LOCATION, skip.\n", + node->dump_name ()); + return 0; + } + + const char *file = LOCATION_FILE (DECL_SOURCE_LOCATION (node->decl)); + if (!file || !include_source_file_for_profile (file)) + { + if (dump_file) + fprintf (dump_file, "[cspgo] %s is sub func or in filter-files, " + "skip.\n", node->dump_name ()); + return 0; + } + + if (lookup_attribute ("no_profile_instrument_function", + DECL_ATTRIBUTES (node->decl))) + { + if (dump_file) + fprintf (dump_file, "[cspgo] %s is no_profile_instrument_function," + " skip.\n", node->dump_name ()); + return 0; + } + + /* Do not instrument extern inline functions. */ + if (DECL_EXTERNAL (node->decl)) + { + if (dump_file) + fprintf (dump_file, "[cspgo] %s is DECL_EXTERNAL, skip.\n", + node->dump_name ()); + return 0; + } + + if (!coverage_node_map_initialized_p ()) + init_node_map (true, true); + + /* Node without profile id should skip. 
*/ + if (!node->profile_id) + { + if (dump_file) + fprintf (dump_file, "[cspgo] %s does not has profile_id, skip.\n", + node->dump_name ()); + return 0; + } + + if (flag_csprofile_generate) + { + profile_arc_flag = 1; + flag_branch_probabilities = 0; + } + + /* Process thunk function. */ + if (node->thunk) + { + /* We cannot expand variadic thunks to Gimple. */ + if (stdarg_p (TREE_TYPE (node->decl))) + { + if (dump_file) + fprintf (dump_file, "[cspgo] %s is DECL_EXTERNAL, skip.\n", + node->dump_name ()); + return 0; + } + thunk = true; + /* When generate profile, expand thunk to gimple so it can be + instrumented same way as other functions. */ + if (profile_arc_flag) + expand_thunk (node, false, true); + /* Read cgraph profile but keep function as thunk at profile-use + time. */ + else + { + read_thunk_profile (node, true); + return 0; + } + } + + /* Local pure-const may imply need to fixup the cfg. */ + if (gimple_has_body_p (node->decl) + && (execute_fixup_cfg () & TODO_cleanup_cfg)) + cleanup_tree_cfg (); + + branch_prob (thunk, true); + + if (! flag_branch_probabilities + && flag_profile_values) + gimple_gen_ic_func_profiler (); + + if (flag_branch_probabilities + && !thunk + && flag_profile_values + && flag_value_profile_transformations + && profile_status_for_fn (cfun) == PROFILE_READ) + gimple_value_profile_transformations (); + + /* The above could hose dominator info. Currently there is + none coming in, this is a safety valve. It should be + easy to adjust it, if and when there is some. */ + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + + release_profile_file_filtering (); + + if (flag_csprofile_generate) + { + profile_arc_flag = 0; + flag_branch_probabilities = 1; + } + + /* Update call statements and rebuild the cgraph. 
*/ + FOR_EACH_BB_FN (bb, cfun) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt)) + update_stmt (stmt); + } + } + + /* re-merge split blocks. */ + cleanup_tree_cfg (); + update_ssa (TODO_update_ssa); + + cgraph_edge::rebuild_edges (); + + return 0; +} + namespace { const pass_data pass_data_ipa_tree_profile = @@ -910,4 +1074,57 @@ make_pass_ipa_tree_profile (gcc::context *ctxt) return new pass_ipa_tree_profile (ctxt); } +namespace { + +const pass_data pass_data_ipa_csprofile = +{ + IPA_PASS, /* type */ + "csprofile", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_IPA_CSPROFILE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_ipa_csprofile : public ipa_opt_pass_d +{ +public: + pass_ipa_csprofile (gcc::context *ctxt) + : ipa_opt_pass_d (pass_data_ipa_csprofile, ctxt, + NULL, /* generate_summary */ + NULL, /* write_summary */ + NULL, /* read_summary */ + NULL, /* write_optimization_summary */ + NULL, /* read_optimization_summary */ + NULL, /* stmt_fixup */ + 0, /* function_transform_todo_flags_start */ + csprofile_transform, /* function_transform */ + NULL) /* variable_transform */ + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return (flag_csprofile_generate || flag_csprofile_use); + } + /* The main process of cspgo is in csprofile_transform, execute does not need + to do anything. 
*/ + virtual unsigned int execute (function *) + { + return 0; + } + +}; // class pass_ipa_csprofile + +} // anon namespace + +ipa_opt_pass_d * +make_pass_ipa_csprofile (gcc::context *ctxt) +{ + return new pass_ipa_csprofile (ctxt); +} + #include "gt-tree-profile.h" diff --git a/gcc/value-prof.cc b/gcc/value-prof.cc index c240a1863..9c7191287 100644 --- a/gcc/value-prof.cc +++ b/gcc/value-prof.cc @@ -1234,7 +1234,7 @@ coverage_node_map_initialized_p (void) that the PROFILE_IDs was already assigned. */ void -init_node_map (bool local) +init_node_map (bool local, bool is_cspgo) { struct cgraph_node *n; cgraph_node_map = new hash_map<profile_id_hash, cgraph_node *>; @@ -1245,6 +1245,12 @@ init_node_map (bool local) cgraph_node **val; dump_user_location_t loc = dump_user_location_t::from_function_decl (n->decl); + + /* In cspgo, inline and clone functions will not be expand, + so skipped. */ + if (is_cspgo && (n->inlined_to || n->clone_of)) + continue; + if (local) { n->profile_id = coverage_compute_profile_id (n); @@ -1290,6 +1296,7 @@ void del_node_map (void) { delete cgraph_node_map; + cgraph_node_map = 0; } /* Return cgraph node for function with pid */ diff --git a/gcc/value-prof.h b/gcc/value-prof.h index d852c41f3..0fe3821c3 100644 --- a/gcc/value-prof.h +++ b/gcc/value-prof.h @@ -112,8 +112,8 @@ extern struct cgraph_node* find_func_by_profile_id (int func_id); /* In profile.cc. */ extern void init_branch_prob (void); -extern void branch_prob (bool); -extern void read_thunk_profile (struct cgraph_node *); +extern void branch_prob (bool, bool); +extern void read_thunk_profile (struct cgraph_node *, bool); extern void end_branch_prob (void); #endif /* GCC_VALUE_PROF_H */ -- 2.25.1
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS) is an openSUSE project.
浙ICP备2022010568号-2