Projects
openEuler:24.03:SP1:Everything:64G
gcc
_service:tar_scm:0031-AutoBOLT-Support-saving-f...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch of Package gcc
From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001 From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com> Date: Tue, 5 Dec 2023 15:33:08 +0800 Subject: [PATCH] [AutoBOLT] Support saving feedback count info to ELF segment 1/3 --- gcc/common.opt | 8 + gcc/final.cc | 405 ++++++++++++++++++++++++++++++++++++++++++++++++- gcc/opts.cc | 61 ++++++++ 3 files changed, 473 insertions(+), 1 deletion(-) diff --git a/gcc/common.opt b/gcc/common.opt index b01df919e..e69947fc2 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2546,6 +2546,14 @@ freorder-functions Common Var(flag_reorder_functions) Optimization Reorder functions to improve code placement. +fauto-bolt +Common Var(flag_auto_bolt) +Generate profile from AutoFDO or PGO and do BOLT optimization after linkage. + +fauto-bolt= +Common Joined RejectNegative +Specify the feedback data directory required by BOLT-plugin. The default is the current directory. + frerun-cse-after-loop Common Var(flag_rerun_cse_after_loop) Optimization Add a common subexpression elimination pass after loop optimizations. diff --git a/gcc/final.cc b/gcc/final.cc index a9868861b..d4c4fa08f 100644 --- a/gcc/final.cc +++ b/gcc/final.cc @@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see #include "rtl-iter.h" #include "print-rtl.h" #include "function-abi.h" +#include "insn-codes.h" #include "common/common-target.h" #ifdef XCOFF_DEBUGGING_INFO @@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx) } } #endif - + +#define ASM_FDO_SECTION_PREFIX ".text.fdo." + +#define ASM_FDO_CALLER_FLAG ".fdo.caller " +#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size " +#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind" + +#define ASM_FDO_CALLEE_FLAG ".fdo.callee" + +/* Return the relative offset address of the start instruction of BB, + return -1 if it is empty instruction. 
*/ + +static int +get_bb_start_addr (basic_block bb) +{ + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + { + if (!INSN_P (insn)) + { + continue; + } + /* The jump target of call is not in this function, so + it should be excluded. */ + if (CALL_P (insn)) + { + return -1; + } + + int insn_code = recog_memoized (insn); + + /* The instruction NOP in llvm-bolt belongs to the previous + BB, so it needs to be skipped. */ + if (insn_code != CODE_FOR_nop) + { + return INSN_ADDRESSES (INSN_UID (insn)); + } + } + return -1; +} + +/* Return the relative offset address of the end instruction of BB, + return -1 if it is empty, if its last insn is a call, or if BB has neither exactly one successor nor two successors ending in a conditional jump. */ + +static int +get_bb_end_addr (basic_block bb) +{ + rtx_insn *insn; + int num_succs = EDGE_COUNT (bb->succs); + FOR_BB_INSNS_REVERSE (bb, insn) + { + if (!INSN_P (insn)) + { + continue; + } + /* The jump target of call is not in this function, so + it should be excluded. */ + if (CALL_P (insn)) + { + return -1; + } + if ((num_succs == 1) + || ((num_succs == 2) && any_condjump_p (insn))) + { + return INSN_ADDRESSES (INSN_UID (insn)); + } + else + { + return -1; + } + } + return -1; +} + +/* Return the address of the last insn of cfun that satisfies INSN_P, scanning backwards from get_last_insn () and stopping before get_insns (); return -1 if none is found. */ + +static int +get_function_end_addr () +{ + rtx_insn *insn = get_last_insn (); + for (; insn != get_insns (); insn = PREV_INSN (insn)) + { + if (!INSN_P (insn)) + { + continue; + } + return INSN_ADDRESSES (INSN_UID (insn)); + } + + return -1; +} + +/* Return the profile status of cfun as a string, indexed by profile_status_for_fn (cfun). */ + +static const char * +get_function_profile_status () +{ + const char *profile_status[] = { + "PROFILE_ABSENT", + "PROFILE_GUESSED", + "PROFILE_READ", + "PROFILE_LAST" /* Last value, used by profile streaming. */ + }; + + return profile_status[profile_status_for_fn (cfun)]; +} + +/* Return the count from the feedback data, such as PGO or AutoFDO. Counts whose quality is below GUESSED are reported as 0. */ + +inline static gcov_type +get_fdo_count (profile_count count) +{ + return count.quality () >= GUESSED + ? count.to_gcov_type () : 0; +} + +/* Return the profile quality string. 
*/ + +static const char * +get_fdo_count_quality (profile_count count) +{ + const char *profile_quality[] = { + "UNINITIALIZED_PROFILE", + "GUESSED_LOCAL", + "GUESSED_GLOBAL0", + "GUESSED_GLOBAL0_ADJUSTED", + "GUESSED", + "AFDO", + "ADJUSTED", + "PRECISE" + }; + + return profile_quality[count.quality ()]; +} + +static const char * +alias_local_functions (const char *fnname) +{ + if (TREE_PUBLIC (cfun->decl)) + { + return fnname; + } + return concat (fnname, "/", lbasename (dump_base_name), NULL); +} + +/* Return the bind type string of cfun: "GLOBAL" for a public decl that is not weak, "WEAK" for a public weak decl and "LOCAL" for a non-public decl. The final "UNKNOWN" return cannot be reached because every branch above returns. */ + +static const char * +simple_get_function_bind () +{ + const char *function_bind[] = { + "GLOBAL", + "WEAK", + "LOCAL", + "UNKNOWN" + }; + + if (TREE_PUBLIC (cfun->decl)) + { + if (!(DECL_WEAK (cfun->decl))) + { + return function_bind[0]; + } + else + { + return function_bind[1]; + } + } + else + { + return function_bind[2]; + } + + return function_bind[3]; +} + +/* Dump the direct callees of BB: for every call insn whose callee decl is known, emit the call address, the callee name and CALL_COUNT as .string records, and log the call to dump_file when it is open. NOTE(review): alias_local_functions tests TREE_PUBLIC (cfun->decl), i.e. the calling function, when deciding whether to suffix the callee name with the file name, and the string it builds with concat is never freed -- confirm both are intended. */ + +static void +dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count) +{ + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + { + if (insn && CALL_P (insn)) + { + tree callee = get_call_fndecl (insn); + + if (callee) + { + fprintf (asm_out_file, "\t.string \"%x\"\n", + INSN_ADDRESSES (INSN_UID (insn))); + + fprintf (asm_out_file, "\t.string \"%s%s\"\n", + ASM_FDO_CALLEE_FLAG, + alias_local_functions (get_fnname_from_decl (callee))); + + fprintf (asm_out_file, + "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n", + call_count); + + if (dump_file) + { + fprintf (dump_file, "call: %x --> %s \n", + INSN_ADDRESSES (INSN_UID (insn)), + alias_local_functions + (get_fnname_from_decl (callee))); + } + } + } + } +} + +/* Dump the edge info into asm. 
*/ +static int +dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count) +{ + edge e; + edge_iterator ei; + gcov_type edge_total_count = 0; + + FOR_EACH_EDGE (e, ei, bb->succs) + { + gcov_type edge_count = get_fdo_count (e->count ()); + edge_total_count += edge_count; + + int edge_start_addr = get_bb_end_addr (e->src); + int edge_end_addr = get_bb_start_addr(e->dest); + + if (edge_start_addr == -1 || edge_end_addr == -1) + { + continue; + } + + /* This is a reserved assert for the original design. If this + assert is found, use the address of the previous instruction + as edge_start_addr. */ + gcc_assert (edge_start_addr != edge_end_addr); + + if (dump_file) + { + fprintf (dump_file, "edge: %x --> %x = (%ld)\n", + edge_start_addr, edge_end_addr, edge_count); + } + + if (edge_count > 0) + { + fprintf(asm_out_file, "\t.string \"%x\"\n", edge_start_addr); + fprintf(asm_out_file, "\t.string \"%x\"\n", edge_end_addr); + fprintf(asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n", + edge_count); + } + } + + gcov_type call_count = MAX (edge_total_count, bb_count); + if (call_count > 0) + { + dump_direct_callee_info_to_asm (bb, call_count); + } +} + +/* Dump the bb info into asm. */ + +static void +dump_bb_info_to_asm (basic_block bb, gcov_type bb_count) +{ + int bb_start_addr = get_bb_start_addr (bb); + if (bb_start_addr != -1) + { + fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr); + fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n", + bb_count); + } +} + +/* Dump the function info into asm. 
*/ + +static void +dump_function_info_to_asm (const char *fnname) +{ + fprintf (asm_out_file, "\t.string \"%s%s\"\n", + ASM_FDO_CALLER_FLAG, alias_local_functions (fnname)); + fprintf (asm_out_file, "\t.string \"%s%d\"\n", + ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ()); + fprintf (asm_out_file, "\t.string \"%s%s\"\n", + ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ()); + + if (dump_file) + { + fprintf (dump_file, "\n FUNC_NAME: %s\n", + alias_local_functions (fnname)); + fprintf (dump_file, " file: %s\n", + dump_base_name); + fprintf (dump_file, "profile_status: %s\n", + get_function_profile_status ()); + fprintf (dump_file, " size: %x\n", + get_function_end_addr ()); + fprintf (dump_file, " function_bind: %s\n", + simple_get_function_bind ()); + } +} + +/* Dump function profile into form AutoFDO or PGO to asm. */ + +static void +dump_fdo_info_to_asm (const char *fnname) +{ + basic_block bb; + + dump_function_info_to_asm (fnname); + + FOR_EACH_BB_FN (bb, cfun) + { + gcov_type bb_count = get_fdo_count (bb->count); + if (bb_count == 0) + { + continue; + } + + if (dump_file) + { + fprintf (dump_file, "BB: %x --> %x = (%ld) [%s]\n", + get_bb_start_addr (bb), get_bb_end_addr (bb), + bb_count, get_fdo_count_quality (bb->count)); + } + + if (flag_profile_use) + { + dump_edge_jump_info_to_asm (bb, bb_count); + } + else if (flag_auto_profile) + { + dump_bb_info_to_asm (bb, bb_count); + } + } +} + +/* When -fauto-bolt option is turnded on, the .text.fdo section + will be generated in the *.s file if there is feedback information + from PGO or AutoFDO. This section will parserd in BOLT-plugin. 
*/ + +static void +dump_profile_to_elf_sections () +{ + if (!flag_function_sections) + { + error ("-fauto-bolt should work with -ffunction-section"); + return; + } + if (!flag_ipa_ra) + { + error ("-fauto-bolt should work with -fipa-ra"); + return; + } + if (flag_align_jumps) + { + error ("-fauto-bolt is not supported with -falign-jumps"); + return; + } + if (flag_align_labels) + { + error ("-fauto-bolt is not spported with -falign-loops"); + return; + } + if (flag_align_loops) + { + error ("-fauto-bolt is not supported with -falign-loops"); + return; + } + + /* Return if no feedback data. */ + if (!flag_profile_use && !flag_auto_profile) + { + error ("-fauto-bolt should use with -profile-use or -fauto-profile"); + return; + } + + /* Avoid empty functions. */ + if (TREE_CODE (cfun->decl) != FUNCTION_DECL) + { + return; + } + int flags = SECTION_DEBUG | SECTION_EXCLUDE; + const char *fnname = get_fnname_from_decl (current_function_decl); + char *profile_fnname = NULL; + + asprintf (&profile_fnname, "%s%s", ASM_FDO_SECTION_PREFIX, fnname); + switch_to_section (get_section (profile_fnname, flags, NULL)); + dump_fdo_info_to_asm (fnname); + + if (profile_fnname) + { + free (profile_fnname); + profile_fnname = NULL; + } +} + /* Turn the RTL into assembly. */ static unsigned int rest_of_handle_final (void) @@ -4334,6 +4731,12 @@ rest_of_handle_final (void) targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0), decl_fini_priority_lookup (current_function_decl)); + + if (flag_auto_bolt) + { + dump_profile_to_elf_sections (); + } + return 0; } diff --git a/gcc/opts.cc b/gcc/opts.cc index b868d189e..6d57e7d69 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -1279,6 +1279,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, if (opts->x_flag_vtable_verify && opts->x_flag_lto) sorry ("vtable verification is not supported with LTO"); + /* Currently -fauto-bolt is not supported for LTO. 
*/ + if (opts->x_flag_auto_bolt && opts->x_flag_lto) + sorry ("%<-fauto-bolt%> is not supported with LTO"); + /* Control IPA optimizations based on different -flive-patching level. */ if (opts->x_flag_live_patching) control_options_for_live_patching (opts, opts_set, @@ -1291,6 +1295,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, = (opts->x_flag_unroll_loops || opts->x_flag_peel_loops || opts->x_optimize >= 3); + + if (opts->x_flag_auto_bolt) + { + /* Record the function section to facilitate the feedback + data storage. */ + if (!opts->x_flag_function_sections) + { + inform (loc, + "%<-fauto-bolt%> should work with %<-ffunction-sections%>," + " enabling %<-ffunction-sections%>"); + opts->x_flag_function_sections = true; + } + + /* Cancel the internal alignment of the function. The binary + optimizer bolt will cancel the internal alignment optimization + of the function, so the alignment is meaningless at this time, + and if not, it will bring trouble to the calculation of the + offset address of the instruction. */ + if (opts->x_flag_align_jumps) + { + inform (loc, + "%<-fauto-bolt%> should not work with %<-falign-jumps%>," + " disabling %<-falign-jumps%>"); + opts->x_flag_align_jumps = false; + } + + if (opts->x_flag_align_labels) + { + inform (loc, + "%<-fauto-bolt%> should not work with %<-falign-labels%>," + " disabling %<-falign-labels%>"); + opts->x_flag_align_labels = false; + } + + if (opts->x_flag_align_loops) + { + inform (loc, + "%<-fauto-bolt%> should not work with %<-falign-loops%>," + " disabling %<-falign-loops%>"); + opts->x_flag_align_loops = false; + } + + /* When parsing instructions in RTL phase, we need to know + the call information of instructions to avoid being optimized. */ + if (!opts->x_flag_ipa_ra) + { + inform (loc, + "%<-fauto-bolt%> should work with %<-fipa-ra%>," + " enabling %<-fipa-ra%>"); + opts->x_flag_ipa_ra = true; + } + } /* With -fcx-limited-range, we do cheap and quick complex arithmetic. 
*/ if (opts->x_flag_cx_limited_range) @@ -3226,6 +3282,11 @@ common_handle_option (struct gcc_options *opts, &opts->x_flag_align_functions, &opts->x_str_align_functions); break; + + case OPT_fauto_bolt_: + case OPT_fauto_bolt: + /* Deferred. */ + break; case OPT_ftabstop_: /* It is documented that we silently ignore silly values. */ -- 2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2