Projects
Mega:24.03:SP1:Everything
gcc
_service:tar_scm:0007-MULL64-2-3-Fold-series-of...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch of Package gcc
From 547ab9b3e073ef389e5fd89d961bb1e3e6934ae9 Mon Sep 17 00:00:00 2001 From: zhongyunde <zhongyunde@huawei.com> Date: Wed, 9 Nov 2022 17:04:13 +0800 Subject: [PATCH 07/22] [MULL64 2/3] Fold series of instructions into mul Merge the low part of series instructions into mul gcc/ * match.pd: Add simplifications for low part of mul * common.opt: Add new option fmerge-mull enable with -O2 * opts.cc: default_options_table gcc/testsuite/ * g++.dg/tree-ssa/mull64.C: New test. --- gcc/common.opt | 4 +++ gcc/match.pd | 27 ++++++++++++++++++++ gcc/opts.cc | 1 + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C diff --git a/gcc/common.opt b/gcc/common.opt index 8a0dafc52..e365a48bc 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2126,6 +2126,10 @@ fmerge-debug-strings Common Var(flag_merge_debug_strings) Init(1) Attempt to merge identical debug strings across compilation units. +fmerge-mull +Common Var(flag_merge_mull) Init(0) Optimization +Attempt to merge series instructions into mul. + fmessage-length= Common RejectNegative Joined UInteger -fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping. diff --git a/gcc/match.pd b/gcc/match.pd index fd0857fc9..2092e6959 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4301,6 +4301,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) ) #endif +#if GIMPLE +/* These patterns are mostly used by FORWPROP1 to fold some operations into more + simple IR. 
The following scenario should be matched: + In0Lo = In0(D) & 4294967295; + In0Hi = In0(D) >> 32; + In1Lo = In1(D) & 4294967295; + In1Hi = In1(D) >> 32; + Addc = In0Lo * In1Hi + In0Hi * In1Lo; + addc32 = Addc << 32; + ResLo = In0Lo * In1Lo + addc32 */ +(simplify + (plus:c (mult @4 @5) + (lshift + (plus:c + (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) + (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) + INTEGER_CST@3 + ) + ) + (if (flag_merge_mull && INTEGRAL_TYPE_P (type) + && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) + && TYPE_PRECISION (type) == 64) + (mult (convert:type @0) (convert:type @1)) + ) +) +#endif + /* Simplification moved from fold_cond_expr_with_comparison. It may also be extended. */ /* This pattern implements two kinds simplification: diff --git a/gcc/opts.cc b/gcc/opts.cc index a97630d1c..eae71ed20 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -647,6 +647,7 @@ static const struct default_options default_options_table[] = VECT_COST_MODEL_VERY_CHEAP }, { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, /* -O2 and above optimizations, but not -Os or -Og. 
*/ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C new file mode 100644 index 000000000..2a3b74604 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ + +# define BN_BITS4 32 +# define BN_MASK2 (0xffffffffffffffffL) +# define BN_MASK2l (0xffffffffL) +# define BN_MASK2h (0xffffffff00000000L) +# define BN_MASK2h1 (0xffffffff80000000L) +# define LBITS(a) ((a)&BN_MASK2l) +# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) +# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) + +void mul64(unsigned long in0, unsigned long in1, + unsigned long &retLo, unsigned long &retHi) { + unsigned long m00, m01, m10, m11, al, ah, bl, bh; + unsigned long Addc, addc32, low; + al = LBITS(in0); + ah = HBITS(in0); + bl = LBITS(in1); + bh = HBITS(in1); + m10 = bh * al; + m00 = bl * al; + m01 = bl * ah; + m11 = bh * ah; + Addc = (m10 + m01) & BN_MASK2; + if (Addc < m01) m11 += L2HBITS((unsigned long)1); + m11 += HBITS(Addc); + addc32 = L2HBITS(Addc); + low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++; + retLo = low; + retHi = m11; +} + +/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */ -- 2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2