Projects
Mega:24.03:SP1:Everything
gcc
_service:tar_scm:0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch of Package gcc
From 6684509e81e4341675c73a7dc853180229a8abcb Mon Sep 17 00:00:00 2001 From: Pronin Alexander 00812787 <pronin.alexander@huawei.com> Date: Tue, 24 Jan 2023 16:43:40 +0300 Subject: [PATCH 04/18] Add option to allow matching uaddsub overflow for widen ops too. --- gcc/common.opt | 5 ++ gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++ gcc/tree-ssa-math-opts.cc | 43 ++++++++-- 3 files changed, 184 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c diff --git a/gcc/common.opt b/gcc/common.opt index dac477c04..39c90604e 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -3106,6 +3106,11 @@ freciprocal-math Common Var(flag_reciprocal_math) SetByCombined Optimization Same as -fassociative-math for expressions which include division. +fuaddsub-overflow-match-all +Common Var(flag_uaddsub_overflow_match_all) +Match unsigned add/sub overflow even if the target does not support +the corresponding instruction. + ; Nonzero means that unsafe floating-point math optimizations are allowed ; for the sake of speed. 
IEEE compliance is not guaranteed, and operations ; are allowed to assume that their arguments and results are "normal" diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c new file mode 100644 index 000000000..96c26d308 --- /dev/null +++ b/gcc/testsuite/gcc.dg/uaddsub.c @@ -0,0 +1,143 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */ +#include <stdint.h> + +typedef unsigned __int128 uint128_t; +typedef struct uint256_t +{ + uint128_t lo; + uint128_t hi; +} uint256_t; + +uint16_t add16 (uint8_t a, uint8_t b) +{ + uint8_t tmp = a + b; + uint8_t overflow = 0; + if (tmp < a) + overflow = 1; + + uint16_t res = overflow; + res <<= 8; + res += tmp; + return res; +} + +uint32_t add32 (uint16_t a, uint16_t b) +{ + uint16_t tmp = a + b; + uint16_t overflow = 0; + if (tmp < a) + overflow = 1; + + uint32_t res = overflow; + res <<= 16; + res += tmp; + return res; +} + +uint64_t add64 (uint32_t a, uint32_t b) +{ + uint32_t tmp = a + b; + uint32_t overflow = 0; + if (tmp < a) + overflow = 1; + + uint64_t res = overflow; + res <<= 32; + res += tmp; + return res; +} + +uint128_t add128 (uint64_t a, uint64_t b) +{ + uint64_t tmp = a + b; + uint64_t overflow = 0; + if (tmp < a) + overflow = 1; + + uint128_t res = overflow; + res <<= 64; + res += tmp; + return res; +} + +uint256_t add256 (uint128_t a, uint128_t b) +{ + uint128_t tmp = a + b; + uint128_t overflow = 0; + if (tmp < a) + overflow = 1; + + uint256_t res; + res.hi = overflow; + res.lo = tmp; + return res; +} + +uint16_t sub16 (uint8_t a, uint8_t b) +{ + uint8_t tmp = a - b; + uint8_t overflow = 0; + if (tmp > a) + overflow = -1; + + uint16_t res = overflow; + res <<= 8; + res += tmp; + return res; +} + +uint32_t sub32 (uint16_t a, uint16_t b) +{ + uint16_t tmp = a - b; + uint16_t overflow = 0; + if (tmp > a) + overflow = -1; + + uint32_t res = overflow; + res <<= 16; + res += tmp; + return res; +} + +uint64_t sub64 (uint32_t a, uint32_t b) +{ 
+ uint32_t tmp = a - b; + uint32_t overflow = 0; + if (tmp > a) + overflow = -1; + + uint64_t res = overflow; + res <<= 32; + res += tmp; + return res; +} + +uint128_t sub128 (uint64_t a, uint64_t b) +{ + uint64_t tmp = a - b; + uint64_t overflow = 0; + if (tmp > a) + overflow = -1; + + uint128_t res = overflow; + res <<= 64; + res += tmp; + return res; +} + +uint256_t sub256 (uint128_t a, uint128_t b) +{ + uint128_t tmp = a - b; + uint128_t overflow = 0; + if (tmp > a) + overflow = -1; + + uint256_t res; + res.hi = overflow; + res.lo = tmp; + return res; +} + +/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */ diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 232e903b0..55d6ee8ae 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -3468,6 +3468,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, } } +/* Check if the corresponding operation has wider equivalent on the target. */ + +static bool +wider_optab_check_p (optab op, machine_mode mode, int unsignedp) +{ + machine_mode wider_mode; + FOR_EACH_WIDER_MODE (wider_mode, mode) + { + machine_mode next_mode; + if (optab_handler (op, wider_mode) != CODE_FOR_nothing + || (op == smul_optab + && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode) + && (find_widening_optab_handler ((unsignedp + ? umul_widen_optab + : smul_widen_optab), + next_mode, mode)))) + return true; + } + + return false; +} /* Helper function of match_arith_overflow. 
For MUL_OVERFLOW, if we have a check for non-zero like: @@ -3903,15 +3924,22 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, || code == MINUS_EXPR || code == MULT_EXPR || code == BIT_NOT_EXPR); + int unsignedp = TYPE_UNSIGNED (type); if (!INTEGRAL_TYPE_P (type) - || !TYPE_UNSIGNED (type) - || has_zero_uses (lhs) - || (code != PLUS_EXPR - && code != MULT_EXPR - && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab, - TYPE_MODE (type)) == CODE_FOR_nothing)) + || !unsignedp + || has_zero_uses (lhs)) return false; + if (code == PLUS_EXPR || code == MINUS_EXPR) + { + machine_mode mode = TYPE_MODE (type); + optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab; + if (optab_handler (op, mode) == CODE_FOR_nothing + && (!flag_uaddsub_overflow_match_all + || !wider_optab_check_p (op, mode, unsignedp))) + return false; + } + tree rhs1 = gimple_assign_rhs1 (stmt); tree rhs2 = gimple_assign_rhs2 (stmt); FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) @@ -3986,7 +4014,8 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen)) || (code == PLUS_EXPR && optab_handler (uaddv4_optab, - TYPE_MODE (type)) == CODE_FOR_nothing) + TYPE_MODE (type)) == CODE_FOR_nothing + && !flag_uaddsub_overflow_match_all) || (code == MULT_EXPR && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab, TYPE_MODE (type)) == CODE_FOR_nothing)) -- 2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS) is an openSUSE project.
浙ICP备2022010568号-2