Projects
Mega:24.03:SP1:Everything
gcc
_service:tar_scm:LoongArch-Use-bstrins-instruct...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:LoongArch-Use-bstrins-instruction-for-a-mask-and-a-m.patch of Package gcc
From 1c63c61f6508e3c718be79dd27dda25db2b291ee Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 5 Sep 2023 19:42:30 +0800
Subject: [PATCH 068/124] LoongArch: Use bstrins instruction for (a & ~mask) and (a & mask) | (b & ~mask) [PR111252]

If mask is a constant with value ((1 << N) - 1) << M we can perform this
optimization.

gcc/ChangeLog:

	PR target/111252
	* config/loongarch/loongarch-protos.h
	(loongarch_pre_reload_split): Declare new function.
	(loongarch_use_bstrins_for_ior_with_mask): Likewise.
	* config/loongarch/loongarch.cc
	(loongarch_pre_reload_split): Implement.
	(loongarch_use_bstrins_for_ior_with_mask): Likewise.
	* config/loongarch/predicates.md (ins_zero_bitmask_operand):
	New predicate.
	* config/loongarch/loongarch.md (bstrins_<mode>_for_mask):
	New define_insn_and_split.
	(bstrins_<mode>_for_ior_mask): Likewise.
	(define_peephole2): Further optimize code sequence produced by
	bstrins_<mode>_for_ior_mask if possible.

gcc/testsuite/ChangeLog:

	* g++.target/loongarch/bstrins-compile.C: New test.
	* g++.target/loongarch/bstrins-run.C: New test.

Signed-off-by: Peng Fan <fanpeng@loongson.cn> Signed-off-by: ticat_fp <fanpeng@loongson.cn> --- gcc/config/loongarch/loongarch-protos.h | 4 +- gcc/config/loongarch/loongarch.cc | 36 ++++++++ gcc/config/loongarch/loongarch.md | 91 +++++++++++++++++++ gcc/config/loongarch/predicates.md | 8 ++ .../g++.target/loongarch/bstrins-compile.C | 22 +++++ .../g++.target/loongarch/bstrins-run.C | 65 +++++++++++++ 6 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.target/loongarch/bstrins-compile.C create mode 100644 gcc/testsuite/g++.target/loongarch/bstrins-run.C diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index 133ec9fa8..ea61cf567 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -56,7 +56,7 @@ enum loongarch_symbol_type { }; #define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1) -/* Routines implemented in loongarch.c. */ +/* Routines implemented in loongarch.cc. */ extern rtx loongarch_emit_move (rtx, rtx); extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); extern void loongarch_expand_prologue (void); @@ -163,6 +163,8 @@ extern const char *current_section_name (void); extern unsigned int current_section_flags (void); extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); extern bool loongarch_check_zero_div_p (void); +extern bool loongarch_pre_reload_split (void); +extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *); union loongarch_gen_fn_ptrs { diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index dae35a479..4b0944d56 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -5478,6 +5478,42 @@ loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos) return true; } +/* Predicate for pre-reload splitters with associated instructions, + which can match any time before the split1 pass (usually combine), + 
then are unconditionally split in that pass and should not be + matched again afterwards. */ + +bool loongarch_pre_reload_split (void) +{ + return (can_create_pseudo_p () + && !(cfun->curr_properties & PROP_rtl_split_insns)); +} + +/* Check if we can use bstrins.<d> for + op0 = (op1 & op2) | (op3 & op4) + where op0, op1, op3 are regs, and op2, op4 are integer constants. */ +int +loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) +{ + unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]); + unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]); + + if (mask1 != ~mask2 || !mask1 || !mask2) + return 0; + + /* Try to avoid a right-shift. */ + if (low_bitmask_len (mode, mask1) != -1) + return -1; + + if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) + return 1; + + if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) + return -1; + + return 0; +} + /* Print the text for PRINT_OPERAND punctation character CH to FILE. The punctuation characters are: diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 3dde0ceb1..11c18bf15 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1322,6 +1322,97 @@ [(set_attr "move_type" "pick_ins") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*bstrins_<mode>_for_mask" + [(set (match_operand:GPR 0 "register_operand") + (and:GPR (match_operand:GPR 1 "register_operand") + (match_operand:GPR 2 "ins_zero_bitmask_operand")))] + "" + "#" + "" + [(set (match_dup 0) (match_dup 1)) + (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 3)) + (const_int 0))] + { + unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]); + int lo = ffs_hwi (mask) - 1; + int len = low_bitmask_len (<MODE>mode, mask >> lo); + + len = MIN (len, GET_MODE_BITSIZE (<MODE>mode) - lo); + operands[2] = GEN_INT (len); + operands[3] = GEN_INT (lo); + }) + +(define_insn_and_split "*bstrins_<mode>_for_ior_mask" + [(set (match_operand:GPR 0 "register_operand") + 
(ior:GPR (and:GPR (match_operand:GPR 1 "register_operand") + (match_operand:GPR 2 "const_int_operand")) + (and:GPR (match_operand:GPR 3 "register_operand") + (match_operand:GPR 4 "const_int_operand"))))] + "loongarch_pre_reload_split () && \ + loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" + "#" + "" + [(set (match_dup 0) (match_dup 1)) + (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4)) + (match_dup 3))] + { + if (loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands) < 0) + { + std::swap (operands[1], operands[3]); + std::swap (operands[2], operands[4]); + } + + unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]); + int lo = ffs_hwi (mask) - 1; + int len = low_bitmask_len (<MODE>mode, mask >> lo); + + len = MIN (len, GET_MODE_BITSIZE (<MODE>mode) - lo); + operands[2] = GEN_INT (len); + operands[4] = GEN_INT (lo); + + if (lo) + { + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_move_insn (tmp, gen_rtx_ASHIFTRT(<MODE>mode, operands[3], + GEN_INT (lo))); + operands[3] = tmp; + } + }) + +;; We always avoid the shift operation in bstrins_<mode>_for_ior_mask +;; if possible, but the result may be sub-optimal when one of the masks +;; is (1 << N) - 1 and one of the src register is the dest register. +;; For example: +;; move t0, a0 +;; move a0, a1 +;; bstrins.d a0, t0, 42, 0 +;; ret +;; using a shift operation would be better: +;; srai.d t0, a1, 43 +;; bstrins.d a0, t0, 63, 43 +;; ret +;; unfortunately we cannot figure it out in split1: before reload we cannot +;; know if the dest register is one of the src register. Fix it up in +;; peephole2. 
+(define_peephole2 + [(set (match_operand:GPR 0 "register_operand") + (match_operand:GPR 1 "register_operand")) + (set (match_dup 1) (match_operand:GPR 2 "register_operand")) + (set (zero_extract:GPR (match_dup 1) + (match_operand:SI 3 "const_int_operand") + (const_int 0)) + (match_dup 0))] + "peep2_reg_dead_p (3, operands[0])" + [(const_int 0)] + { + int len = GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[3]); + + emit_insn (gen_ashr<mode>3 (operands[0], operands[2], operands[3])); + emit_insn (gen_insv<mode> (operands[1], GEN_INT (len), operands[3], + operands[0])); + DONE; + }) + (define_insn "*iorhi3" [(set (match_operand:HI 0 "register_operand" "=r,r") (ior:HI (match_operand:HI 1 "register_operand" "%r,r") diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index cf9361b73..ad6cee5c4 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -408,6 +408,14 @@ (define_predicate "muldiv_target_operand" (match_operand 0 "register_operand")) +(define_predicate "ins_zero_bitmask_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) != -1") + (match_test "INTVAL (op) & 1") + (match_test "low_bitmask_len (mode, \ + ~UINTVAL (op) | (~UINTVAL(op) - 1)) \ + > 12"))) + (define_predicate "const_call_insn_operand" (match_code "const,symbol_ref,label_ref") { diff --git a/gcc/testsuite/g++.target/loongarch/bstrins-compile.C b/gcc/testsuite/g++.target/loongarch/bstrins-compile.C new file mode 100644 index 000000000..3c0db1de4 --- /dev/null +++ b/gcc/testsuite/g++.target/loongarch/bstrins-compile.C @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c++14 -O2 -march=loongarch64 -mabi=lp64d" } */ +/* { dg-final { scan-assembler "bstrins\\.d.*7,4" } } */ +/* { dg-final { scan-assembler "bstrins\\.d.*15,4" } } */ +/* { dg-final { scan-assembler "bstrins\\.d.*31,4" } } */ +/* { dg-final { scan-assembler "bstrins\\.d.*47,4" } } */ +/* { dg-final { scan-assembler "bstrins\\.d.*3,0" } } */ 
+ +typedef unsigned long u64; + +template <u64 mask> +u64 +test (u64 a, u64 b) +{ + return (a & mask) | (b & ~mask); +} + +template u64 test<0x0000'0000'0000'00f0l> (u64, u64); +template u64 test<0x0000'0000'0000'fff0l> (u64, u64); +template u64 test<0x0000'0000'ffff'fff0l> (u64, u64); +template u64 test<0x0000'ffff'ffff'fff0l> (u64, u64); +template u64 test<0xffff'ffff'ffff'fff0l> (u64, u64); diff --git a/gcc/testsuite/g++.target/loongarch/bstrins-run.C b/gcc/testsuite/g++.target/loongarch/bstrins-run.C new file mode 100644 index 000000000..68913d5e0 --- /dev/null +++ b/gcc/testsuite/g++.target/loongarch/bstrins-run.C @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +typedef unsigned long gr; + +template <int l, int r> +struct mask { + enum { value = (1ul << r) - (1ul << l) }; +}; + +template <int l> +struct mask<l, sizeof (gr) * __CHAR_BIT__> { + enum { value = -(1ul << l) }; +}; + +__attribute__ ((noipa)) void +test (gr a, gr b, gr mask, gr out) +{ + if (((a & mask) | (b & ~mask)) != out) + __builtin_abort (); +} + +__attribute__ ((noipa)) gr +no_optimize (gr x) +{ + return x; +} + +template <int l, int r> +struct test1 { + static void + run (void) + { + gr m = mask<l, r>::value; + gr a = no_optimize (-1ul); + gr b = no_optimize (0); + + test (a, b, m, (a & m) | (b & ~m)); + test (a, b, ~m, (a & ~m) | (b & m)); + test (a, 0, ~m, a & ~m); + + test1<l, r + 1>::run (); + } +}; + +template <int l> +struct test1<l, sizeof (gr) * __CHAR_BIT__ + 1> { + static void run (void) {} +}; + +template <int l> +void +test2 (void) +{ + test1<l, l + 1>::run (); + test2<l + 1> (); +} + +template <> void test2<sizeof (gr) * __CHAR_BIT__> (void) {} + +int +main () +{ + test2<0> (); +} -- 2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2