Projects
openEuler:24.03:SP1:Everything
gcc
_service:tar_scm:0012-LoongArch-Implement-vec_w...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0012-LoongArch-Implement-vec_widen-standard-names.patch of Package gcc
From 81e2e22979d9f9d170b1c30ec27e30e1f25aec35 Mon Sep 17 00:00:00 2001 From: Jiahao Xu <xujiahao@loongson.cn> Date: Wed, 18 Oct 2023 17:39:40 +0800 Subject: [PATCH 012/188] LoongArch: Implement vec_widen standard names. Add support for the vec_widen lo/hi patterns. These do not map directly onto LoongArch LASX instructions, but they can be emulated with the even/odd widening instructions followed by a vector merge. gcc/ChangeLog: * config/loongarch/lasx.md (vec_widen_<su>mult_even_v8si): Remove. (vec_widen_<su>add_hi_<mode>): New pattern. (vec_widen_<su>add_lo_<mode>): Ditto. (vec_widen_<su>sub_hi_<mode>): Ditto. (vec_widen_<su>sub_lo_<mode>): Ditto. (vec_widen_<su>mult_hi_<mode>): Ditto. (vec_widen_<su>mult_lo_<mode>): Ditto. * config/loongarch/loongarch.md (u_bool): New code attribute. * config/loongarch/loongarch-protos.h (loongarch_expand_vec_widen_hilo): New prototype. * config/loongarch/loongarch.cc (loongarch_expand_vec_interleave): New function. (loongarch_expand_vec_widen_hilo): New function. gcc/testsuite/ChangeLog: * gcc.target/loongarch/vect-widen-add.c: New test. * gcc.target/loongarch/vect-widen-mul.c: New test. * gcc.target/loongarch/vect-widen-sub.c: New test. 
--- gcc/config/loongarch/lasx.md | 82 ++++++++--- gcc/config/loongarch/loongarch-protos.h | 1 + gcc/config/loongarch/loongarch.cc | 137 ++++++++++++++++++ gcc/config/loongarch/loongarch.md | 2 + .../gcc.target/loongarch/vect-widen-add.c | 24 +++ .../gcc.target/loongarch/vect-widen-mul.c | 24 +++ .../gcc.target/loongarch/vect-widen-sub.c | 24 +++ 7 files changed, 277 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-add.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index c7496d68a..442fda246 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -5048,23 +5048,71 @@ [(set_attr "type" "simd_store") (set_attr "mode" "DI")]) -(define_insn "vec_widen_<su>mult_even_v8si" - [(set (match_operand:V4DI 0 "register_operand" "=f") - (mult:V4DI - (any_extend:V4DI - (vec_select:V4SI - (match_operand:V8SI 1 "register_operand" "%f") - (parallel [(const_int 0) (const_int 2) - (const_int 4) (const_int 6)]))) - (any_extend:V4DI - (vec_select:V4SI - (match_operand:V8SI 2 "register_operand" "f") - (parallel [(const_int 0) (const_int 2) - (const_int 4) (const_int 6)])))))] - "ISA_HAS_LASX" - "xvmulwev.d.w<u>\t%u0,%u1,%u2" - [(set_attr "type" "simd_int_arith") - (set_attr "mode" "V4DI")]) +(define_expand "vec_widen_<su>add_hi_<mode>" + [(match_operand:<VDMODE256> 0 "register_operand") + (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) + (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] + "ISA_HAS_LASX" +{ + loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], + <u_bool>, true, "add"); + DONE; +}) + +(define_expand "vec_widen_<su>add_lo_<mode>" + [(match_operand:<VDMODE256> 0 "register_operand") + (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) + 
(any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] + "ISA_HAS_LASX" +{ + loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], + <u_bool>, false, "add"); + DONE; +}) + +(define_expand "vec_widen_<su>sub_hi_<mode>" + [(match_operand:<VDMODE256> 0 "register_operand") + (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) + (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] + "ISA_HAS_LASX" +{ + loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], + <u_bool>, true, "sub"); + DONE; +}) + +(define_expand "vec_widen_<su>sub_lo_<mode>" + [(match_operand:<VDMODE256> 0 "register_operand") + (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) + (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] + "ISA_HAS_LASX" +{ + loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], + <u_bool>, false, "sub"); + DONE; +}) + +(define_expand "vec_widen_<su>mult_hi_<mode>" + [(match_operand:<VDMODE256> 0 "register_operand") + (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) + (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] + "ISA_HAS_LASX" +{ + loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], + <u_bool>, true, "mult"); + DONE; +}) + +(define_expand "vec_widen_<su>mult_lo_<mode>" + [(match_operand:<VDMODE256> 0 "register_operand") + (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) + (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] + "ISA_HAS_LASX" +{ + loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], + <u_bool>, false, "mult"); + DONE; +}) ;; Vector reduction operation (define_expand "reduc_plus_scal_v4di" diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index ea61cf567..163162598 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ 
b/gcc/config/loongarch/loongarch-protos.h @@ -205,6 +205,7 @@ extern void loongarch_register_frame_header_opt (void); extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode, rtx *); +extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *); /* Routines implemented in loongarch-c.c. */ void loongarch_cpu_cpp_builtins (cpp_reader *); diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 9a629a999..c0f58f9a9 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -8028,6 +8028,143 @@ loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d) return loongarch_expand_vec_perm_even_odd_1 (d, odd); } +static void +loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p) +{ + struct expand_vec_perm_d d; + unsigned i, nelt, base; + bool ok; + + d.target = target; + d.op0 = op0; + d.op1 = op1; + d.vmode = GET_MODE (target); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.one_vector_p = false; + d.testing_p = false; + + base = high_p ? nelt / 2 : 0; + for (i = 0; i < nelt / 2; ++i) + { + d.perm[i * 2] = i + base; + d.perm[i * 2 + 1] = i + base + nelt; + } + + ok = loongarch_expand_vec_perm_interleave (&d); + gcc_assert (ok); +} + +/* The loongarch lasx instructions xvmulwev and xvmulwod return the even or odd + parts of the double sized result elements in the corresponding elements of + the target register. That's NOT what the vec_widen_umult_lo/hi patterns are + expected to do. We emulate the widening lo/hi multiplies with the even/odd + versions followed by a vector merge. 
*/ + +void +loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2, + bool uns_p, bool high_p, const char *optab) +{ + machine_mode wmode = GET_MODE (dest); + machine_mode mode = GET_MODE (op1); + rtx t1, t2, t3; + + t1 = gen_reg_rtx (wmode); + t2 = gen_reg_rtx (wmode); + t3 = gen_reg_rtx (wmode); + switch (mode) + { + case V16HImode: + if (!strcmp (optab, "add")) + { + if (!uns_p) + { + emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2)); + emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2)); + } + else + { + emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2)); + emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2)); + } + } + else if (!strcmp (optab, "mult")) + { + if (!uns_p) + { + emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2)); + emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2)); + } + else + { + emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2)); + emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2)); + } + } + else if (!strcmp (optab, "sub")) + { + if (!uns_p) + { + emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2)); + emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2)); + } + else + { + emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2)); + emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2)); + } + } + break; + + case V32QImode: + if (!strcmp (optab, "add")) + { + if (!uns_p) + { + emit_insn (gen_lasx_xvaddwev_h_b (t1, op1, op2)); + emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2)); + } + else + { + emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2)); + emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2)); + } + } + else if (!strcmp (optab, "mult")) + { + if (!uns_p) + { + emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2)); + emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2)); + } + else + { + emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2)); + emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2)); + } + } + else if (!strcmp (optab, "sub")) + { + if (!uns_p) + { + emit_insn (gen_lasx_xvsubwev_h_b (t1, op1, op2)); + emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2)); + } + else + { + 
emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2)); + emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2)); + } + } + break; + + default: + gcc_unreachable (); + } + + loongarch_expand_vec_interleave (t3, t1, t2, high_p); + emit_move_insn (dest, gen_lowpart (wmode, t3)); +} + /* Expand a variable vector permutation for LASX. */ void diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 5f9e63d66..29ac950bf 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -509,6 +509,8 @@ ;; <su> is like <u>, but the signed form expands to "s" rather than "". (define_code_attr su [(sign_extend "s") (zero_extend "u")]) +(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")]) + ;; <optab> expands to the name of the optab for a particular code. (define_code_attr optab [(ashift "ashl") (ashiftrt "ashr") diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c new file mode 100644 index 000000000..0bf832d0e --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mlasx" } */ +/* { dg-final { scan-assembler "xvaddwev.w.h" } } */ +/* { dg-final { scan-assembler "xvaddwod.w.h" } } */ +/* { dg-final { scan-assembler "xvaddwev.w.hu" } } */ +/* { dg-final { scan-assembler "xvaddwod.w.hu" } } */ + +#include <stdint.h> + +#define SIZE 1024 + +void +wide_uadd (uint32_t *foo, uint16_t *a, uint16_t *b) +{ + for ( int i = 0; i < SIZE; i++) + foo[i] = a[i] + b[i]; +} + +void +wide_sadd (int32_t *foo, int16_t *a, int16_t *b) +{ + for ( int i = 0; i < SIZE; i++) + foo[i] = a[i] + b[i]; +} diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c new file mode 100644 index 000000000..84b020eea --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { 
dg-options "-O3 -mlasx" } */ +/* { dg-final { scan-assembler "xvmulwev.w.h" } } */ +/* { dg-final { scan-assembler "xvmulwod.w.h" } } */ +/* { dg-final { scan-assembler "xvmulwev.w.hu" } } */ +/* { dg-final { scan-assembler "xvmulwod.w.hu" } } */ + +#include <stdint.h> + +#define SIZE 1024 + +void +wide_umul (uint32_t *foo, uint16_t *a, uint16_t *b) +{ + for ( int i = 0; i < SIZE; i++) + foo[i] = a[i] * b[i]; +} + +void +wide_smul (int32_t *foo, int16_t *a, int16_t *b) +{ + for ( int i = 0; i < SIZE; i++) + foo[i] = a[i] * b[i]; +} diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c new file mode 100644 index 000000000..69fc3a517 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mlasx" } */ +/* { dg-final { scan-assembler "xvsubwev.w.h" } } */ +/* { dg-final { scan-assembler "xvsubwod.w.h" } } */ +/* { dg-final { scan-assembler "xvsubwev.w.hu" } } */ +/* { dg-final { scan-assembler "xvsubwod.w.hu" } } */ + +#include <stdint.h> + +#define SIZE 1024 + +void +wide_usub (uint32_t *foo, uint16_t *a, uint16_t *b) +{ + for ( int i = 0; i < SIZE; i++) + foo[i] = a[i] - b[i]; +} + +void +wide_ssub (int32_t *foo, int16_t *a, int16_t *b) +{ + for ( int i = 0; i < SIZE; i++) + foo[i] = a[i] - b[i]; +} -- 2.43.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2