Projects
openEuler:24.03:SP1:Everything
gcc
_service:tar_scm:0059-LoongArch-Add-support-for...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch of Package gcc
From 88117f2703d06e44983e54a985ec0ad6f2397a46 Mon Sep 17 00:00:00 2001 From: Jiahao Xu <xujiahao@loongson.cn> Date: Wed, 6 Dec 2023 15:04:49 +0800 Subject: [PATCH 059/188] LoongArch: Add support for LoongArch V1.1 approximate instructions. This patch adds define_insn/builtins/intrinsics for these instructions, and add option -mfrecipe to control instruction generation. gcc/ChangeLog: * config/loongarch/genopts/isa-evolution.in (fecipe): Add. * config/loongarch/larchintrin.h (__frecipe_s): New intrinsic. (__frecipe_d): Ditto. (__frsqrte_s): Ditto. (__frsqrte_d): Ditto. * config/loongarch/lasx.md (lasx_xvfrecipe_<flasxfmt>): New insn pattern. (lasx_xvfrsqrte_<flasxfmt>): Ditto. * config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic. (__lasx_xvfrecipe_d): Ditto. (__lasx_xvfrsqrte_s): Ditto. (__lasx_xvfrsqrte_d): Ditto. * config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates. (LSX_EXT_BUILTIN): New macro. (LASX_EXT_BUILTIN): Ditto. * config/loongarch/loongarch-cpucfg-map.h: Regenerate. * config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe". * config/loongarch/loongarch-def.cc: Regenerate. * config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate. * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE. * config/loongarch/loongarch.md (loongarch_frecipe_<fmt>): New insn pattern. (loongarch_frsqrte_<fmt>): Ditto. * config/loongarch/loongarch.opt: Regenerate. * config/loongarch/lsx.md (lsx_vfrecipe_<flsxfmt>): New insn pattern. (lsx_vfrsqrte_<flsxfmt>): Ditto. * config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic. (__lsx_vfrecipe_d): Ditto. (__lsx_vfrsqrte_s): Ditto. (__lsx_vfrsqrte_d): Ditto. * doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics. gcc/testsuite/ChangeLog: * gcc.target/loongarch/larch-frecipe-builtin.c: New test. * gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test. * gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test. --- gcc/config/loongarch/genopts/isa-evolution.in | 1 + gcc/config/loongarch/larchintrin.h | 38 +++++++++++++++++ gcc/config/loongarch/lasx.md | 24 +++++++++++ gcc/config/loongarch/lasxintrin.h | 34 +++++++++++++++ gcc/config/loongarch/loongarch-builtins.cc | 42 +++++++++++++++++++ gcc/config/loongarch/loongarch-c.cc | 3 ++ gcc/config/loongarch/loongarch-cpucfg-map.h | 1 + gcc/config/loongarch/loongarch-def.cc | 3 +- gcc/config/loongarch/loongarch-str.h | 1 + gcc/config/loongarch/loongarch.cc | 1 + gcc/config/loongarch/loongarch.md | 35 +++++++++++++++- gcc/config/loongarch/loongarch.opt | 4 ++ gcc/config/loongarch/lsx.md | 24 +++++++++++ gcc/config/loongarch/lsxintrin.h | 34 +++++++++++++++ gcc/doc/extend.texi | 35 ++++++++++++++++ .../loongarch/larch-frecipe-builtin.c | 28 +++++++++++++ .../vector/lasx/lasx-frecipe-builtin.c | 30 +++++++++++++ .../vector/lsx/lsx-frecipe-builtin.c | 30 +++++++++++++ 18 files changed, 365 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in index a6bc3f87f..11a198b64 100644 --- a/gcc/config/loongarch/genopts/isa-evolution.in +++ b/gcc/config/loongarch/genopts/isa-evolution.in @@ -1,3 +1,4 @@ +2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions. 2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. 2 27 lam-bh Support am{swap/add}[_db].{b/h} instructions. 2 28 lamcas Support amcas[_db].{b/h/w/d} instructions. diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h index 2833f1487..22035e767 100644 --- a/gcc/config/loongarch/larchintrin.h +++ b/gcc/config/loongarch/larchintrin.h @@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) } #endif +#ifdef __loongarch_frecipe +/* Assembly instruction format: fd, fj. */ +/* Data types in instruction templates: SF, SF. */ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__frecipe_s (float _1) +{ + __builtin_loongarch_frecipe_s ((float) _1); +} + +/* Assembly instruction format: fd, fj. */ +/* Data types in instruction templates: DF, DF. */ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__frecipe_d (double _1) +{ + __builtin_loongarch_frecipe_d ((double) _1); +} + +/* Assembly instruction format: fd, fj. */ +/* Data types in instruction templates: SF, SF. */ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__frsqrte_s (float _1) +{ + __builtin_loongarch_frsqrte_s ((float) _1); +} + +/* Assembly instruction format: fd, fj. */ +/* Data types in instruction templates: DF, DF. */ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__frsqrte_d (double _1) +{ + __builtin_loongarch_frsqrte_d ((double) _1); +} +#endif + /* Assembly instruction format: ui15. */ /* Data types in instruction templates: USI. */ #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1)) diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index de7c88f14..b1416f6c3 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -40,8 +40,10 @@ UNSPEC_LASX_XVFCVTL UNSPEC_LASX_XVFLOGB UNSPEC_LASX_XVFRECIP + UNSPEC_LASX_XVFRECIPE UNSPEC_LASX_XVFRINT UNSPEC_LASX_XVFRSQRT + UNSPEC_LASX_XVFRSQRTE UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SEQ UNSPEC_LASX_XVFCMP_SLE @@ -1633,6 +1635,17 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "lasx_xvfrecipe_<flasxfmt>" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRECIPE))] + "ISA_HAS_LASX && TARGET_FRECIPE" + "xvfrecipe.<flasxfmt>\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lasx_xvfrsqrt_<flasxfmt>" [(set (match_operand:FLASX 0 "register_operand" "=f") (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] @@ -1642,6 +1655,17 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Square Root Instructions. + +(define_insn "lasx_xvfrsqrte_<flasxfmt>" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRSQRTE))] + "ISA_HAS_LASX && TARGET_FRECIPE" + "xvfrsqrte.<flasxfmt>\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>" [(set (match_operand:<VIMODE256> 0 "register_operand" "=f") (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h index 7bce2c757..5e65e76e7 100644 --- a/gcc/config/loongarch/lasxintrin.h +++ b/gcc/config/loongarch/lasxintrin.h @@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1) return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); } +#if defined(__loongarch_frecipe) +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrecipe_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrecipe_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrsqrte_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrsqrte_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1); +} +#endif + /* Assembly instruction format: xd, xj. */ /* Data types in instruction templates: V8SF, V8SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index f4523c8bf..bc156bd36 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -120,6 +120,9 @@ struct loongarch_builtin_description AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) AVAIL_ALL (lsx, ISA_HAS_LSX) AVAIL_ALL (lasx, ISA_HAS_LASX) +AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI) +AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE) +AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) /* Construct a loongarch_builtin_description from the given arguments. @@ -164,6 +167,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ FUNCTION_TYPE, loongarch_builtin_avail_lsx } + /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_<INSN> + for instruction CODE_FOR_lsx_<INSN>. FUNCTION_TYPE is a builtin_description + field. AVAIL is the name of the availability predicate, without the leading + loongarch_builtin_avail_. */ +#define LSX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ + { CODE_FOR_lsx_ ## INSN, \ + "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ + FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL } + /* Define an LSX LARCH_BUILTIN_LSX_TEST_BRANCH function __builtin_lsx_<INSN> for instruction CODE_FOR_lsx_<INSN>. FUNCTION_TYPE is a builtin_description @@ -189,6 +201,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ FUNCTION_TYPE, loongarch_builtin_avail_lasx } +/* Define an LASX LARCH_BUILTIN_DIRECT function __builtin_lasx_<INSN> + for instruction CODE_FOR_lasx_<INSN>. FUNCTION_TYPE is a builtin_description + field. AVAIL is the name of the availability predicate, without the leading + loongarch_builtin_avail_. */ +#define LASX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ + { CODE_FOR_lasx_ ## INSN, \ + "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ + FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL } + /* Define an LASX LARCH_BUILTIN_DIRECT_NO_TARGET function __builtin_lasx_<INSN> for instruction CODE_FOR_lasx_<INSN>. FUNCTION_TYPE is a builtin_description field. */ @@ -804,6 +825,27 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default), DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default), + /* Built-in functions for frecipe.{s/d} and frsqrte.{s/d}. */ + + DIRECT_BUILTIN (frecipe_s, LARCH_SF_FTYPE_SF, frecipe), + DIRECT_BUILTIN (frecipe_d, LARCH_DF_FTYPE_DF, frecipe), + DIRECT_BUILTIN (frsqrte_s, LARCH_SF_FTYPE_SF, frecipe), + DIRECT_BUILTIN (frsqrte_d, LARCH_DF_FTYPE_DF, frecipe), + + /* Built-in functions for new LSX instructions. */ + + LSX_EXT_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe), + LSX_EXT_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe), + LSX_EXT_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe), + LSX_EXT_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe), + + /* Built-in functions for new LASX instructions. */ + + LASX_EXT_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe), + LASX_EXT_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe), + LASX_EXT_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe), + LASX_EXT_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe), + /* Built-in functions for LSX. */ LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI), LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI), diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc index 76c8ea8db..a89477a74 100644 --- a/gcc/config/loongarch/loongarch-c.cc +++ b/gcc/config/loongarch/loongarch-c.cc @@ -102,6 +102,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) else builtin_define ("__loongarch_frlen=0"); + if (TARGET_HARD_FLOAT && TARGET_FRECIPE) + builtin_define ("__loongarch_frecipe"); + if (ISA_HAS_LSX) { builtin_define ("__loongarch_simd"); diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h index 02ff16712..148333c24 100644 --- a/gcc/config/loongarch/loongarch-cpucfg-map.h +++ b/gcc/config/loongarch/loongarch-cpucfg-map.h @@ -29,6 +29,7 @@ static constexpr struct { unsigned int cpucfg_bit; HOST_WIDE_INT isa_evolution_bit; } cpucfg_map[] = { + { 2, 1u << 25, OPTION_MASK_ISA_FRECIPE }, { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH }, { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS }, diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc index bc6997e45..c41804a18 100644 --- a/gcc/config/loongarch/loongarch-def.cc +++ b/gcc/config/loongarch/loongarch-def.cc @@ -60,7 +60,8 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa = .fpu_ (ISA_EXT_FPU64) .simd_ (ISA_EXT_SIMD_LASX) .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA - | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); + | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS + | OPTION_MASK_ISA_FRECIPE)); static inline loongarch_cache la464_cache () { diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h index 7144bbe28..a8821acb0 100644 --- a/gcc/config/loongarch/loongarch-str.h +++ b/gcc/config/loongarch/loongarch-str.h @@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. If not see #define STR_EXPLICIT_RELOCS_NONE "none" #define STR_EXPLICIT_RELOCS_ALWAYS "always" +#define OPTSTR_FRECIPE "frecipe" #define OPTSTR_DIV32 "div32" #define OPTSTR_LAM_BH "lam-bh" #define OPTSTR_LAMCAS "lamcas" diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 3c8ae9a42..ce1c0a8bd 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -11503,6 +11503,7 @@ loongarch_asm_code_end (void) loongarch_cpu_strings [la_target.cpu_tune]); fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, loongarch_isa_base_strings [la_target.isa.base]); + DUMP_FEATURE (TARGET_FRECIPE); DUMP_FEATURE (TARGET_DIV32); DUMP_FEATURE (TARGET_LAM_BH); DUMP_FEATURE (TARGET_LAMCAS); diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index afc3c591f..9080cec1c 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -59,6 +59,12 @@ ;; Stack tie UNSPEC_TIE + ;; RSQRT + UNSPEC_RSQRTE + + ;; RECIP + UNSPEC_RECIPE + ;; CRC UNSPEC_CRC UNSPEC_CRCC @@ -220,6 +226,7 @@ ;; fmadd floating point multiply-add ;; fdiv floating point divide ;; frdiv floating point reciprocal divide +;; frecipe floating point approximate reciprocal ;; fabs floating point absolute value ;; flogb floating point exponent extract ;; fneg floating point negation @@ -229,6 +236,7 @@ ;; fscaleb floating point scale ;; fsqrt floating point square root ;; frsqrt floating point reciprocal square root +;; frsqrte floating point approximate reciprocal square root ;; multi multiword sequence (or user asm statements) ;; atomic atomic memory update instruction ;; syncloop memory atomic operation implemented as a sync loop @@ -238,8 +246,8 @@ "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, shift,slt,signext,clz,trap,imul,idiv,move, - fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt, - fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost, + fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt, + fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost, simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, @@ -908,6 +916,18 @@ [(set_attr "type" "frdiv") (set_attr "mode" "<UNITMODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "loongarch_frecipe_<fmt>" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RECIPE))] + "TARGET_FRECIPE" + "frecipe.<fmt>\t%0,%1" + [(set_attr "type" "frecipe") + (set_attr "mode" "<UNITMODE>") + (set_attr "insn_count" "1")]) + ;; Integer division and modulus. (define_expand "<optab><mode>3" [(set (match_operand:GPR 0 "register_operand") @@ -1133,6 +1153,17 @@ [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") (set_attr "insn_count" "1")]) + +;; Approximate Reciprocal Square Root Instructions. + +(define_insn "loongarch_frsqrte_<fmt>" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RSQRTE))] + "TARGET_FRECIPE" + "frsqrte.<fmt>\t%0,%1" + [(set_attr "type" "frsqrte") + (set_attr "mode" "<UNITMODE>")]) ;; ;; .................... diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt index 7fe36feb9..e7bc8bed4 100644 --- a/gcc/config/loongarch/loongarch.opt +++ b/gcc/config/loongarch/loongarch.opt @@ -260,6 +260,10 @@ default value is 4. Variable HOST_WIDE_INT isa_evolution = 0 +mfrecipe +Target Mask(ISA_FRECIPE) Var(isa_evolution) +Support frecipe.{s/d} and frsqrte.{s/d} instructions. + mdiv32 Target Mask(ISA_DIV32) Var(isa_evolution) Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index ce6ec6d69..37bdc6910 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -42,8 +42,10 @@ UNSPEC_LSX_VFCVTL UNSPEC_LSX_VFLOGB UNSPEC_LSX_VFRECIP + UNSPEC_LSX_VFRECIPE UNSPEC_LSX_VFRINT UNSPEC_LSX_VFRSQRT + UNSPEC_LSX_VFRSQRTE UNSPEC_LSX_VFCMP_SAF UNSPEC_LSX_VFCMP_SEQ UNSPEC_LSX_VFCMP_SLE @@ -1546,6 +1548,17 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "lsx_vfrecipe_<flsxfmt>" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRECIPE))] + "ISA_HAS_LSX && TARGET_FRECIPE" + "vfrecipe.<flsxfmt>\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lsx_vfrsqrt_<flsxfmt>" [(set (match_operand:FLSX 0 "register_operand" "=f") (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] @@ -1555,6 +1568,17 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Square Root Instructions. + +(define_insn "lsx_vfrsqrte_<flsxfmt>" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRSQRTE))] + "ISA_HAS_LSX && TARGET_FRECIPE" + "vfrsqrte.<flsxfmt>\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lsx_vftint_u_<ilsxfmt_u>_<flsxfmt>" [(set (match_operand:<VIMODE> 0 "register_operand" "=f") (unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h index 29553c093..57a6fc40a 100644 --- a/gcc/config/loongarch/lsxintrin.h +++ b/gcc/config/loongarch/lsxintrin.h @@ -2480,6 +2480,40 @@ __m128d __lsx_vfrecip_d (__m128d _1) return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1); } +#if defined(__loongarch_frecipe) +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrecipe_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrecipe_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrsqrte_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrsqrte_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1); +} +#endif + /* Assembly instruction format: vd, vj. */ /* Data types in instruction templates: V4SF, V4SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7edd3974d..bb042ae78 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -16187,6 +16187,14 @@ The intrinsics provided are listed below: void __builtin_loongarch_break (imm0_32767) @end smallexample +These instrisic functions are available by using @option{-mfrecipe}. +@smallexample + float __builtin_loongarch_frecipe_s (float); + double __builtin_loongarch_frecipe_d (double); + float __builtin_loongarch_frsqrte_s (float); + double __builtin_loongarch_frsqrte_d (double); +@end smallexample + @emph{Note:}Since the control register is divided into 32-bit and 64-bit, but the access instruction is not distinguished. So GCC renames the control instructions when implementing intrinsics. @@ -16259,6 +16267,15 @@ function you need to include @code{larchintrin.h}. void __break (imm0_32767) @end smallexample +These instrisic functions are available by including @code{larchintrin.h} and +using @option{-mfrecipe}. +@smallexample + float __frecipe_s (float); + double __frecipe_d (double); + float __frsqrte_s (float); + double __frsqrte_d (double); +@end smallexample + Returns the value that is currently set in the @samp{tp} register. @smallexample void * __builtin_thread_pointer (void) @@ -17085,6 +17102,15 @@ __m128i __lsx_vxori_b (__m128i, imm0_255); __m128i __lsx_vxor_v (__m128i, __m128i); @end smallexample +These instrisic functions are available by including @code{lsxintrin.h} and +using @option{-mfrecipe} and @option{-mlsx}. +@smallexample +__m128d __lsx_vfrecipe_d (__m128d); +__m128 __lsx_vfrecipe_s (__m128); +__m128d __lsx_vfrsqrte_d (__m128d); +__m128 __lsx_vfrsqrte_s (__m128); +@end smallexample + @node LoongArch ASX Vector Intrinsics @subsection LoongArch ASX Vector Intrinsics @@ -17924,6 +17950,15 @@ __m256i __lasx_xvxori_b (__m256i, imm0_255); __m256i __lasx_xvxor_v (__m256i, __m256i); @end smallexample +These instrisic functions are available by including @code{lasxintrin.h} and +using @option{-mfrecipe} and @option{-mlasx}. +@smallexample +__m256d __lasx_xvfrecipe_d (__m256d); +__m256 __lasx_xvfrecipe_s (__m256); +__m256d __lasx_xvfrsqrte_d (__m256d); +__m256 __lasx_xvfrsqrte_s (__m256); +@end smallexample + @node MIPS DSP Built-in Functions @subsection MIPS DSP Built-in Functions diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c new file mode 100644 index 000000000..b9329f346 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c @@ -0,0 +1,28 @@ +/* Test builtins for frecipe.{s/d} and frsqrte.{s/d} instructions */ +/* { dg-do compile } */ +/* { dg-options "-mfrecipe" } */ +/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */ +/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */ +/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */ +/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */ + +float +test_frecipe_s (float _1) +{ + return __builtin_loongarch_frecipe_s (_1); +} +double +test_frecipe_d (double _1) +{ + return __builtin_loongarch_frecipe_d (_1); +} +float +test_frsqrte_s (float _1) +{ + return __builtin_loongarch_frsqrte_s (_1); +} +double +test_frsqrte_d (double _1) +{ + return __builtin_loongarch_frsqrte_d (_1); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c new file mode 100644 index 000000000..522535b45 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c @@ -0,0 +1,30 @@ +/* Test builtins for xvfrecipe.{s/d} and xvfrsqrte.{s/d} instructions */ +/* { dg-do compile } */ +/* { dg-options "-mlasx -mfrecipe" } */ +/* { dg-final { scan-assembler-times "lasx_xvfrecipe_s:.*xvfrecipe\\.s.*lasx_xvfrecipe_s" 1 } } */ +/* { dg-final { scan-assembler-times "lasx_xvfrecipe_d:.*xvfrecipe\\.d.*lasx_xvfrecipe_d" 1 } } */ +/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_s:.*xvfrsqrte\\.s.*lasx_xvfrsqrte_s" 1 } } */ +/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_d:.*xvfrsqrte\\.d.*lasx_xvfrsqrte_d" 1 } } */ + +#include <lasxintrin.h> + +v8f32 +__lasx_xvfrecipe_s (v8f32 _1) +{ + return __builtin_lasx_xvfrecipe_s (_1); +} +v4f64 +__lasx_xvfrecipe_d (v4f64 _1) +{ + return __builtin_lasx_xvfrecipe_d (_1); +} +v8f32 +__lasx_xvfrsqrte_s (v8f32 _1) +{ + return __builtin_lasx_xvfrsqrte_s (_1); +} +v4f64 +__lasx_xvfrsqrte_d (v4f64 _1) +{ + return __builtin_lasx_xvfrsqrte_d (_1); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c new file mode 100644 index 000000000..4ad0cb0ff --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c @@ -0,0 +1,30 @@ +/* Test builtins for vfrecipe.{s/d} and vfrsqrte.{s/d} instructions */ +/* { dg-do compile } */ +/* { dg-options "-mlsx -mfrecipe" } */ +/* { dg-final { scan-assembler-times "lsx_vfrecipe_s:.*vfrecipe\\.s.*lsx_vfrecipe_s" 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vfrecipe_d:.*vfrecipe\\.d.*lsx_vfrecipe_d" 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vfrsqrte_s:.*vfrsqrte\\.s.*lsx_vfrsqrte_s" 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vfrsqrte_d:.*vfrsqrte\\.d.*lsx_vfrsqrte_d" 1 } } */ + +#include <lsxintrin.h> + +v4f32 +__lsx_vfrecipe_s (v4f32 _1) +{ + return __builtin_lsx_vfrecipe_s (_1); +} +v2f64 +__lsx_vfrecipe_d (v2f64 _1) +{ + return __builtin_lsx_vfrecipe_d (_1); +} +v4f32 +__lsx_vfrsqrte_s (v4f32 _1) +{ + return __builtin_lsx_vfrsqrte_s (_1); +} +v2f64 +__lsx_vfrsqrte_d (v2f64 _1) +{ + return __builtin_lsx_vfrsqrte_d (_1); +} -- 2.43.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2